In [1]:
import transformers
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn
from torch.cuda.amp import custom_fwd, custom_bwd

import logging
# Suppress every logging call of severity WARNING and below, process-wide,
# so library warnings do not clutter the cell output.
logging.disable(logging.WARNING)

class FrozenBNBLinear(nn.Module):
    """Linear layer with a frozen base weight and a small trainable adapter.

    The base ``weight`` is registered as a buffer, so it is part of the state
    dict but is not returned by ``parameters()``. On top of the frozen
    projection the layer adds a bottleneck adapter
    ``Linear(in, adapter_dim) -> Dropout(p) -> Linear(adapter_dim, out)``.
    The second adapter matrix is zero-initialised, so a freshly built layer
    computes exactly ``F.linear(input, weight, bias)``.

    Args:
        weight: Tensor of shape ``(out_features, in_features)``.
        bias: Optional ``nn.Parameter``; registered on this module as-is.
        adapter_dim: Width of the adapter bottleneck. Defaults to 4.
        p: Dropout probability used inside the adapter. Defaults to 0.1.
    """

    def __init__(self, weight, bias=None, adapter_dim=4, p=0.1):
        assert isinstance(bias, nn.Parameter) or bias is None
        super().__init__()
        self.out_features, self.in_features = weight.shape
        # detach() yields a grad-free tensor sharing the same storage. Unlike
        # requires_grad_(False) it neither flips the flag on the caller's
        # tensor nor fails when ``weight`` is a non-leaf tensor.
        self.register_buffer("weight", weight.detach())
        # Sub-module order is kept fixed so the state-dict keys stay
        # ``adapter.0.weight`` and ``adapter.2.weight``.
        self.adapter = nn.Sequential(
            nn.Linear(self.in_features, adapter_dim, bias=False),
            nn.Dropout(p=p),
            nn.Linear(adapter_dim, self.out_features, bias=False),
        )
        # Zero output projection: the adapter contributes nothing until trained.
        nn.init.zeros_(self.adapter[2].weight)
        self.bias = bias

    def forward(self, input):
        """Return the frozen projection of ``input`` plus the adapter correction."""
        output = F.linear(input, self.weight, self.bias)
        # Truthiness of an nn.Sequential follows its length, so an adapter that
        # was set to None or emptied is skipped.
        if self.adapter:
            output += self.adapter(input)
        return output

    def __repr__(self):
        return f"{self.__class__.__name__}({self.in_features}, {self.out_features})"

class FrozenBNBEmbedding(nn.Module):
    """Embedding table with a frozen base weight and a small trainable adapter.

    The base ``weight`` is registered as a buffer and looked up under
    ``torch.no_grad()``. A bottleneck adapter
    ``Embedding(num_embeddings, adapter_dim) -> Dropout(p) ->
    Linear(adapter_dim, embedding_dim)`` is added to the lookup result. The
    adapter's output projection is zero-initialised, so a freshly built layer
    returns exactly ``F.embedding(input, weight)``.

    Args:
        weight: Tensor of shape ``(num_embeddings, embedding_dim)``.
        adapter_dim: Width of the adapter bottleneck. Defaults to 4.
        p: Dropout probability used inside the adapter. Defaults to 0.1.
    """

    def __init__(self, weight, adapter_dim=4, p=0.1):
        super().__init__()
        self.num_embeddings, self.embedding_dim = weight.shape
        # detach() yields a grad-free tensor sharing the same storage. Unlike
        # requires_grad_(False) it neither flips the flag on the caller's
        # tensor nor fails when ``weight`` is a non-leaf tensor.
        self.register_buffer("weight", weight.detach())
        # Sub-module order is kept fixed so the state-dict keys stay
        # ``adapter.0.weight`` and ``adapter.2.weight``.
        self.adapter = nn.Sequential(
            nn.Embedding(self.num_embeddings, adapter_dim),
            nn.Dropout(p=p),
            nn.Linear(adapter_dim, self.embedding_dim, bias=False),
        )
        # Zero output projection: the adapter contributes nothing until trained.
        nn.init.zeros_(self.adapter[2].weight)

    def forward(self, input, **kwargs):
        """Look up ``input`` in the frozen table and add the adapter correction.

        Extra keyword arguments are forwarded to ``F.embedding`` for the frozen
        lookup only.
        """
        with torch.no_grad():
            output = F.embedding(input, self.weight, **kwargs)
        # Truthiness of an nn.Sequential follows its length, so an adapter that
        # was set to None or emptied is skipped.
        if self.adapter:
            output += self.adapter(input)
        return output

    def __repr__(self):
        return f"{self.__class__.__name__}({self.num_embeddings}, {self.embedding_dim})"


def freeze_layer(model):
    """Replace each ``nn.Linear`` / ``nn.Embedding`` under ``model`` with a frozen wrapper.

    Every matching child module is swapped in place for a ``FrozenBNBLinear``
    or ``FrozenBNBEmbedding`` of the same shape. The replacement's frozen
    weight is created as zeros, so the real values must be supplied afterwards
    (for example by loading a state dict). A ``Linear``'s bias parameter is
    carried over to the wrapper.

    Layers that already live inside a frozen wrapper (its adapter stack) are
    left untouched, so the function is safe to call repeatedly on overlapping
    parts of the same model. This is decided by module type, not by attribute
    name, so layers stored at index ``0`` or ``2`` of a ``Sequential`` or
    ``ModuleList`` are frozen like any other.

    Args:
        model: Root ``nn.Module`` whose descendants are rewritten in place.

    Returns:
        None. ``model`` is mutated.
    """
    wrapper_types = (FrozenBNBLinear, FrozenBNBEmbedding)

    # Everything reachable from an existing wrapper (the wrapper itself and
    # its adapter sub-modules) is trainable adapter machinery, not a target.
    shielded = set()
    for module in model.modules():
        if isinstance(module, wrapper_types):
            shielded.update(id(inner) for inner in module.modules())

    # Snapshot the targets before touching the tree, so freshly inserted
    # wrappers are never visited and the traversal is not mutated mid-walk.
    targets = [
        (parent, name, child)
        for parent in model.modules()
        if id(parent) not in shielded
        for name, child in parent.named_children()
        if isinstance(child, (nn.Linear, nn.Embedding))
    ]

    for parent, name, child in targets:
        print('Freezing', name, child)
        if isinstance(child, nn.Linear):
            replacement = FrozenBNBLinear(
                weight=torch.zeros(child.out_features, child.in_features),
                bias=child.bias,
            )
        else:
            replacement = FrozenBNBEmbedding(
                weight=torch.zeros(child.num_embeddings, child.embedding_dim),
            )
        setattr(parent, name, replacement)
                

class GPTJBlock(transformers.models.gptj.modeling_gptj.GPTJBlock):
    """GPT-J block whose attention and MLP sub-modules are passed through ``freeze_layer``."""

    def __init__(self, config):
        super().__init__(config)
        # Attention first, then the MLP, each handled independently.
        for submodule in (self.attn, self.mlp):
            freeze_layer(submodule)


class GPTJModel(transformers.models.gptj.modeling_gptj.GPTJModel):
    """GPT-J backbone that runs ``freeze_layer`` over itself once constructed."""

    def __init__(self, config):
        super().__init__(config)
        freeze_layer(model=self)


class GPTJForCausalLM(transformers.models.gptj.modeling_gptj.GPTJForCausalLM):
    """GPT-J causal language model that runs ``freeze_layer`` over itself once constructed."""

    def __init__(self, config):
        super().__init__(config)
        freeze_layer(model=self)

# Swap the library's GPTJBlock for the subclass defined in this file.
# NOTE(review): presumably this makes GPT-J models constructed afterwards build
# their layers from the patched block - confirm against the installed
# transformers version. It must run before the model is instantiated below.
transformers.models.gptj.modeling_gptj.GPTJBlock = GPTJBlock

# The tokenizer is taken from the KoGPT repository, with every special token
# spelled out explicitly.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b-float16',  # or float32 version: revision=KoGPT6B-ryan1.5b
     bos_token='[BOS]', eos_token='[EOS]', unk_token='[UNK]', pad_token='[PAD]', mask_token='[MASK]')#, add_bos_token = True, add_eos_token = True)


# Load the checkpoint in float16 through the GPTJForCausalLM subclass defined
# in this file, reusing the tokenizer's EOS/PAD ids and disabling the KV cache.
gpt = GPTJForCausalLM.from_pretrained(
                    'dilab-cau/deft-korean-alpaca',
                    eos_token_id=tokenizer.eos_token_id,
                    pad_token_id=tokenizer.pad_token_id,
                    torch_dtype=torch.float16,
                    use_cache=False
                )



Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing q_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing out_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing fc_in Linear(in_features=4096, out_features=16384, bias=True)
Freezing fc_out Linear(in_features=16384, out_features=4096, bias=True)
Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing q_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing out_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing fc_in Linear(in_features=4096, out_features=16384, bias=True)
Freezing fc_out Linear(in_features=16384, out_features=4096, bias=True)
Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bi

Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing q_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing out_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing fc_in Linear(in_features=4096, out_features=16384, bias=True)
Freezing fc_out Linear(in_features=16384, out_features=4096, bias=True)
Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing q_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing out_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing fc_in Linear(in_features=4096, out_features=16384, bias=True)
Freezing fc_out Linear(in_features=16384, out_features=4096, bias=True)
Freezing k_proj Linear(in_features=4096, out_features=4096, bias=False)
Freezing v_proj Linear(in_features=4096, out_features=4096, bi

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
# Place the model on the target GPU and put it in evaluation mode. The chained
# call is the cell's last expression, so the model repr is still displayed.
# NOTE(review): the GPU index is hard-coded - adjust it for hosts with a
# different device layout.
device = "cuda:3"
gpt.to(device).eval()


GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(64512, 4096)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
      (1): GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.1, inplace=False)
 

In [3]:
import os
def chat():
    """Run an interactive question/answer loop against the loaded model.

    Reads a question from stdin, wraps it in the instruction/response prompt
    template, samples a completion and prints the text up to the first
    ``[EOS]`` marker. The loop ends when the user enters '끝'.

    Uses the module-level ``tokenizer``, ``gpt`` and ``device``.
    """
    while True:
        text = input('질문: ')
        if text == '끝':
            break
        prompt = '### 명령어:\n'+text+'\n\n### 응답:'
        tokens = tokenizer.encode(prompt, return_tensors='pt').to(device=device, non_blocking=True)
        # Only generation needs gradients disabled; reading input does not.
        with torch.no_grad():
            # `early_stopping` is omitted: it only affects beam search, and
            # this call samples with a single beam.
            # Pass `num_return_sequences=k` to sample several candidates.
            gen_tokens = gpt.generate(tokens,
                                      do_sample=True,
                                      temperature=0.9,
                                      top_p=0.99,
                                      max_new_tokens=256,
                                      )
        # The returned sequence starts with the prompt tokens. Cut them off at
        # the token level rather than slicing the decoded string by
        # len(prompt), which misaligns whenever decoding does not reproduce
        # the prompt character for character.
        new_tokens = gen_tokens[0, tokens.shape[-1]:]
        generated = tokenizer.decode(new_tokens)
        response = generated.split('[EOS]')[0]
        print('답변:',response)
        print("\n\n\n")


In [4]:
# Start the interactive session; entering '끝' at the prompt ends it.
chat()

질문: 다이어트 식단 추천해줘
답변: 칼로리는 낮고 단백질이 높은 식단으로 식단을 구성해보세요. 닭가슴살, 달걀, 버섯, 견과류, 브로콜리, 토마토, 두부, 시금치 등 다양한 단백질 식품을 골고루 섭취해보세요. 특히 고구마를 많이 드시면 포만감도 쉽게 유지되고 건강도 챙길 수 있습니다.

### 추가로 식단은 무조건 운동과 병행해야 합니다. 운동을 하면서 살을 빼는 것은 불가능하죠. 운동을 하고 난 후에는 살이 안 빠지기 때문에 식사량을 조절하며 운동을 해야 합니다.




질문: 대한민국의 대통령 이름은?
답변: 문재인입니다. 문재인은 우리나라 제 19대 대통령입니다.




질문: 끝
