## Alpaca

### Loading Models

In [1]:
import torch
import transformers
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import warnings
warnings.filterwarnings('ignore')

In [2]:
model_ckpt = "chainyo/alpaca-lora-7b"

Alpaca_tokenizer = LlamaTokenizer.from_pretrained(model_ckpt)
Alpaca_model = LlamaForCausalLM.from_pretrained(model_ckpt)

Loading checkpoint shards:   0%|          | 0/39 [00:00<?, ?it/s]

In [3]:
Alpaca_model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=31999)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
      (1): LlamaDecoderLayer(
  

### Alpaca Prompt input

In [4]:
PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
}

### Generating Prompt

In [5]:
def generate_prompt(instruction: str, input_ctxt: str = None) -> str:
    if input_ctxt:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

              ### Instruction:
              {instruction}

              ### Input:
              {input_ctxt}

              ### Response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

              ### Instruction:
              {instruction}

              ### Response:"""

In [6]:
generation_config = GenerationConfig(
    temperature=0.2,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
)

In [7]:
instruction = "What is the meaning of life?"
input_ctxt = None

prompt = generate_prompt(instruction, input_ctxt)
input_ids = Alpaca_tokenizer(prompt, return_tensors="pt").input_ids
input_ids = input_ids.to(Alpaca_model.device)

with torch.no_grad():
    outputs = Alpaca_model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
    )

response = Alpaca_tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
print(response)

2023-05-04 22:15:10.582160: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Below is an instruction that describes a task. Write a response that appropriately completes the request.

              ### Instruction:
              What is the meaning of life?

              ### Response:
              The meaning of life is to find purpose and meaning in one's own life. It is a journey of self-discovery and self-actualization. It is a process of exploring one's values, beliefs, and goals. It is a search for meaning and fulfillment in one's own life.


### Loading Alpaca Cleaned Dataset

In [8]:
from datasets import load_dataset
alpaca_cleaned_dataset = load_dataset("yahma/alpaca-cleaned")
alpaca_cleaned_dataset # Dataset 클래스 객체 반환

Found cached dataset json (/home/chaewon/.cache/huggingface/datasets/yahma___json/yahma--alpaca-cleaned-fe377fcd47a14100/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 51760
    })
})

In [9]:
def df_generate_prompt(x) -> str:
    if x["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

              ### Instruction:
              {x["instruction"]}

              ### Input:
              {x["input"]}

              ### Response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

                  ### Instruction:
                  {x["instruction"]}

                  ### Response:"""

In [10]:
import pandas as pd
# pd.options.display.max_colwidth = 2000

df = pd.DataFrame(alpaca_cleaned_dataset["train"])
train_df = df.drop(columns=["output"])

train_df["prompt_input"] = train_df.apply(df_generate_prompt, axis=1)
train_df

Unnamed: 0,instruction,input,prompt_input
0,Give three tips for staying healthy.,,Below is an instruction that describes a task....
1,What are the three primary colors?,,Below is an instruction that describes a task....
2,Describe the structure of an atom.,,Below is an instruction that describes a task....
3,How can we reduce air pollution?,,Below is an instruction that describes a task....
4,Pretend you are a project manager of a constru...,,Below is an instruction that describes a task....
...,...,...,...
51755,You will be given a piece of text about an eve...,Text: John went out for a walk with his dog Ro...,"Below is an instruction that describes a task,..."
51756,You will be given a paragraph of text with var...,Text: Michael Jordan is an American former pro...,"Below is an instruction that describes a task,..."
51757,You will be given a piece of text about an eve...,Text: A tree fell over in the wind and caused ...,"Below is an instruction that describes a task,..."
51758,I will give you a list of steps. You need to ...,"Steps: ['She takes out her books', 'The teache...","Below is an instruction that describes a task,..."


In [11]:
def tokenize(x):
  return Alpaca_tokenizer(x, return_tensors="pt").input_ids

In [12]:
train_df["input_ids"] = train_df["prompt_input"].map(tokenize)
train_df

Unnamed: 0,instruction,input,prompt_input,input_ids
0,Give three tips for staying healthy.,,Below is an instruction that describes a task....,"[[tensor(0), tensor(13866), tensor(338), tenso..."
1,What are the three primary colors?,,Below is an instruction that describes a task....,"[[tensor(0), tensor(13866), tensor(338), tenso..."
2,Describe the structure of an atom.,,Below is an instruction that describes a task....,"[[tensor(0), tensor(13866), tensor(338), tenso..."
3,How can we reduce air pollution?,,Below is an instruction that describes a task....,"[[tensor(0), tensor(13866), tensor(338), tenso..."
4,Pretend you are a project manager of a constru...,,Below is an instruction that describes a task....,"[[tensor(0), tensor(13866), tensor(338), tenso..."
...,...,...,...,...
51755,You will be given a piece of text about an eve...,Text: John went out for a walk with his dog Ro...,"Below is an instruction that describes a task,...","[[tensor(0), tensor(13866), tensor(338), tenso..."
51756,You will be given a paragraph of text with var...,Text: Michael Jordan is an American former pro...,"Below is an instruction that describes a task,...","[[tensor(0), tensor(13866), tensor(338), tenso..."
51757,You will be given a piece of text about an eve...,Text: A tree fell over in the wind and caused ...,"Below is an instruction that describes a task,...","[[tensor(0), tensor(13866), tensor(338), tenso..."
51758,I will give you a list of steps. You need to ...,"Steps: ['She takes out her books', 'The teache...","Below is an instruction that describes a task,...","[[tensor(0), tensor(13866), tensor(338), tenso..."


In [13]:
def generate_response(x):
  input_ids = x.to(Alpaca_model.device)
    
  with torch.no_grad():
      outputs = Alpaca_model.generate(
          input_ids=input_ids,
          generation_config=generation_config,
          return_dict_in_generate=True,
          output_scores=True,
      )

  return Alpaca_tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)

In [14]:
from tqdm import tqdm
tqdm.pandas()

train_df_10 = train_df.iloc[:10]
train_df_10["response"] = train_df_10["input_ids"].progress_map(generate_response)

100%|██████████| 10/10 [13:58<00:00, 83.82s/it]


In [15]:
pd.options.display.max_colwidth = 2000
train_df_10

Unnamed: 0,instruction,input,prompt_input,input_ids,response
0,Give three tips for staying healthy.,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Give three tips for staying healthy.\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(25538), tensor(2211), tensor(25562), tensor(363), tensor(7952), tensor(292), tensor(9045), tensor(29891), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]",Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Give three tips for staying healthy.\n\n ### Response:\n 1. Eat a balanced diet.\n 2. Exercise regularly.\n 3. Get enough sleep.
1,What are the three primary colors?,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n What are the three primary colors?\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(1724), tensor(526), tensor(278), tensor(2211), tensor(7601), tensor(11955), tensor(29973), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n What are the three primary colors?\n\n ### Response:\n The three primary colors are red, blue, and yellow."
2,Describe the structure of an atom.,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Describe the structure of an atom.\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(20355), tensor(915), tensor(278), tensor(3829), tensor(310), tensor(385), tensor(12301), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Describe the structure of an atom.\n\n ### Response:\n An atom is composed of a nucleus, which is made up of protons and neutrons, and electrons, which are arranged in shells around the nucleus. The nucleus is made up of positively charged protons and uncharged neutrons. The electrons are negatively charged particles that orbit the nucleus."
3,How can we reduce air pollution?,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n How can we reduce air pollution?\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(1128), tensor(508), tensor(591), tensor(10032), tensor(4799), tensor(21180), tensor(918), tensor(29973), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n How can we reduce air pollution?\n\n ### Response:\n We can reduce air pollution by switching to renewable energy sources, such as solar and wind power. We can also reduce our reliance on fossil fuels by using public transportation, carpooling, and biking. We can also reduce air pollution by planting trees and shrubs, which absorb carbon dioxide and other pollutants."
4,Pretend you are a project manager of a construction company. Describe a time when you had to make a difficult decision.,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Pretend you are a project manager of a construction company. Describe a time when you had to make a difficult decision.\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(349), tensor(2267), tensor(355), tensor(366), tensor(526), tensor(263), tensor(2060), tensor(8455), tensor(310), tensor(263), tensor(7632), tensor(5001), tensor(29889), tensor(20355), tensor(915), tensor(263), tensor(931), tensor(746), tensor(366), tensor(750), tensor(304), tensor(1207), tensor(263), tensor(5189), tensor(10608), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Pretend you are a project manager of a construction company. Describe a time when you had to make a difficult decision.\n\n ### Response:\nAs a project manager of a construction company, I had to make difficult decisions on a regular basis. One example was when I had to decide whether to continue with a project that was running over budget. I had to weigh the pros and cons of continuing with the project, as well as the potential risks of not completing it. In the end, I decided to continue with the project, as I believed that the benefits of completing it outweighed the risks of not doing so."
5,"Write a concise summary of the following:\n""Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. \n\nAdditionally, the Commodore 64 dominated the market with between 30% and 40% share and 2 million units sold per year, outselling the IBM PC clones, Apple Computers, and Atari computers. Adding to their success, Sam Tramiel (former Atari president), during an interview in 1989, said they were building 400,000 C64s a month for a couple of years. """,,"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Write a concise summary of the following:\n""Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. \n\nAdditionally, the Commodore 64 dominated the market with between 30% and 40% share and 2 million units sold per year, outselling the IBM PC clones, Apple Computers, and Atari computers. Adding to their success, Sam Tramiel (former Atari president), during an interview in 1989, said they were building 400,000 C64s a month for a couple of years. ""\n\n ### Response:","[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(14350), tensor(263), tensor(3022), tensor(895), tensor(15837), tensor(310), tensor(278), tensor(1494), tensor(29901), tensor(13), tensor(29908), tensor(1523), tensor(1545), tensor(487), tensor(29871), tensor(29953), tensor(29946), tensor(313), tensor(2055), tensor(6194), tensor(2998), tensor(408), tensor(278), tensor(315), tensor(29953), tensor(29946), tensor(470), tensor(315), tensor(29933), tensor(29924), tensor(29871), tensor(29953), tensor(29946), tensor(29897), tensor(471), tensor(12012), tensor(2955), tensor(491), tensor(422), tensor(1545), tensor(487), tensor(15197), tensor(6189), tensor(313), tensor(21685), tensor(29924), tensor(29897), tensor(297), tensor(3111), tensor(29871), tensor(29896), tensor(29929), tensor(29947), tensor(29906), tensor(411), tensor(263), tensor(6257), tensor(8666), tensor(310), tensor(395), tensor(29945), tensor(29929), tensor(29945), tensor(29889), tensor(739), tensor(471), tensor(385), tensor(29871), ...]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Write a concise summary of the following:\n""Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. \n\nAdditionally, the Commodore 64 dominated the market with between 30% and 40% share and 2 million units sold per year, outselling the IBM PC clones, Apple Computers, and Atari computers. Adding to their success, Sam Tramiel (former Atari president), during an interview in 1989, said they were building 400,000 C64s a month for a couple of years. ""\n\n ### Response:\nThe Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. Additionally, the Commodore"
6,Explain why the following fraction is equivalent to 1/4,4/16,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n ### Instruction:\n Explain why the following fraction is equivalent to 1/4\n\n ### Input:\n 4/16\n\n ### Response:","[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29892), tensor(3300), tensor(2859), tensor(411), tensor(385), tensor(1881), tensor(393), tensor(8128), tensor(4340), tensor(3030), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(795), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(795), tensor(12027), tensor(7420), tensor(2020), tensor(278), tensor(1494), tensor(15958), tensor(338), tensor(7126), tensor(304), tensor(29871), tensor(29896), tensor(29914), tensor(29946), tensor(13), tensor(13), tensor(795), tensor(835), tensor(10567), tensor(29901), tensor(13), tensor(1669), tensor(29946), tensor(29914), tensor(29896), tensor(29953), tensor(13), tensor(13), tensor(795), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n ### Instruction:\n Explain why the following fraction is equivalent to 1/4\n\n ### Input:\n 4/16\n\n ### Response:\n The fraction 4/16 is equivalent to 1/4 because it can be simplified to 1/4 by dividing both numerator and denominator by 4."
7,Write a short story in third person narration about a protagonist who has to make an important career decision.,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Write a short story in third person narration about a protagonist who has to make an important career decision.\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(14350), tensor(263), tensor(3273), tensor(5828), tensor(297), tensor(4654), tensor(2022), tensor(15474), tensor(362), tensor(1048), tensor(263), tensor(15572), tensor(391), tensor(1058), tensor(756), tensor(304), tensor(1207), tensor(385), tensor(4100), tensor(6413), tensor(10608), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n Write a short story in third person narration about a protagonist who has to make an important career decision.\n\n ### Response:\nOnce upon a time, there was a young man named John who had to make an important career decision. He had just graduated from college with a degree in computer science, but he wasn't sure if he wanted to pursue a career in that field. \n\nJohn had always been interested in business, but he was afraid to take the leap and pursue a career in that field. He was worried that he wouldn't be able to make enough money to support himself and his family. \n\nOne day, John decided to take a leap of faith and pursue a career in business. He"
8,Evaluate this sentence for spelling and grammar mistakes,He finnished his meal and left the resturant,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n ### Instruction:\n Evaluate this sentence for spelling and grammar mistakes\n\n ### Input:\n He finnished his meal and left the resturant\n\n ### Response:","[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29892), tensor(3300), tensor(2859), tensor(411), tensor(385), tensor(1881), tensor(393), tensor(8128), tensor(4340), tensor(3030), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(795), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(795), tensor(382), tensor(4387), tensor(403), tensor(445), tensor(10541), tensor(363), tensor(805), tensor(7807), tensor(322), tensor(25437), tensor(28947), tensor(13), tensor(13), tensor(795), tensor(835), tensor(10567), tensor(29901), tensor(13), tensor(795), tensor(940), tensor(1436), tensor(29876), tensor(3276), tensor(670), tensor(592), tensor(284), tensor(322), tensor(2175), tensor(278), tensor(1791), tensor(332), tensor(424), tensor(13), tensor(13), tensor(795), tensor(835), tensor(13291), tensor(29901)]]","Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n ### Instruction:\n Evaluate this sentence for spelling and grammar mistakes\n\n ### Input:\n He finnished his meal and left the resturant\n\n ### Response:\n He finished his meal and left the restaurant."
9,How did Julius Caesar die?,,Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n How did Julius Caesar die?\n\n ### Response:,"[[tensor(0), tensor(13866), tensor(338), tensor(385), tensor(15278), tensor(393), tensor(16612), tensor(263), tensor(3414), tensor(29889), tensor(14350), tensor(263), tensor(2933), tensor(393), tensor(7128), tensor(2486), tensor(1614), tensor(2167), tensor(278), tensor(2009), tensor(29889), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(2799), tensor(4080), tensor(29901), tensor(13), tensor(462), tensor(29871), tensor(1128), tensor(1258), tensor(23762), tensor(9243), tensor(26892), tensor(762), tensor(29973), tensor(13), tensor(13), tensor(462), tensor(29871), tensor(835), tensor(13291), tensor(29901)]]",Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n ### Instruction:\n How did Julius Caesar die?\n\n ### Response:\n Julius Caesar was assassinated on the Ides of March in 44 BC.


In [16]:
train_df_10["refine_response"] = train_df_10["response"].map(lambda x: x.split("### Response:")[1].strip())
refine_df = train_df_10[["instruction", "input", "refine_response"]]

In [17]:
pd.options.display.max_colwidth = 2000
refine_df

Unnamed: 0,instruction,input,refine_response
0,Give three tips for staying healthy.,,1. Eat a balanced diet.\n 2. Exercise regularly.\n 3. Get enough sleep.
1,What are the three primary colors?,,"The three primary colors are red, blue, and yellow."
2,Describe the structure of an atom.,,"An atom is composed of a nucleus, which is made up of protons and neutrons, and electrons, which are arranged in shells around the nucleus. The nucleus is made up of positively charged protons and uncharged neutrons. The electrons are negatively charged particles that orbit the nucleus."
3,How can we reduce air pollution?,,"We can reduce air pollution by switching to renewable energy sources, such as solar and wind power. We can also reduce our reliance on fossil fuels by using public transportation, carpooling, and biking. We can also reduce air pollution by planting trees and shrubs, which absorb carbon dioxide and other pollutants."
4,Pretend you are a project manager of a construction company. Describe a time when you had to make a difficult decision.,,"As a project manager of a construction company, I had to make difficult decisions on a regular basis. One example was when I had to decide whether to continue with a project that was running over budget. I had to weigh the pros and cons of continuing with the project, as well as the potential risks of not completing it. In the end, I decided to continue with the project, as I believed that the benefits of completing it outweighed the risks of not doing so."
5,"Write a concise summary of the following:\n""Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. \n\nAdditionally, the Commodore 64 dominated the market with between 30% and 40% share and 2 million units sold per year, outselling the IBM PC clones, Apple Computers, and Atari computers. Adding to their success, Sam Tramiel (former Atari president), during an interview in 1989, said they were building 400,000 C64s a month for a couple of years. """,,"The Commodore 64 (commonly known as the C64 or CBM 64) was manufactured by Commodore Business Machine (CBM) in August 1982 with a starting price of $595. It was an 8-bit home computer with remarkable market success. Between 1983-1986, C64 sales amounted to about 17 million units sold, making them the best-selling single personal computer model of all time in 1983-1986. Additionally, the Commodore"
6,Explain why the following fraction is equivalent to 1/4,4/16,The fraction 4/16 is equivalent to 1/4 because it can be simplified to 1/4 by dividing both numerator and denominator by 4.
7,Write a short story in third person narration about a protagonist who has to make an important career decision.,,"Once upon a time, there was a young man named John who had to make an important career decision. He had just graduated from college with a degree in computer science, but he wasn't sure if he wanted to pursue a career in that field. \n\nJohn had always been interested in business, but he was afraid to take the leap and pursue a career in that field. He was worried that he wouldn't be able to make enough money to support himself and his family. \n\nOne day, John decided to take a leap of faith and pursue a career in business. He"
8,Evaluate this sentence for spelling and grammar mistakes,He finnished his meal and left the resturant,He finished his meal and left the restaurant.
9,How did Julius Caesar die?,,Julius Caesar was assassinated on the Ides of March in 44 BC.


## KoAlpaca

In [18]:
import torch
import transformers
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import warnings
warnings.filterwarnings('ignore')

model_ckpt = "beomi/KoAlpaca"

KoAlpaca_tokenizer = LlamaTokenizer.from_pretrained(model_ckpt)
KoAlpaca_model = LlamaForCausalLM.from_pretrained(model_ckpt)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [19]:
PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context.\n"
        "아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.\n\n"
        "Write a response that appropriately completes the request.\n요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{instruction}\n\n### Input(입력):\n{input}\n\n### Response(응답):"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task.\n"
        "아래는 작업을 설명하는 명령어입니다.\n\n"
        "Write a response that appropriately completes the request.\n명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{instruction}\n\n### Response(응답):"
    ),
}

In [20]:
def gen(prompt, user_input=None, max_new_tokens=128, temperature=0.5):
    if user_input:
        x = PROMPT_DICT['prompt_input'].format(instruction=prompt, input=user_input)
    else:
        x = PROMPT_DICT['prompt_no_input'].format(instruction=prompt)
    print(f"prompt instruction : {x}")
    
    input_ids = KoAlpaca_tokenizer.encode(x, return_tensors="pt")
    print(input_ids)

    gen_tokens = KoAlpaca_model.generate(
        input_ids, 
        max_new_tokens=max_new_tokens, 
        num_return_sequences=1, 
        temperature=temperature,
        no_repeat_ngram_size=6,
        do_sample=True,
    )
    print(gen_tokens)
    
    gen_text = KoAlpaca_tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
    
    return gen_text.replace(x, '')

In [21]:
# Example usage:
prompt = "이 문장에 철자와 문법 오류가 있는지 평가하세요."
user_input = "그는 식사를 마치고 식당을 나섰습니다."
generated_text = gen(prompt, user_input)
print(generated_text)

prompt instruction : Below is an instruction that describes a task, paired with an input that provides further context.
아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.

Write a response that appropriately completes the request.
요청을 적절히 완료하는 응답을 작성하세요.

### Instruction(명령어):
이 문장에 철자와 문법 오류가 있는지 평가하세요.

### Input(입력):
그는 식사를 마치고 식당을 나섰습니다.

### Response(응답):
tensor([[    2, 13866,   338,   385, 15278,   393, 16612,   263,  3414, 29892,
          3300,  2859,   411,   385,  1881,   393,  8128,  4340,  3030, 29889,
            13, 30860,   238,   161,   155, 31081, 29871,   239,   161,   148,
           239,   154,   136, 31286, 29871,   239,   135,   167, 31976, 30944,
         31081, 29871, 31976,   238,   163,   188, 31129,   239,   156,   131,
         29871,   239,   185,   151, 30903,   239,   163,   132, 29871,   238,
           170,   168,   238,   160,   192, 31286, 29871, 31306, 31334, 30944,
         31081, 29871,   239,   161,   136,   238,   163,   168, 30393, 29871,
       