# Install Required Libraries

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets einops

# Log in to Hugging Face

In [None]:
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub.
# NOTE(review): presumably required to download the model and dataset used
# below — confirm whether those repositories are actually gated/private.
notebook_login()

# Load & Quantize the model

In [None]:
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# model_id = "Hieu-Pham/Llama-2-7B-QLoRA-cooking-300-merged"

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16
# )

# tokenizer = AutoTokenizer.from_pretrained(model_id)
# tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repository id of the checkpoint that is evaluated in this notebook.
model_id = "Hieu-Pham/Llama2-7B-IA3-cooking-text-gen-merged"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): presumably because the tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token
# Load the weights in bfloat16 with the whole model mapped to device 0
# (no quantization config is passed in this cell).
model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":0}, torch_dtype=torch.bfloat16)

# Load and preprocess the dataset

In [None]:
from datasets import load_dataset

# Load only the 'test' split of the dataset; it is the only split used for
# evaluation in this notebook.
test_data = load_dataset("Hieu-Pham/cooking_squad_splitted", split='test')

# Build the prompts and collect the reference answers for a dataset split
def group_test_data(dataset):
    """Turn question/context rows into prompt strings and gather their answers.

    Args:
        dataset: Iterable of rows. Each row must provide ``row['question']``
            and ``row['context']`` as strings and ``row['answers']['text']``.

    Returns:
        A ``(grouped, ref_answers)`` tuple of two lists in row order.
        ``grouped`` holds one prompt per row, formatted as
        ``"Question: <question> Context: <context> Answer: "``.
        ``ref_answers`` holds the matching ``row['answers']['text']`` values,
        passed through unchanged.
    """
    grouped = []
    ref_answers = []

    for row in dataset:
        # Named `prompt` (not `input`) so the builtin `input` is not shadowed.
        # The trailing "Answer: " leaves the answer for the model to complete.
        prompt = "Question: " + row['question'] + " Context: " + row['context'] + " Answer: "
        grouped.append(prompt)
        ref_answers.append(row['answers']['text'])

    return grouped, ref_answers

# Build the prompts and reference answers for the test split, then attach the
# prompts to the dataset as a new column. Despite its name, the 'Outputs'
# column holds the model *inputs* (prompts); it is what gets fed to the
# generation pipeline later in the notebook.
output_column, reference = group_test_data(test_data)
test_data = test_data.add_column('Outputs', output_column)

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/157k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/155k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
from transformers import pipeline
from transformers import StoppingCriteria, StoppingCriteriaList

# Ids of the tokens that should end generation as soon as one is produced.
# NOTE(review): strings that are not actual vocabulary tokens will not map to a
# distinct id — verify that each entry resolves to the intended token.
stop_token_ids = tokenizer.convert_tokens_to_ids(["\n", "#", "\\", "`", "###", "##", "Question", "Comment"])


class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the newest token is a stop token."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Only the most recent token of the first sequence is inspected.
        newest_token = input_ids[0][-1]
        return any(newest_token == candidate for candidate in stop_token_ids)


stopping_criteria = StoppingCriteriaList([StopOnTokens()])

# Text-generation pipeline around the already-loaded model and tokenizer.
# NOTE(review): `do_sample` is not passed here; temperature / top_p / top_k
# only influence decoding when sampling is enabled — confirm the effective
# setting (e.g. from the model's generation config).
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    stopping_criteria=stopping_criteria,
    temperature=0.1,  # 'randomness' of outputs; lower values make sampling more deterministic
    top_p=0.15,  # select from top tokens whose probability add up to 15%
    top_k=0,  # 0 turns top-k filtering off, so candidate selection relies on top_p
    max_new_tokens=100,  # max number of tokens to generate in the output
    repetition_penalty=1.1 # without this output begins repeating
)

# Hand-written sample prompt in the same "Question / Context / Answer" format;
# it is not fed to the pipeline below.
test = "Question: How many eggs are there in the basket? Context: There are 2 eggs in the basket. Answer: "

# Run generation for every prompt stored in the 'Outputs' column and keep the
# raw pipeline results, one entry per prompt, in order.
outputs = list(pipe(test_data['Outputs']))

In [None]:
outputs

In [None]:
import pandas as pd

# Keep only the generated text of the first candidate returned for each prompt.
prediction = [candidates[0]['generated_text'] for candidates in outputs]



In [None]:
# Substrings that are stripped from every generated answer.
replace_list = ['\\', '\n', '`', '#', 'Comment']


def _strip_artifacts(text):
    """Return `text` with every entry of `replace_list` removed, in list order."""
    for artifact in replace_list:
        text = text.replace(artifact, '')
    return text


formatted_prediction = [_strip_artifacts(raw_text) for raw_text in prediction]




In [None]:
formatted_prediction

['3 1/2 c. (8 oz.) large shells (uncooked)',
 '3 hours',
 '4 (1 oz.) slices Canadian bacon',
 '1/2 c.',
 '15 minutes',
 '3 eggs, 1 c. oil, 1 1/2 c. honey, 2 tsp. vanilla, 2 c. shredded zucchini, 1 c. unsweetened crushed pineapple, 1 c. raisins, 2 c. whole wheat flour, 3/4 c. unbleached flour, 2 tsp. soda, 1 tsp. salt, ',
 '1 (14 oz.) can evaporated skim milk, 1/2 c. vinegar, 1 envelope (about 1 1/2 oz.) onion soup mix, 1/3 c. catsup, 1/2 tsp. Worcestershire sauce, 1/4 tsp. red pepper sauce',
 '1. Mix all ingredients except the ketchup.2. Put mixture in a large iron skillet.3. Spoon ketchup over top.4. Bake, uncovered, in a 350° oven for 1 hour and 15 minutes.',
 '2 lb. ground meat',
 '3 layers',
 '1/3 cup of oil',
 '8 minutes',
 '8 minutes',
 '1. Cook all ingredients in 1 1/2-quart saucepan over medium heat for 4 to 5 minutes, stirring vigorously, until mixture forms a ball.2. Remove dough from saucepan and let stand on counter 5 minutes.3. Knead dough about 30 seconds or until smooth 

In [None]:
# Persist the cleaned predictions (one per row) to 'prediction.csv' in the
# current working directory.
df = pd.DataFrame(formatted_prediction)
df.to_csv('prediction.csv')

In [None]:
reference

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score, accuracy_score

# Fit the label space on the reference answers only; MultiLabelBinarizer treats
# every sample as an iterable of labels.
# NOTE(review): if the entries of `reference` / `formatted_prediction` are plain
# strings, the "labels" are individual characters, so this F1 measures
# character-set overlap rather than token overlap — confirm this is intended.
binarizer = MultiLabelBinarizer()
binarizer.fit(reference)

# Sample-averaged F1 between the binarized references and predictions.
# The value is stored because a bare expression in the middle of a cell is
# evaluated and then silently discarded, so it was never shown before.
f1_samples = f1_score(binarizer.transform(reference),
                      binarizer.transform(formatted_prediction),
                      average='samples')

# Accuracy of the cleaned predictions against the references.
accuracy = accuracy_score(reference, formatted_prediction)

print(f"F1 (samples): {f1_samples}")
# Last expression of the cell, so it is rendered as the cell's output.
accuracy



0.7141221837959175