## Load fine-tuned model

In [4]:
import sys, os

# Base checkpoint identifier. The visible cells below take the base model name
# from the adapter config instead of reading this constant.
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"

# Directory passed to PeftConfig / PeftModel, addressed relative to the
# notebook's current working directory.
OUTPUT_DIR = f"{os.getcwd()}/../../models/rlhf_step1_sft/"

# Forwarded as `device_map` when the models are loaded.
device_map = "auto"


In [10]:
from peft import PeftModel, PeftConfig
# Read the adapter configuration stored in OUTPUT_DIR. Its
# `base_model_name_or_path` is what later cells use to load the tokenizer and
# the base model.
config = PeftConfig.from_pretrained(OUTPUT_DIR)

In [13]:
from transformers import AutoTokenizer
import sys, os

# Extend the import path two directories above the working directory
# (presumably where the local `pykoi` package lives — confirm).
sys.path.append(f"{os.getcwd()}/../../")
from pykoi.chat.llm.instruct_pipeline import END_KEY, INSTRUCTION_KEY, RESPONSE_KEY, INTRO_BLURB

# Tokenizer of the base model named in the adapter config; the EOS token is
# reused as the padding token.
tokenizer_ft = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer_ft.pad_token = tokenizer_ft.eos_token

# Register the prompt markers as additional special tokens.
# NOTE(review): the third marker is the literal RESPONSE_KEY + "_NL" (the
# characters "_NL", not a newline) — confirm this is the same token string that
# was registered when the adapter was trained.
tokenizer_ft.add_special_tokens(
    {
        "additional_special_tokens": [
            END_KEY,
            INSTRUCTION_KEY,
            RESPONSE_KEY + "_NL",
        ]
    }
)

3

In [15]:
from transformers import AutoModelForCausalLM

# Load the base model named in the adapter config.
# 8-bit loading (load_in_8bit=True) is left disabled here.
model_ft = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map=device_map)

Loading checkpoint shards: 100%|██████████| 2/2 [01:49<00:00, 54.80s/it]


In [16]:
# Wrap the already-loaded base model with the adapter stored in OUTPUT_DIR.
model_ft = PeftModel.from_pretrained(
    model_ft, 
    OUTPUT_DIR, 
    device_map=device_map,
)

# Resize the embedding matrix to the tokenizer's current vocabulary size
# (the tokenizer had extra special tokens added to it earlier).
# NOTE(review): the resize runs after the adapter is attached — confirm this
# matches how the embeddings were handled when the adapter was trained.
model_ft.resize_token_embeddings(len(tokenizer_ft))

Embedding(32003, 4096)

## Load data

In [26]:
# CSV read by the data-loading cell (columns `instruction` and `response`).
INPUT_FILE = f"{os.getcwd()}/../../data/chapter22_valfromseed_data_processed.csv"
# Destination path for results; no visible cell writes to it yet.
OUTPUT_FILE = f"{os.getcwd()}/../../data/d2lai_selfinstruct_mistral7b-loraft_1123.csv"

In [28]:
import pandas as pd
import time
import csv

# Load the evaluation set and split it into two parallel lists:
# the prompts and the reference answers, in row order.
df_results = pd.read_csv(INPUT_FILE)
prompt_list = df_results["instruction"].values.tolist()
reference_list = df_results["response"].values.tolist()

In [30]:

# Number of prompts read from INPUT_FILE.
len(prompt_list)


660

## Create instruct pipeline


In [34]:
from pykoi.chat.llm.instruct_pipeline import InstructionTextGenerationPipeline
import time

# Wrap the fine-tuned model and its tokenizer in the instruction pipeline.
generate_text = InstructionTextGenerationPipeline(model=model_ft, tokenizer=tokenizer_ft)

# Start from empty lists so re-running the cell never duplicates entries.
# A question and its response are appended together, after generation, so the
# two lists stay aligned and keep the finished pairs if the loop is interrupted.
response_list = []
question_list = []

st = time.time()

for i, question in enumerate(prompt_list):
    # Print each prompt on its own line so the progress log stays readable.
    print(i, "-", question)
    res = generate_text(question)
    # Each response is stored as a single-element list holding the generated text.
    # NOTE(review): a metric that expects plain strings as predictions would need
    # these unwrapped first — confirm against the consumer of `response_list`.
    response = [res[0]["generated_text"]]
    question_list.append(question)
    response_list.append(response)

et = time.time()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

0 - What does linear algebra study?1 - Can you explain the concepts of linear algebra?2 - What are the two ways to visualize vectors in geometry?3 - How can vectors be represented using lines and arrows in geometry?4 - How do column vectors differ from row vectors?5 - Can you explain the distinction between column and row vectors?6 - What is the process of representing vectors as points in a three-dimensional space?7 - Can you explain how to graph vectors using coordinate systems and point representations?8 - Can you represent vectors using arrows on a flat surface?9 - How can vectors be depicted as arrows within a two-dimensional space?10 - What is the process of representing vector addition graphically?



11 - How can vectors be depicted when added together using diagrams?12 - What is the process of representing vector subtraction visually?13 - How can vectors be depicted in a way that shows their difference when they are subtracted?14 - How do you calculate the inner product of two row matrices?15 - What is the formula for finding the product of two vectors with their components aligned vertically?16 - How does the dot product of two vectors relate to their magnitudes and directions in geometry?17 - Can you explain how the dot product of two vectors can be represented graphically using points and lines?18 - How do you calculate the dot product of two vectors using a formula?19 - Can you explain the formula used to determine the dot product between two vectors on page 3?20 - What is the formula to determine the angle between two vectors using their dot product and magnitudes?21 - Can you explain how to compute the angle between two vectors based on their dot product and lengths?22 - Wha

KeyboardInterrupt: 

In [None]:
# Second instruction pipeline over the same model and tokenizer.
# NOTE(review): built with the same arguments as `generate_text`, and no visible
# cell reads `model_gen` — confirm it is still needed.
model_gen = InstructionTextGenerationPipeline(model=model_ft, tokenizer=tokenizer_ft)

## Evaluation with ROUGE score

In [19]:
!pip install rouge_score
from rouge_score import rouge_scorer

import evaluate
rouge = evaluate.load('rouge')

import numpy as np

Collecting rouge_score
  Using cached rouge_score-0.1.2-py3-none-any.whl
Collecting absl-py (from rouge_score)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting nltk (from rouge_score)
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Downloading absl_py-2.1.0-py3-none-any.whl (133 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.7/133.7 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nltk, absl-py, rouge_score
Successfully installed absl-py-2.1.0 nltk-3.8.1 rouge_score-0.1.2


Downloading builder script: 100%|██████████| 6.27k/6.27k [00:00<00:00, 15.9MB/s]


In [20]:
def calculate_rouge(pred_list, ref_list, metric=None):
    """Compute, print and return the aggregated ROUGE scores.

    Args:
        pred_list: Predictions, passed to the metric's ``compute`` as
            ``predictions``.
        ref_list: References, passed to ``compute`` as ``references``.
        metric: Optional object exposing
            ``compute(predictions=..., references=..., use_aggregator=...)``.
            When omitted, the notebook-level ``rouge`` metric is used.

    Returns:
        dict: The keys ``"rougeLsum"``, ``"rougeL"``, ``"rouge2"`` and
        ``"rouge1"`` (the order in which they are printed), each mapped to a
        float score.
    """
    if metric is None:
        metric = rouge

    rouge_results = metric.compute(predictions=pred_list,
                                   references=ref_list,
                                   use_aggregator=True)

    # np.mean leaves a single aggregated value unchanged and would average a
    # list of per-sample values, so either result shape is reduced to one float.
    scores = {
        key: float(np.mean(rouge_results[key]))
        for key in ("rougeLsum", "rougeL", "rouge2", "rouge1")
    }

    print("Average rouge score - ", *scores.values())

    # Hand the numbers back so callers can store or compare them instead of
    # only reading them from the printed line.
    return scores

In [21]:
# Alias for the reference answers produced by the "Load data" section. That
# section must have been run in the current kernel first; otherwise this line
# raises NameError.
ref_list = reference_list

NameError: name 'reference_list' is not defined

## Evaluation with semantic similarity score

In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd

In [None]:
# Sentence-embedding model; `calculate_semantic_sim` reads it as the global `model`.
model = SentenceTransformer('thenlper/gte-large')    # use this one in production


In [None]:
def calculate_semantic_sim(pred_list,ref_list):
    """Return the mean cosine similarity between paired predictions and references.

    Each pair is embedded with the notebook-level ``model`` and compared with
    ``util.cos_sim``.

    Args:
        pred_list: Predictions; item ``i`` is compared with ``ref_list[i]``.
        ref_list: References, in the same order as ``pred_list``.

    Returns:
        float: Mean similarity over the compared pairs, or ``nan`` when there
        are no pairs to compare.

    Warns:
        UserWarning: If the two lists differ in length. Only the leading pairs
        (up to the shorter length) are compared in that case.
    """
    import warnings

    if len(pred_list) != len(ref_list):
        warnings.warn(
            "pred_list and ref_list differ in length "
            f"({len(pred_list)} vs {len(ref_list)}); "
            "only the leading pairs are compared."
        )

    sem_score = []

    for ref_item, pred_item in zip(ref_list, pred_list):
        ref_embedding = model.encode(ref_item)
        pred_embedding = model.encode(pred_item)
        cos_sim = util.cos_sim(ref_embedding, pred_embedding)

        # The pair's score is read from position [0][0] of the result.
        sem_score.append(cos_sim[0][0].item())

    # Avoid averaging an empty list, which would only yield nan with a warning.
    if not sem_score:
        return float("nan")

    return float(np.mean(sem_score))

In [None]:
# Pass the lists by keyword so each one lands on the intended parameter of
# calculate_semantic_sim(pred_list, ref_list).
avg_score = calculate_semantic_sim(pred_list=response_list, ref_list=reference_list)
avg_score