In [1]:
%cd ..

/workspaces/Chatchat_AIMeng/src


In [5]:
from datasets import load_from_disk
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments, Trainer
import torch
import os
## Downloading Model 
from utils.constants import *
from utils.util_funcs import *
from sentence_transformers import SentenceTransformer
from transformers import T5Tokenizer


In [3]:
generate_prompt("What is Nvidia?")

'You are a Question-Answering assistant. According to the following question:\n\nWhat is Nvidia?\n\nHere are some good answers:\n'

In [4]:
SAVE_MODEL_PATH = os.path.join(MODEL_DIR, 'save')


In [8]:
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained(SAVE_MODEL_PATH).to(DEVICE)
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
# tokenizer = AutoTokenizer.from_pretrained(SAVE_MODEL_PATH)
clear_gpu_memory()

# Result is wonderful!

In [9]:
generate_response(model=finetuned_model, tokenizer=tokenizer, prompt="What is the purpose of AmpMe's ""Predictive Sync"" technology in the context of music streaming?")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.


'ampMes predictive sync technology enhances music streaming by providing accurate synchronization enabling friends to enjoy music together without manual syncing'

# Compare with original model

In [10]:
original_model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_PATH).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
clear_gpu_memory()

In [11]:
generate_response(model=original_model, tokenizer=tokenizer,  prompt="What is the purpose of AmpMe's ""Predictive Sync"" technology in the context of music streaming?")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.


'enables a user to select the music they want to listen to'

In [12]:
import pandas as pd


test_df = pd.read_csv('data/processed/test.csv')
test_df

Unnamed: 0,question,answer
0,what are some advantages of using simtstd libr...,the simtstd library provides a way to add cuda...
1,what is the significance of writing scalar pro...,writing scalar programs in the cuda programmin...
2,what is the use of the v and vv flags when sta...,the v flag is used to enable info logging and ...
3,what is the impact of unified memory on develo...,unified memory automatically manages large dat...
4,what is the purpose of the real arrays x and y...,the real arrays x and y in the host code are t...
...,...,...
1417,what are the prerequisites for running cuda pr...,to run cuda programs a system needs a compatib...
1418,how do cudagdb and nsight compute debugger imp...,the cudagdb and nsight compute debugger can di...
1419,what is the primary focus of staca2 benchmarks,the primary focus of staca2 benchmarks is to r...
1420,what happens when the repoprecacheexts tool is...,when the repoprecacheexts tool is run with the...


In [16]:
res_df = test_df[0:100]

In [17]:
res_df['generated_answer_original'] = res_df['question'].apply(lambda x: generate_response(model=original_model, tokenizer=tokenizer, prompt=generate_prompt(x)))
res_df['generated_answer_finetuned'] = res_df['question'].apply(lambda x: generate_response(model=finetuned_model, tokenizer=tokenizer, prompt=generate_prompt(x)))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.
The attentio

In [18]:
res_df.to_csv('data/output/compared_results.csv', index=False)

In [19]:
res_df

Unnamed: 0,question,answer,generated_answer_original,generated_answer_finetuned
0,what are some advantages of using simtstd libr...,the simtstd library provides a way to add cuda...,a).,simtstd library offers preoptimized functions ...
1,what is the significance of writing scalar pro...,writing scalar programs in the cuda programmin...,arithmetic,writing scalar programs in the cuda programmin...
2,what is the use of the v and vv flags when sta...,the v flag is used to enable info logging and ...,a tool for detecting and detecting a car's engine,the v and vv flags are used to indicate which ...
3,what is the impact of unified memory on develo...,unified memory automatically manages large dat...,a).,unified memory makes data science more accessi...
4,what is the purpose of the real arrays x and y...,the real arrays x and y in the host code are t...,arithmetic,the real arrays x and y are used in the host c...
...,...,...,...,...
95,how does cuda support data transfer between ho...,cuda provides mechanisms for data transfer bet...,a).,cuda supports data transfer by allowing device...
96,what improvements are made to nvjpeg in cuda 120,nvjpeg in cuda 120 has an improved implementat...,a).,cuda 120 adds nvjpeg to its library to enhance...
97,what is the purpose of the jacobi iteration in...,the purpose of the jacobi iteration is to comp...,a way to make a joke,the jacobi iteration is used to determine the ...
98,how did the researchers use machine learning i...,the researchers utilized machine learning incl...,a computer program,the researchers used machine learning to train...


# Compute Similarity

In [21]:
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

In [22]:
embedded_correct_ans = sentence_model.encode(res_df['answer'].tolist())
embedded_generated_ans = sentence_model.encode(res_df['generated_answer_finetuned'].tolist())

In [25]:
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [26]:

res_df['cosine_similarity_score'] = [cosine_similarity(embedded_correct_ans[i], embedded_generated_ans[i]) for i in range(len(res_df))]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  res_df['cosine_similarity_score'] = [cosine_similarity(embedded_correct_ans[i], embedded_generated_ans[i]) for i in range(len(res_df))]


In [27]:
res_df

Unnamed: 0,question,answer,generated_answer_original,generated_answer_finetuned,cosine_similarity_score
0,what are some advantages of using simtstd libr...,the simtstd library provides a way to add cuda...,a).,simtstd library offers preoptimized functions ...,0.843124
1,what is the significance of writing scalar pro...,writing scalar programs in the cuda programmin...,arithmetic,writing scalar programs in the cuda programmin...,0.998549
2,what is the use of the v and vv flags when sta...,the v flag is used to enable info logging and ...,a tool for detecting and detecting a car's engine,the v and vv flags are used to indicate which ...,0.783109
3,what is the impact of unified memory on develo...,unified memory automatically manages large dat...,a).,unified memory makes data science more accessi...,0.679016
4,what is the purpose of the real arrays x and y...,the real arrays x and y in the host code are t...,arithmetic,the real arrays x and y are used in the host c...,0.682479
...,...,...,...,...,...
95,how does cuda support data transfer between ho...,cuda provides mechanisms for data transfer bet...,a).,cuda supports data transfer by allowing device...,0.819952
96,what improvements are made to nvjpeg in cuda 120,nvjpeg in cuda 120 has an improved implementat...,a).,cuda 120 adds nvjpeg to its library to enhance...,0.717391
97,what is the purpose of the jacobi iteration in...,the purpose of the jacobi iteration is to comp...,a way to make a joke,the jacobi iteration is used to determine the ...,0.768952
98,how did the researchers use machine learning i...,the researchers utilized machine learning incl...,a computer program,the researchers used machine learning to train...,0.420088


In [28]:
res_df.to_csv('data/output/compared_results_with_score.csv', index=False)

# Other metric for evaluation
Ref: https://colab.research.google.com/drive/1tJGP-Oe_B_lH2EaiD3sNGBoIUv8tpJdO?usp=sharing#scrollTo=BjYz9DvaXgi9

1. Perplexity
- First things first, perplexity is limited to autoregressive (CausalLM) models. That does restrict its usefulness, but not tremendously!
- Intuitively, AutoModelForSeq2SeqLM is used for language models with encoder-decoder architecture, like T5 and BART, while AutoModelForCausalLM is used for auto-regressive language models like all the GPT models.

We can not use it in this case!!!


2. Human or AI Evaluation
Now, let's get into how we could compare the actual final production of the model - with human or AI supervision! The idea here is that we ask the model to perform a task - and then get some kind of results from a human being. This method similarly comes with some pros and cons:

Pros:
- Should provide excellent feedback on wether or not your model is performing as expected
Cons:
- Extremely expensive
- Since we're going to be leveraging AI in this example, you will need an OpenAI API key!

In [None]:
# from evaluate import load
# model_id = '0xhzx/nv-qa'
# perplexity = load("perplexity", module_type="metric")
# results = perplexity.compute(predictions=test_text, model_id=model_id)

# Use HF evaluate package for good evaluation

In [36]:
# from datasets import load_dataset
# from evaluate import evaluator
# from transformers import AutoModelForSequenceClassification, pipeline

# data = load_dataset("ajsbsd/nvidia-qa", split="train").shuffle(seed=42).select(range(100))
# task_evaluator = evaluator("text2text-generation")

# model = AutoModelForSequenceClassification.from_pretrained("0xhzx/nv-qa")
# pipe = pipeline("text2text-generation", model="0xhzx/nv-qa")


Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at 0xhzx/nv-qa and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


In [38]:
# eval_results = task_evaluator.compute(
#     model_or_pipeline="0xhzx/nv-qa", # Pass a model name or path
#     # model_or_pipeline=model,  # Pass an instantiated model
#     # model_or_pipeline=pipe,   # Pass an instantiated pipeline 
#     data=data,
#     input_column  = 'answer'
# )
# print(eval_results)

ValueError: Invalid `label_column` label specified. The dataset contains the following columns: ['Unnamed: 0', 'question', 'answer'].