## Setup Environment

In [None]:
%%capture
!pip install langchain
!pip install langchain-hub
!pip install langchain-community langchain-huggingface
!pip install huggingface_hub transformers
!pip install sentence_transformers==2.2.2
!pip install chromadb faiss accelerate
!pip install -U bitsandbytes
!pip install tiktoken python-dotenv
!pip install faiss-gpu
!pip install InstructorEmbedding docarray

## Import Modules

In [None]:
# llm modules
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from transformers import GenerationConfig
from langchain.llms import HuggingFacePipeline

# chain
from langchain.chains import ConversationChain
from langchain.chains import LLMChain

# prompt modules
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate

# base modules
import os
import torch
import warnings

warnings.filterwarnings("ignore")

## LLM

In [None]:
# Load the quantized, fine-tuned Llama-2 chat checkpoint and expose it to
# LangChain as `llm` through a Hugging Face text-generation pipeline.
model_name = "minkhantycc/Llama-2-7b-chat-finetune-quantized"
device_map = {"": 0}  # map the whole model (root module "") onto device 0

# bnb config: 4-bit NF4 weights, float16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)

# base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# NOTE(review): these two flags look like fine-tuning-time settings; confirm
# they are still wanted for an inference-only notebook.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

# Tokenizer for the same checkpoint; EOS doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Generation Configuration
# Decoding is greedy (do_sample=False), so no `temperature` is set: sampling
# parameters are ignored in this mode and only trigger validation warnings.
base_model.generation_config = GenerationConfig(
    max_new_tokens = 256,
    repetition_penalty = 1.15,
    do_sample = False,
    eos_token_id = tokenizer.eos_token_id,
    pad_token_id = tokenizer.eos_token_id,
)

# pipeline: return only the newly generated text, not the echoed prompt
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device_map=device_map,
    return_full_text=False
)

# llm: LangChain wrapper used by every chain below
llm = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## CSV Loader, Vector Store, and Retrieval QA Chain

In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# BAAI/bge-* checkpoints are not INSTRUCTOR models, so they are loaded through
# the dedicated BGE wrapper instead of HuggingFaceInstructEmbeddings.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Load CSV: `data` holds the loaded documents, used later to generate QA pairs
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

# embedding: BGE large (English) on the GPU, with normalized output vectors
embedding = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={'device': "cuda"},
    encode_kwargs={'normalize_embeddings': True}
)

# Vector Store: in-memory index built from the same CSV loader
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding,
).from_loaders([loader])

load INSTRUCTOR_Transformer
max_seq_length  512


In [None]:
# QA prompt template (Llama-2 chat format). The system block is closed with
# `<</SYS>>`, matching the evaluation prompt used later in this notebook.
prompt_template = """<s>[INST]<<SYS>>Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.<</SYS>>
{context}
User's question: {question}
[/INST]
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["question", "context"]
)


# QA Chain: retrieved documents are joined with the separator below and
# "stuffed" into {context} of the prompt.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>",
        "prompt": prompt
    }
)

## Evaluation of LLM Chains

### Question-Answer Pairs Generation to Evaluate LLM

In [None]:
from langchain.evaluation.qa import QAGenerateChain

# QA Generation template (Llama-2 chat format). The system block is closed with
# `<</SYS>>`, matching the evaluation prompt used later in this notebook.
qa_generate_template = """<s>[INST]<<SYS>>You are a teacher coming up with questions to ask on a quiz.
Given the following document, please generate a question and answer based on that document.

Example Format:
<Begin Document>
...
<End Document>
QUESTION: question here
ANSWER: answer here

These questions should be detailed and be based explicitly on information in the document. Begin!

<Begin Document>
{doc}
<End Document>
<</SYS>>[/INST]"""

qa_generate_prompt = PromptTemplate(
    template=qa_generate_template,
    input_variables=["doc"]
)

# create chain: generates one question/answer pair per input document
example_gen_chain = QAGenerateChain(llm=llm, prompt=qa_generate_prompt)

In [None]:
# Hard-coded examples, in the same {"qa_pairs": {...}} shape the generation
# chain returns. Long queries use implicit string concatenation; a backslash
# continuation inside the literal would embed the next line's indentation
# into the query text.
examples = [
    {
        "qa_pairs": {
            "query": "Do the Cozy Comfort Pullover Set have side pockets?",
            "answer": "Yes"
        }
    },
    {
        "qa_pairs": {
            "query": (
                "What collection is the Ultra-Lofty "
                "850 Stretch Down Hooded Jacket from?"
            ),
            "answer": "The DownTek collection"
        }
    }
]

# llm generated examples: one QA pair for each of the first five documents
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)

# Combine examples
examples += new_examples

new_examples[0]

{'qa_pairs': {'query': "What is the name of this women's shoe brand?",
  'answer': 'The name of this women\'s shoe brand is "Women\'s Campside Oxfords". '}}

In [None]:
# Unwrap every {"qa_pairs": {...}} record into a flat query/answer dictionary
egs = [
    {
        "query": record['qa_pairs']['query'],
        "answer": record['qa_pairs']['answer'],
    }
    for record in examples
]
egs

[{'qa_pairs': {'query': 'Do the Cozy Comfort Pullover Set            have side pockets?',
   'answer': 'Yes'}},
 {'qa_pairs': {'query': 'What collection is the Ultra-Lofty             850 Stretch Down Hooded Jacket from?',
   'answer': 'The DownTek collection'}},
 {'qa_pairs': {'query': "What is the name of this women's shoe brand?",
   'answer': 'The name of this women\'s shoe brand is "Women\'s Campside Oxfords". '}},
 {'qa_pairs': {'query': 'What is the name of this product?',
   'answer': 'The name of this product is "Recycled Waterhog Dog Mat."'}},
 {'qa_pairs': {'query': 'What is the name of the product being described?',
   'answer': 'According to the document, the name of the product being described is "Infant and Toddler Girls\' Coastal Chill Swimsuit, Two-Piece". '}},
 {'qa_pairs': {'query': 'What percentage of the fabric used in this product is made from recycled materials?',
   'answer': 'According to the provided document, 82% of the fabric used in this product is made fro

In [None]:
# Smoke test: run the RetrievalQA chain on the first example's query and
# display the generated answer.
qa_stuff.run(egs[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The Cozy Comfort Pullover Set does not have side pockets. '

### Manual Evaluation

In [None]:
# Manual evaluation: re-run the first example with LangChain's global debug
# mode on, so every intermediate chain input/output is printed for inspection.
from langchain.globals import set_debug

set_debug(True)
try:
    debug_answer = qa_stuff.run(egs[0]["query"])
finally:
    # Always switch debug off again so later cells are not flooded with traces.
    set_debug(False)

debug_answer

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set            have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set            have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nA

'The Cozy Comfort Pullover Set does not have side pockets. '

### LLM Assisted Evaluation

In [None]:
# Run the RetrievalQA chain over every example; each returned record is later
# read for its 'query', 'answer' and 'result' fields.
predictions = qa_stuff.apply(egs)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [None]:
from langchain.evaluation.qa import QAEvalChain

# Grading prompt (Llama-2 chat format): the model plays a teacher who marks
# each predicted answer as CORRECT or INCORRECT against the reference answer.
eval_str = """<s>[INST]<<SYS>>
You are a teacher grading a quiz.
You are given a question, the student's answer, and the true answer,
and are asked to score the student answer as either CORRECT or INCORRECT.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: CORRECT or INCORRECT here

Grade the student answers based ONLY on their factual accuracy.
Ignore differences in punctuation and phrasing between the student answer and true answer.
It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements.
Begin!<</SYS>>

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}[/INST]
GRADE:"""

eval_template = PromptTemplate(
    template=eval_str,
    input_variables=['answer', 'query', 'result'],
)

# Build the grader on the same LLM and score every prediction
eval_chain = QAEvalChain.from_llm(llm=llm, prompt=eval_template)
graded_outputs = eval_chain.evaluate(examples=egs, predictions=predictions)

graded_outputs[0]

{'results': ' CORRECT\n '}

### Performance Results

In [None]:
# Report, per example: question, reference answer, model answer and LLM grade
for idx, _ in enumerate(egs):
    print(f"Example {idx}:")
    record = predictions[idx]
    print(f"Question: {record['query']}")
    print(f"Real Answer: {record['answer']}")
    print(f"Predicted Answer: {record['result']}")
    print(f"Predicted Grade: {graded_outputs[idx]['results']}")
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set            have side pockets?
Real Answer: Yes
Predicted Answer: The Cozy Comfort Pullover Set does not have side pockets. 
Predicted Grade:  CORRECT
 

Example 1:
Question: What collection is the Ultra-Lofty             850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is part of the DownTek Collection. 
Predicted Grade:  CORRECT

The correct answer is "The DownTek collection". This is because the student answer correctly identifies the collection that the jacket belongs to. 

Example 2:
Question: What is the name of this women's shoe brand?
Real Answer: The name of this women's shoe brand is "Women's Campside Oxfords". 
Predicted Answer: The name of this women's shoe brand is "No-Wrinkle".
Predicted Grade:  INCORRECT

The correct answer for the question is "Women's Campside Oxfords", which differs from the student answer provided. Therefore, I 