In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator, TrainingArguments, Trainer, BitsAndBytesConfig, GenerationConfig
from datasets import load_dataset
import torch
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
import evaluate
import pandas as pd
import numpy as np
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# !pip install langchain==0.0.335

In [4]:
# !pip install openai==1.3.0
# !pip install openai==0.28

In [5]:
model_id = "mistralai/Mistral-7B-v0.1"

## Dataset

In [6]:
# new data 2024_04_05
train_dataset = load_dataset("csv", data_files="./total_data_2023_04_05.csv", split="train[:90%]")
eval_dataset = load_dataset("csv", data_files="./total_data_2023_04_05.csv", split="train[90%:]")

In [7]:
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=1024,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

def generate_prompt(data_point):
    full_prompt = f"""<s>[INST]{data_point['instruction']}
    {f"Here is some context: {data_point['context']}" if len(data_point["context"]) > 0 else None}
    [/INST] {data_point['response']}
    </s>"""
    return {"text": full_prompt}

# def generate_prompt_two(data_point):
#     full_prompt = f"""<s>[INST]### Instruction:{data_point['instruction']}
#     {f"\n\n Here is some context: ### Input:" {data_point['context']}" if len(data_point["context"]) > 0 else None}
#     [/INST]\n\n completion: {data_point['response']}
#     </s>"""
#     return {"text": full_prompt}

In [8]:
def formatting_func(example):
  if example.get("context", "") != "":
      input_prompt = (f"Below is an instruction that describes a task, paired with an input that provides further context. "
      "Write a response that appropriately completes the request.\n\n"
      "### Instruction:\n"
      f"{example['instruction']}\n\n"
      f"### Input: \n"
      f"{example['context']}\n\n"
      f"### Response: \n"
      f"{example['response']}")

  else:
    input_prompt = (f"Below is an instruction that describes a task. "
      "Write a response that appropriately completes the request.\n\n"
      "### Instruction:\n"
      f"{example['instruction']}\n\n"
      f"### Response:\n"
      f"{example['response']}")

  return {"text" : input_prompt}

In [9]:
generated_train_dataset = train_dataset.map(formatting_func, remove_columns=list(train_dataset.features))
generated_eval_dataset = eval_dataset.map(formatting_func, remove_columns=list(eval_dataset.features))

## Inference

### Base model

In [10]:
model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # quantization_config = bnb_config,
    device_map="auto",
    # trust_remote_code=True,
    return_dict=True,
    torch_dtype=torch.float16,
    # use_auth_token=True
)
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # add_bos_token=True,
    trust_remote_code=True,
    padding_size="left"
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token
# eval_tokenizer.add_eos_token=True
# eval_tokenizer.add_bos_token, eval_tokenizer.add_eos_token

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.95s/it]


In [11]:
def inference(instruction, context = None):
  if context:
    prompt = f"Below is an instruction that describes a task, paired with an input that provides further context.\n\n### Instruction: \n{instruction}\n\n### Input: \n{context}\n\n### Response: \n"
  else:
    prompt = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction: \n{instruction}\n\n### Response: \n"
  
  # model_input = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")
  input_ids = eval_tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  base_model.eval()
  with torch.no_grad():
      return eval_tokenizer.decode(base_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=50, num_beams=1, pad_token_id=2))[0], 
                            skip_special_tokens=True)


In [11]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in eval_dataset:
    out = inference(data['instruction'], data['context'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out.split('Response: \n')[-1])

In [12]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)

{'rouge1': 0.07218230010375547, 'rouge2': 0.041823822482589236, 'rougeL': 0.0593621444294312, 'rougeLsum': 0.05981011588193556}


### Finetuned model

In [13]:
ft_model = PeftModel.from_pretrained(base_model, "./peft-training-1712523322/checkpoint-1005", is_trainable=False, torch_dtype=torch.bfloat16)

In [14]:
def inference(model, instruction , context = None):
  if context:
    prompt = f"Below is an instruction that describes a task, paired with an input that provides further context.\n\n### Instruction: \n{instruction}\n\n### Input: \n{context}\n\n### Response: \n"
  else:
    prompt = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction: \n{instruction}\n\n### Response: \n"
  
  # model_input = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")
  input_ids = eval_tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  model.eval()
  with torch.no_grad():
      return eval_tokenizer.decode(model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=50, num_beams=1, pad_token_id=2))[0], 
                            skip_special_tokens=True)


In [15]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in eval_dataset:
    out = inference(ft_model, data['instruction'], data['context'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out.split('Response: \n')[-1])

In [16]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)

{'rouge1': 0.37572347736798006, 'rouge2': 0.2162117457770065, 'rougeL': 0.3064643618441903, 'rougeLsum': 0.30663628941332355}


## OpenAI Model

In [35]:
# !pip install --upgrade openai
# !pip install -U openai
# !pip install openai
# !pip install openai==0.28

In [36]:
import os
from openai import OpenAI
# import openai

In [37]:
import sys
import os
import openai
from dotenv import load_dotenv, find_dotenv

def setup():
    sys.path.append('./content')
    os.environ["OPENAI_API_KEY"] = ''
    _ = load_dotenv(find_dotenv())
    openai.api_key  = os.environ['OPENAI_API_KEY']
setup()

In [38]:
data_sample = next(iter(eval_dataset))
data_sample['instruction']

' How can I reduce my risk of developing complications from an infection during pregnancy?\n\n'

In [39]:
client = OpenAI()

def inference_openai(instruction , context = None):
    completion = client.chat.completions.create(
      model="gpt-3.5-turbo",
      messages=[
        {"role": "system", "content": "Below is an instruction that describes a task, paired with an input that provides further context."},
        {"role": "user", "content": instruction + ' '+ context}
      ]
    )
    return completion.choices[0].message.content

In [16]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in eval_dataset:
    out = inference_openai(data['instruction'], data['context'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out.split('Response: \n')[-1])

In [17]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)

{'rouge1': 0.41020469809302945, 'rouge2': 0.22515338947438995, 'rougeL': 0.3063812409360829, 'rougeLsum': 0.31903575248890903}


## RAG

In [40]:
from langchain.document_loaders import PyPDFLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.embeddings import LlamaCppEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
# from llamaapi import LlamaAPI

# from langchain.llms import LlamaCPP
from langchain.llms import HuggingFaceHub
from langchain.chains import LLMChain
import os

from langchain.llms import LlamaCpp  
from langchain.prompts import PromptTemplate  
from langchain.chains import LLMChain  
from langchain.callbacks.manager import CallbackManager  
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 
from os import path
import openai

In [41]:
output = {}
output['context'] = []
output['response'] = []

for data in eval_dataset:
    output['context'].append(data['context'])
    output['response'].append(data['response'])


In [22]:
df = pd.DataFrame.from_dict(output)
df.head()

Unnamed: 0,context,response
0,spore) is causing the infection • Whether you ...,By taking proactive measures such as frequent...
1,of sperm with morpho - logic abnormalities [16...,Fathers play a significant role in shaping th...
2,"normal from abnormal,soobservationstendtobecat...",Low birth weight (LBW) refers to infants who ...
3,washcloth on your face helps ease tension and ...,Using a birthing ball can help decrease disco...
4,of these studies was that of Chanarin and Roth...,The study by Chanarin and Rothman suggests th...


In [23]:
df.to_csv('nutrition_pred_04_05.csv')

In [24]:
loader = CSVLoader(file_path='nutrition_pred_04_05.csv', source_column="context")
data = loader.load()

In [25]:
data[0]

Document(page_content=': 0\ncontext: spore) is causing the infection • Whether you have antibodies to the organism from a prior exposure • Whether the disease is treatable • When during pregnancy you acquired the infection Even if you get an infection during pregnancy, your baby might not become infected—and even if your baby gets infected, he might not be harmed. The chart on pages 132–133 identiﬁes infections that are harmful during pregnancy, and the following sections provide information on the most serious of them. Ways to Avoid Getting Sick The best way to prevent complications from an infection is to avoid getting sick. Here are a few guidelines to follow: 1. Wash your hands several times each day, especially before eating and after using the toilet. Germs live on doorknobs, handrails, phones, hands, and other surfaces. After touching a germ-covered surface with your hands, you transmit the germs to your food, mouth, nose, and anything else you touch. 2. Stay away from sick peop

In [26]:
len(data)

1805

In [27]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
splits = text_splitter.split_documents(data)

In [28]:
persist_directory = 'docs/chroma/'

In [29]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embeddings
)
print(vectordb._collection.count())

3031


In [30]:
vectordb.persist()

In [42]:
def get_prediction(llm, vectordb, question):
    template_2 = """Below is an instruction that describes a task, paired with an input that provides further context.
        \n\n###Instruction: {question}
        \n\n### Input: {context}
        \n\n### Response: \n"""
   
    QA_CHAIN_PROMPT_2 = PromptTemplate.from_template(template_2)
    qa_chain_2 = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT_2}
    )
    result_p_2= qa_chain_2({"query":question})
    # return result_p_2['result']
    return result_p_2

In [43]:
next(iter(generated_train_dataset))

{'text': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n How can I tell if my baby is getting enough movement in the womb?\n\n\n### Input: \nIf you have bright red bleeding of more than a spot or two at any time this month, call your care provider right away . It could be a sign of placental abruption, a serious problem in which your placenta separates from the wall of your uterus. This condition is a medical emer gency . However , try not to confuse this kind of bleeding with the slight bleeding you may have after a pelvic exam or with the blood and mucus you may see as the cervix thins. Constant, severe abdominal pain If you have constant, severe abdominal pain, contact your care provider immediately . Although uncommon, this can be another sign of placental abruption. If you also have a fever and vaginal dischar ge along with the pain, you may have an 

In [44]:
llm_name='gpt-3.5-turbo'
llm = OpenAI(model_name=llm_name)
question = "When did Virgin Australia start operating?"
result = get_prediction(llm, vectordb, question)



AttributeError: module 'openai' has no attribute 'error'

In [None]:
result['result']

In [None]:
llm_name='gpt-3.5-turbo'
llm = ChatOpenAI(model_name=llm_name, temperature=0)

template_2 = """Below is an instruction that describes a task, paired with an input that provides further context.
        \n\n###Instruction: {question}
        \n\n### Input: {context}
        \n\n### Response: \n"""

QA_CHAIN_PROMPT_2 = PromptTemplate.from_template(template_2)
qa_chain_2 = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT_2}
)
question_p_2 = "When did Virgin Australia start operating?"
result_p_2= qa_chain_2({"query":question_p_2})
result_p_2['result']

#### Using Mistral with RAG

In [None]:
from langchain import HuggingFaceHub

model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config = bnb_config,
    device_map="auto",
    # trust_remote_code=True,
    return_dict=True,
    torch_dtype=torch.float16,
    # use_auth_token=True
)
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # add_bos_token=True,
    trust_remote_code=True,
    padding_size="left"
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token

In [None]:
import transformers
generate_text = transformers.pipeline(
    model=base_model, 
    tokenizer=eval_tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    # stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [None]:
from langchain.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=generate_text)

In [None]:
question = "When did Virgin Australia start operating?"
result = get_prediction(llm, vectordb, question)

In [None]:
result['result'].split('\nresponse:')[-1]

In [None]:
output = {}
output['instruction'] = []
output['context'] = []
output['response'] = []
output['model_output'] = []
for data in eval_dataset:
    out = get_prediction(llm, vectordb, data['instruction'])
    output['instruction'].append(data['instruction'])
    output['context'].append(data['context'])
    output['response'].append(data['response'])
    output['model_output'].append(out['result'].split('Response: \n')[-1])

In [None]:
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=output['model_output'], references=output['response'][0:len(output['model_output'])],
                        use_aggregator=True,
                        use_stemmer=True,)
print(results)