# Fine-Tuning the LLM

In [None]:
!pip install bitsandbytes accelerate xformers einops langchain faiss-cpu transformers sentence-transformers

In [None]:
from typing import List
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig
import torch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.callbacks.tracers import ConsoleCallbackHandler
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

# Pick the compute device once: use the GPU when PyTorch can see one,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Device:", device)

# On a GPU runtime, also report the name of device index 0.
if device == 'cuda':
    print(torch.cuda.get_device_name(0))

# >>> Device: cuda
# >>> Tesla T4

In [None]:
# Hugging Face access token. It is read from the HF_TOKEN environment variable
# so a real secret never has to be stored in the notebook; the placeholder is
# kept as the fallback so `token` is always defined.
import os

token = os.environ.get("HF_TOKEN", '<hf-token>')

orig_model_path = "HuggingFaceH4/zephyr-7b-beta"
# model_path = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"

# Choose the 4-bit compute dtype from the hardware instead of hard-coding
# bfloat16: bf16 is only native on GPUs with compute capability >= 8.0, and the
# runtime recorded in this notebook (Tesla T4) is older than that. Fall back
# to float16 there.
_native_bf16 = (
    torch.cuda.is_available()
    and torch.cuda.get_device_capability(0)[0] >= 8
)
compute_dtype = torch.bfloat16 if _native_bf16 else torch.float16

# 4-bit NF4 quantization with double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

# NOTE(review): trust_remote_code=True allows the model repository to run its
# own Python code at load time; presumably not required for this checkpoint --
# confirm and drop it if so.
model = AutoModelForCausalLM.from_pretrained(
    orig_model_path,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(orig_model_path)

In [None]:
from langchain import HuggingFacePipeline

# Decoding settings forwarded to the transformers text-generation pipeline.
# Padding reuses the tokenizer's end-of-sequence token id.
generation_settings = dict(
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=100,
)

# Wrap the already-loaded model and tokenizer in a text-generation pipeline,
# then expose that pipeline to LangChain as `LLM`.
text_generation_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)
LLM = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# Smoke-test the bare LLM (no retrieval involved) with one free-form prompt.
text = (
    "tell me about Indian Population "
    "(Give the related information and if you dont know the asnwer then tell me you dont know about it)"
)
LLM.invoke(text)

# RAG

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-embedding model wrapper. It runs on the `device` detected at the
# top of the notebook rather than a hard-coded "cuda", so this cell also works
# on a CPU-only runtime.
embeddings = HuggingFaceEmbeddings(
    # model_name="sentence-transformers/all-MiniLM-l6-v2",
    model_name="efederici/sentence-bert-base",  # Sentence Bert Base
    model_kwargs={"device": device},
)

In [None]:
import pickle

import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever

# Knowledge base (resources scraped from the internet): every cell of the
# 'Data' column, coerced to str, becomes one document.
dataset = pd.read_excel('all_topics_vectordb.xlsx')
db_docs = dataset['Data'].astype(str).tolist()

# Embed the documents with a default-configured HuggingFaceEmbeddings and
# index them in a FAISS vector store.
embeddings = HuggingFaceEmbeddings()
vector_db = FAISS.from_texts(db_docs, embeddings)

# Persist the whole vector-store object to disk as a pickle.
vectorDB_filename = "vectorDB.bin"
with open(vectorDB_filename, "wb") as out_file:
    pickle.dump(vector_db, out_file)
print("----------------", "VectorDB Stored", "----------------\n", sep="\n")

# Read the store back from the same file.
# NOTE: unpickling can execute arbitrary code, so only load files that this
# notebook wrote itself.
with open(vectorDB_filename, "rb") as in_file:
    loaded_vector_db = pickle.load(in_file)
print("\n----------------", "VectorDB Loaded", "----------------\n", sep="\n")

In [None]:
from langchain.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever

# Reuse the FAISS store built in the previous cell (`vector_db`) instead of
# loading the embedding model again and re-embedding the whole corpus: the
# inputs (db_docs, default HuggingFaceEmbeddings) are the same, so a rebuild
# only repeats the work.
retriever = VectorStoreRetriever(vectorstore=vector_db)

In [None]:
# Prompt used by the "stuff" QA chain. {history} is supplied by the
# ConversationBufferMemory configured below (memory_key="history");
# {context} and {question} are filled in by the chain when it runs.
template = """You are a excellent AI assistant and you know the world's knowledge.Now i want you to retrieve the relavent splits from the VectorDB according to the given query then summarize that relavent splits and print a conclusion from the context that we got from VectorDB.
              {context}
              If you don't know the answer, just say that you don't know, don't try to make up an answer.
              Chat history: {history}
              Question: {question}
              Write your answers short. Helpful Answer:"""

prompt = PromptTemplate(
    input_variables=["history", "context", "question"],
    template=template,
)

# Conversation memory: stored under "history", keyed on the "question" input.
qa_memory = ConversationBufferMemory(memory_key="history", input_key="question")
qa_chain_kwargs = {
    "verbose": False,
    "prompt": prompt,
    "memory": qa_memory,
}

# Retrieval-augmented QA chain: the retriever provides documents, the LLM
# answers using the prompt above.
qa = RetrievalQA.from_chain_type(
    llm=LLM,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=qa_chain_kwargs,
)

# Getting Reasons from the VectorDB

In [None]:
from tqdm import tqdm
import pandas as pd

# Claims to explain (25 records taken from the main dataset).
dataset = pd.read_excel('ML_Project_main_hate_fake.xlsx')

# Coerce every claim to str once and query with these values, so an empty or
# non-text cell cannot reach the chain as a float/NaN part-way through a long
# run.
# NOTE: `db_docs` is rebound here; earlier cells use the same name for the
# knowledge-base passages.
db_docs = dataset['Pre_Processed_English_text'].astype(str).tolist()
total_docs = len(db_docs)
print("No.of Records : ", total_docs)

# Ask the RAG chain about each claim, in row order. An explicit loop (rather
# than a comprehension) keeps the partial `reasons` list available in the
# notebook if a call fails mid-run.
# NOTE(review): the chain is configured with a conversation memory, so earlier
# claims and answers presumably carry over into later prompts -- confirm that
# this is intended for independent records.
reasons = []
for query in tqdm(db_docs, desc="Processing documents"):
    reasons.append(qa.run(query))

# Attach the generated reasoning as a new 'Reason' column.
dataset['Reason'] = reasons

# Write the augmented dataset to "ML_Projects_With_RAG_Reasons.xlsx".
dataset.to_excel("ML_Projects_With_RAG_Reasons.xlsx", index=False)
