<a href="https://colab.research.google.com/github/MAN1108/github-test/blob/main/MISTRAL_%26B_WITH_RAG_AND%20_METRICS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# AI MVP Project from datatec.studio
!pip install transformers torch accelerate bitsandbytes langchain
!pip install -U sentence-transformers chromadb
!pip install pypdf

from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.document_loaders import TextLoader
import os
import time
import torch
import transformers

# Create model and tokenizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", load_in_4bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Generate HuggingFacePipeline use pretrained model Mistral-7B-Instruct-v0.1
text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    repetition_penalty=1.2,
    return_full_text=True,
    max_new_tokens=1000)
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# load the document and split it into chunks
from langchain_community.document_loaders import PyPDFLoader
def load_pdf_files(directory_path):
          documents = []
          for root, _, files in os.walk(directory_path):
               for file in files:
                if file.endswith('.pdf'):
                  file_path = os.path.join(root, file)
                  loader = PyPDFLoader(file_path)
                  documents.extend(loader.load())
          return documents
directory_path = "/content/sample_data/harry"  # Replace with your directory\n",
documents = load_pdf_files(directory_path)
#loader = TextLoader("./demo.txt", encoding='utf-8')
#docs = loader.load()

# Split test data into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500, chunk_overlap=10, separators=['\n\n', '\n', '.']
)
document_chunks = text_splitter.split_documents(documents)

# Create Embedding and chroma db for test data
embedding_model = SentenceTransformerEmbeddings(model_name='BAAI/bge-large-en-v1.5')
chroma_db = Chroma.from_documents(document_chunks, embedding_model)


################### Use Prompt only once ###################

# Create question answer chain
retriever1 = chroma_db.as_retriever()
qa_chain1 = RetrievalQA.from_chain_type(mistral_llm, retriever=retriever1)

while True:
    # Ask questions to chatbot
    # Do you know language DtsDummyLanguage?
    # How to use it for web development?
    question = input("Please enter your question (or 'quit' to stop): ")

    if question.lower() == 'quit':
        break

    start_time = time.time()

    response1 = qa_chain1({"query": question})

    end_time = time.time()
    total_time = int(end_time - start_time)

    print(response1['result'])
    print(f"Total calculation time: {total_time} seconds")


###################Use Prompt twice ###################

while True:
    # Ask questions to chatbot
    # Do you know language DtsDummyLanguage?
    # How to use it for web development?
    question = input("Please enter your question (or 'quit' to stop): ")

    if question.lower() == 'quit':
        break

    start_time = time.time()

    # Get similar content and generate related chroma database
    similar_search_result = chroma_db.similarity_search(question)
    chroma_db_for_prompt = Chroma.from_documents(similar_search_result, embedding_model)

    # Create question answer chain
    retriever2 = chroma_db_for_prompt.as_retriever()
    qa_chain2 = RetrievalQA.from_chain_type(mistral_llm, retriever=retriever2)

    response2 = qa_chain2({"query": question})

    end_time = time.time()
    total_time = int(end_time - start_time)

    print(response2['result'])
    print(f"Total calculation time: {total_time} seconds")

Collecting accelerate
  Downloading accelerate-0.30.0-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.19-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m70.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cach

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Please enter your question (or 'quit' to stop): quit
Please enter your question (or 'quit' to stop): quit


In [14]:
while True:
    # Ask questions to chatbot
    # Do you know language DtsDummyLanguage?
    # How to use it for web development?
    question = input("Please enter your question (or 'quit' to stop): ")

    if question.lower() == 'quit':
        break

    start_time = time.time()

    response1 = qa_chain1({"query": question})

    end_time = time.time()
    total_time = int(end_time - start_time)

    print(response1['result'])
    print(f"Total calculation time: {total_time} seconds")


###################Use Prompt twice ###################

while True:
    # Ask questions to chatbot
    # Do you know language DtsDummyLanguage?
    # How to use it for web development?
    question = input("Please enter your question (or 'quit' to stop): ")

    if question.lower() == 'quit':
        break

    start_time = time.time()

    # Get similar content and generate related chroma database
    similar_search_result = chroma_db.similarity_search(question)
    chroma_db_for_prompt = Chroma.from_documents(similar_search_result, embedding_model)

    # Create question answer chain
    retriever2 = chroma_db_for_prompt.as_retriever()
    qa_chain2 = RetrievalQA.from_chain_type(mistral_llm, retriever=retriever2)

    response2 = qa_chain2({"query": question})

    end_time = time.time()
    total_time = int(end_time - start_time)

    print(response2['result'])
    print(f"Total calculation time: {total_time} seconds")

Please enter your question (or 'quit' to stop): quit
Please enter your question (or 'quit' to stop): quit


In [15]:
import pandas as pd
test_df = pd.read_csv("/content/sample_data/Mistral 78 -generated-RAG_dataset (1)-without  contexts.csv")

In [16]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

ground_truth = test_df['ground_truths'].tolist()
predictions = test_df['answer'].tolist()


def calculate_metrics(ground_truth, predictions):
  """
  Calculates and prints various evaluation metrics for question answering.

  Args:
      ground_truth: A list of strings containing the correct answers.
      predictions:  A list of strings containing the model's predicted answers.

  Returns:
      A dictionary containing the calculated metrics (accuracy, precision, recall, F1-score).
  """
  metrics = {}

  # Ensure equal list lengths (handle cases with missing predictions)
  ground_truth = ground_truth[:len(predictions)]
  predictions = predictions[:len(ground_truth)]
  predictions = [pred if pred is not None else "I'm not sure" for pred in predictions]
  # Calculate metrics
  metrics["accuracy"] = accuracy_score(ground_truth, predictions)
  metrics["precision"] = precision_score(ground_truth, predictions, average='weighted')  # Weighted precision for imbalanced datasets
  metrics["recall"] = recall_score(ground_truth, predictions, average='weighted')  # Weighted recall for imbalanced datasets
  metrics["f1"] = f1_score(ground_truth, predictions, average='weighted')  # Weighted F1-score for imbalanced datasets

  # Print Metric Results
  print("Evaluation Metrics:")
  for metric_name, value in metrics.items():
    print(f"{metric_name}: {value:.4f}")

  return metrics

# ... (Your code for generating ground_truth and predictions) ...

metrics = calculate_metrics(ground_truth, predictions)

Evaluation Metrics:
accuracy: 0.4000
precision: 0.4000
recall: 0.4000
f1: 0.4000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
import pandas as pd

dataset1 = pd.read_csv("/content/sample_data/Mistral 78 -generated-RAG_dataset (1)-with contexts.csv")

display(dataset1.head())



Unnamed: 0,question,contexts,answer,ground_truth
0,What is the name of the magical plant that str...,[Stop moving! Hermione ordered them. I know...,devil's snare,devil's snare
1,What is the name of the spell used to create a...,"[CHAPTER THIRTY-SIX  814  guard, felt his ha...",Protego,Protego
2,What is the name of the wand shop in Diagon Al...,"[""Welcome,"" said Hagrid, ""to Diagon Alley."" He...",Ollivanders,Ollivanders
3,Which Quidditch players fend off the Bludgers?,"[""You were the youngest House player in a hund...",The beaters,The beaters
4,What is the name of Filch's cat?,[very first morning. Filch found them trying t...,Mrs.Norris,Mrs.Norris


In [14]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

result = evaluate(
    dataset=dataset1,
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

df = result.to_pandas()

AttributeError: 'DataFrame' object has no attribute 'rename_columns'

In [15]:
from datasets import Dataset
def assemble_ragas_dataset(input_df):
    question_list, truth_list, context_list = [], [], []
    question_list = input_df.question.astype(str).to_list()
    truth_list = dataset1.ground_truth.astype(str).to_list()
    context_list = input_df.contexts.astype(str).to_list()
    context_list = [[context] for context in context_list]
    rag_answer_list = input_df.astype(str).answer.to_list()
    ragas_ds = Dataset.from_dict({"question": question_list,
                            "contexts": context_list,
                            "answer": rag_answer_list,
                            "ground_truth": truth_list
                            })
    return ragas_ds
ragas_input_ds = assemble_ragas_dataset(dataset1)
display(ragas_input_ds)

Dataset({
    features: ['question', 'contexts', 'answer', 'ground_truth'],
    num_rows: 20
})

In [18]:
import os, openai, pprint
from openai import OpenAI

# Save the api key in an env variable.
os.environ['OPENAI_API_KEY'] = 'sk-proj-JTdOU5O48hilCpFxhsZ2T3BlbkFJuvYoXUqZ1Orh7cUHhuAO'

# Choose the metrics you want to see.
from ragas.metrics import ( context_recall, context_precision, faithfulness, answer_relevancy)
metrics = ['context_recall', 'context_precision', 'faithfulness', 'answer_relevancy']

# Change the llm-as-critic.
from ragas.llms import llm_factory
LLM_NAME = "gpt-3.5-turbo"
ragas_llm = llm_factory(model=LLM_NAME)

# Also change the embeddings.
from langchain_openai.embeddings import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper
lc_embeddings = OpenAIEmbeddings( model="text-embedding-3-small", dimensions=512 )
ragas_emb = LangchainEmbeddingsWrapper(embeddings=lc_embeddings)

# Change the default models used for each metric.
for metric in metrics:
    globals()[metric].llm = ragas_llm
    globals()[metric].embeddings = ragas_emb

# Evaluate the dataset.
from ragas import evaluate
ragas_result = evaluate( ragas_input_ds,
    metrics=[ context_precision, context_recall, faithfulness, answer_relevancy],
    llm=ragas_llm,
)

Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

In [19]:
ragas_output_df = ragas_result.to_pandas()
ragas_output_df

Unnamed: 0,question,contexts,answer,ground_truth,context_precision,context_recall,faithfulness,answer_relevancy
0,What is the name of the magical plant that str...,[[Stop moving! Hermione ordered them. I kno...,devil's snare,devil's snare,1.0,1.0,1.0,0.459901
1,What is the name of the spell used to create a...,"[[CHAPTER THIRTY-SIX  814  guard, felt his h...",Protego,Protego,1.0,1.0,1.0,0.423936
2,What is the name of the wand shop in Diagon Al...,"[[""Welcome,"" said Hagrid, ""to Diagon Alley."" H...",Ollivanders,Ollivanders,1.0,1.0,1.0,0.918473
3,Which Quidditch players fend off the Bludgers?,"[[""You were the youngest House player in a hun...",The beaters,The beaters,1.0,1.0,1.0,0.755236
4,What is the name of Filch's cat?,[[very first morning. Filch found them trying ...,Mrs.Norris,Mrs.Norris,1.0,1.0,1.0,1.0
5,What's Nearly Headless Nick's real name?,[[upstairs. To escape from Filch's office with...,Sir Patrick,Nicholas de Mimsy-Porpington,1.0,1.0,1.0,0.29858
6,What is the name of the spell used to immobili...,[[BEYOND THE VEIL  803  He made the same sla...,Petrificus Totalus,Immobilus,1.0,0.0,1.0,0.481905
7,Which character is the head of the Malfoy family?,"[[Harry, Ron, and Hermione turned quickly. Edg...",Lucius Malfoy,Lucius Malfoy,1.0,1.0,1.0,0.534869
8,What is the name of the ghost who haunts the R...,[[The Battle of Hogwarts diadem that had elude...,Gray Lady,Gray Lady,1.0,1.0,1.0,0.91087
9,What was the name of the Potters' house-elf?,[[The Bluebottle: A Broom for All the Family ...,Dobby,Dobby,1.0,1.0,0.0,0.401341
