# Generate Responses for IRS: GPT-4

## Setup

In [None]:
# Switch the kernel's working directory to the local checkout of the repository.
# NOTE(review): this is a hard-coded absolute Windows path and must be adjusted per machine;
# the relative `raga_requirements.txt` install and the local imports (`__init__`, `gpt4_script`)
# in the following cells presumably resolve against this directory — confirm.
%cd E:/Github_Repo/Info-Retrieve-AI/

In [None]:
# Install required packages (%pip installs into the environment of the running kernel).
# Project requirements come from the requirements file in the working directory.
%pip install -r raga_requirements.txt
# Additional packages installed on top of the requirements file.
# NOTE(review): these four are unpinned, so a re-run may resolve different versions.
%pip install sentence_transformers rouge nltk bert-score
# openai is pinned to 0.28 — presumably because gpt4_script targets the pre-1.0 client API; TODO confirm.
%pip install openai==0.28

In [None]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Import necessary modules
import os
from __init__ import cfg
import pandas as pd
import requests
from bs4 import BeautifulSoup  # If web scraping is needed directly in RAGA
from huggingface_hub import HfApi  # For interacting with Hugging Face Hub if necessary
import ragas
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_relevancy, context_recall, context_entity_recall, answer_similarity, answer_correctness
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
from gpt4_script import QASystem, BlogIndexer
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from datasets import Dataset
from bert_score import score as bert_score
import openai


# Point every Hugging Face cache variable at one shared directory.
# NOTE(review): this is a Colab/Drive-style path while the notebook changes into a Windows
# checkout — confirm it is valid on the machine running this notebook.
# NOTE(review): these variables are assigned after the Hugging Face libraries were imported
# above — confirm the cache settings still take effect.
hf_cache_dir = '/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA/huggingface'
for cache_var in ('HF_HOME', 'TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE', 'HF_METRICS_CACHE'):
    os.environ[cache_var] = hf_cache_dir

# Export the API credentials held by the project config object
os.environ.update({
    'HF_TOKEN': cfg.HF_TOKEN,
    'OPENAI_API_KEY': cfg.OPENAI_API_KEY,
})

# Fetch the NLTK 'punkt' resource and create the shared ROUGE scorer instance
nltk.download('punkt')
rouge = Rouge()

Mounted at /content/drive
/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA
Collecting ragas (from -r raga_requirements.txt (line 1))
  Downloading ragas-0.1.7-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.2/81.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting pinecone-client==4.0.0 (from -r raga_requirements.txt (line 7))
  Downloading pinecone_client-4.0.0-py3-none-any.whl (214 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m214.5/214.5 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bs4 (from -r raga_requirements.txt (line 8))
  Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Collecting datasets (from ragas->-r raga_requirements.txt (line 1))
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken (from ragas->-r 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


## Load Synthetic Baseline Responses

In [None]:
# Load the synthetic baseline question/answer dataset.
# NOTE(review): the path ends in ".ipynb" but is parsed as CSV — confirm this is the
# exported CSV and not a notebook file. It is also an absolute, machine-specific path.
SYN_QA_PATH = r"E:\Github_Repo\Info-Retrieve-AI\RAGAs\code\raga_syn_qa_generator.ipynb"
data = pd.read_csv(SYN_QA_PATH)

# Fail fast with a readable message if the file is not the expected table:
# 'ground_truth' is filled just below and 'question' is read by the response generator.
missing_columns = {'question', 'ground_truth'} - set(data.columns)
if missing_columns:
    raise KeyError(
        f"{SYN_QA_PATH} is missing required column(s): {sorted(missing_columns)}"
    )

# Replace missing reference answers with an explicit placeholder instead of NaN
data['ground_truth'] = data['ground_truth'].fillna('No response available')
data

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What challenges do auto manufacturers face in ...,"['In both the UK and US, there is currently a ...",Severe price increases imposed via tariffs tha...,simple,[{'title': 'How Disruptive Pricing Will Impact...,True
1,What was the limitation of the quick-serve res...,[' quick-serve restaurant (QSR) we worked with...,The quick-serve restaurant's segmentation solu...,simple,"[{'title': 'Obtain a Deeper, Multidimensional ...",True
2,How does negative press coverage impact a bran...,[' spurred viral videos of fans destroying the...,No response available,simple,"[{'title': 'The Power, and Pitfalls, of Politi...",True
3,How does EVForward provide valuable insight in...,[' to much more effective results.\n\n\nEVForw...,EVForward provides valuable insight into the E...,simple,[{'title': 'Finding the Future of Mobility wit...,True
4,What challenges do landlords face when install...,"[' sharing (i.e., if multiple EVs are plugged ...",Landlords face a balancing act in installing i...,simple,[{'title': 'Multifamily Housing EV Charging: W...,True
5,How can effective sponsorship measurement help...,"[' marketers don’t see the value, or worse sti...",Effective sponsorship measurement provides det...,simple,[{'title': 'Measuring the Value of Sponsorship...,True
6,What is the current availability of charging s...,['EV Potential Not Yet Realized\n\n\nAs we loo...,The number of public charging stations has inc...,simple,"[{'title': 'The Challenges of Making EV Easy',...",True
7,How is the global rideshare industry projected...,['It’s safe to say that ridesharing services l...,The global rideshare industry is projected to ...,simple,[{'title': 'Rideshare Vehicles: A Meaningful B...,True
8,What is the importance of delivering multiface...,['there’s an obstacle in my way\n—\nperhaps I ...,The critical support offered to EV buyers need...,simple,[{'title': 'How to Create an “EV Easy” Mindset...,True
9,What are some of the specific areas that manuf...,"['For fans of the Terminator series of movies,...","Manufacturers, suppliers, and technology compa...",simple,[{'title': 'Rise of the Smart Car: The Shape-S...,True


## Response Generator for GPT-4 (metric computation currently disabled)

In [None]:
class SynResponseGenerator:
    """Attach model-generated answers to a table of questions.

    The constructor stores its three arguments unchanged. `indexer` is kept on
    the instance but is not read by the methods defined here.
    """

    def __init__(self, model, indexer, data):
        # References only: `data` is not copied, so later changes are visible to the caller.
        self.data = data
        self.indexer = indexer
        self.model = model

    def generate_responses(self):
        """Answer every entry of the 'question' column via `model.answer_query`.

        The answers are written into a 'Response' column of the stored
        DataFrame (in place), and that same DataFrame object is returned.
        """
        def _answer(question):
            return self.model.answer_query(question)

        answers = self.data['question'].apply(_answer)
        self.data['Response'] = answers
        return self.data

    # def compute_metrics(self):
    #     # Ensure ground truth data is present for all records to avoid processing errors
    #     filtered_data = self.data.dropna(subset=['ground_truth'])

    #     # Preprocessing text for metric calculation
    #     filtered_data['Reference_Tokens'] = filtered_data['ground_truth'].apply(lambda x: nltk.word_tokenize(x.lower()))
    #     filtered_data['Response_Tokens'] = filtered_data['Response'].apply(lambda x: nltk.word_tokenize(x.lower()))

    #     # Calculate BLEU and ROUGE scores only for entries with valid ground truth
    #     filtered_data['BLEU'] = filtered_data.apply(lambda row: sentence_bleu([row['Reference_Tokens']], row['Response_Tokens'], weights=(0.25, 0.25, 0.25, 0.25)), axis=1)
    #     filtered_data['ROGUE'] = filtered_data.apply(lambda row: rouge.get_scores(row['Response'], row['ground_truth'])[0]['rouge-l']['f'], axis=1)


    #     # Calculate BERT scores
    #     # Convert to list for BERT score calculation
    #     references = filtered_data['ground_truth'].tolist()
    #     predictions = filtered_data['Response'].tolist()

    #     # Calculate BERT scores
    #     P, R, F1 = bert_score(predictions, references, lang="en", verbose=True)

    #     # Add BERT scores to DataFrame
    #     filtered_data['BERT_P'] = P.numpy()
    #     filtered_data['BERT_R'] = R.numpy()
    #     filtered_data['BERT_F1'] = F1.numpy()

    #     # filtered_data['BERT'] = bert_score(filtered_data['Response'], filtered_data['ground_truth'], lang="en")[2]

    #     # Prepare data samples for RAGA metrics, ensuring all necessary fields are present
    #     data_samples = {
    #         'question': filtered_data['question'].tolist(),
    #         'answer': filtered_data['Response'].tolist(),
    #         'contexts': [nltk.sent_tokenize(x) for x in filtered_data['ground_truth']],
    #         'ground_truth': filtered_data['ground_truth'].tolist()  # Ensuring ground_truth is included for RAGA metrics
    #     }
    #     dataset = Dataset.from_dict(data_samples)

    #     # Compute RAGA metrics
    #     raga_results = evaluate(dataset, metrics=[
    #         faithfulness, answer_relevancy, context_precision, context_relevancy,
    #         context_recall, context_entity_recall, answer_similarity, answer_correctness
    #     ])
    #     raga_df = raga_results.to_pandas()

    #     # Combine all metrics into a single DataFrame
    #     self.metrics_df = pd.concat([filtered_data, raga_df], axis=1)
    #     return self.metrics_df


## Generate Responses

In [None]:
# Example usage: build the indexer and the GPT-4 QA system, then answer every question in `data`
indexer = BlogIndexer(
    url='https://escalent.co/thought-leadership/blog/?industry=automotive-and-mobility',
    headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
)
# Pass the key value from the config object; the quoted form 'cfg.OPENAI_API_KEY'
# would hand the literal text to QASystem instead of the credential.
model = QASystem('gpt-4', indexer, openai_key=cfg.OPENAI_API_KEY)

# generate_responses() adds a 'Response' column to `data` in place and returns it for display
evaluator = SynResponseGenerator(model, indexer, data)
evaluator.generate_responses()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating query embedding...
Retrieving context...
Generating response based on the context...
Duplicate query detected; not logging.
Response logged successfully.
Generating

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done,Response
0,What challenges do auto manufacturers face in ...,"['In both the UK and US, there is currently a ...",Severe price increases imposed via tariffs tha...,simple,[{'title': 'How Disruptive Pricing Will Impact...,True,Potential massive price hikes due to trade tar...
1,What was the limitation of the quick-serve res...,[' quick-serve restaurant (QSR) we worked with...,The quick-serve restaurant's segmentation solu...,simple,"[{'title': 'Obtain a Deeper, Multidimensional ...",True,The quick-serve restaurant's segmentation solu...
2,How does negative press coverage impact a bran...,[' spurred viral videos of fans destroying the...,No response available,simple,"[{'title': 'The Power, and Pitfalls, of Politi...",True,Negative press coverage can impact a brand's h...
3,How does EVForward provide valuable insight in...,[' to much more effective results.\n\n\nEVForw...,EVForward provides valuable insight into the E...,simple,[{'title': 'Finding the Future of Mobility wit...,True,EVForward provides valuable insight into the E...
4,What challenges do landlords face when install...,"[' sharing (i.e., if multiple EVs are plugged ...",Landlords face a balancing act in installing i...,simple,[{'title': 'Multifamily Housing EV Charging: W...,True,Landlords of multifamily housing face several ...
5,How can effective sponsorship measurement help...,"[' marketers don’t see the value, or worse sti...",Effective sponsorship measurement provides det...,simple,[{'title': 'Measuring the Value of Sponsorship...,True,Effective sponsorship measurement can help in ...
6,What is the current availability of charging s...,['EV Potential Not Yet Realized\n\n\nAs we loo...,The number of public charging stations has inc...,simple,"[{'title': 'The Challenges of Making EV Easy',...",True,The text does not provide a specific number or...
7,How is the global rideshare industry projected...,['It’s safe to say that ridesharing services l...,The global rideshare industry is projected to ...,simple,[{'title': 'Rideshare Vehicles: A Meaningful B...,True,The global rideshare industry is projected to ...
8,What is the importance of delivering multiface...,['there’s an obstacle in my way\n—\nperhaps I ...,The critical support offered to EV buyers need...,simple,[{'title': 'How to Create an “EV Easy” Mindset...,True,Delivering multifaceted EV support to EV buyer...
9,What are some of the specific areas that manuf...,"['For fans of the Terminator series of movies,...","Manufacturers, suppliers, and technology compa...",simple,[{'title': 'Rise of the Smart Car: The Shape-S...,True,"Manufacturers, suppliers, and technology compa..."


In [None]:
# Write the DataFrame to disk as CSV text without the index column; it includes the
# 'Response' column once the generation cell has run.
# NOTE(review): the target carries an ".ipynb" extension although CSV is written —
# confirm the intended filename. The path is also absolute and machine-specific.
responses_path = r"E:\Github_Repo\Info-Retrieve-AI\RAGAs\code\gpt4_responses.ipynb"
data.to_csv(path_or_buf=responses_path, index=False)
print("Responses have been saved successfully.")

Responses have been saved successfully.
