# RAGA

## Setup & Installations

In [None]:
# Setup directory: make the repository folder the current working directory,
# so relative paths in later cells (e.g. raga_requirements.txt) are resolved
# against it.
# NOTE(review): this is an absolute, machine-specific Windows path, while other
# cells in this notebook use /content/drive/... paths — confirm which
# environment is intended.
%cd E:/Github_Repo/Info-Retrieve-AI/

In [None]:
# Install required packages listed in raga_requirements.txt. The file name is
# a relative path, so it is looked up in the current working directory.
%pip install -r raga_requirements.txt

/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA


In [None]:
# Ignore warnings: install a blanket 'ignore' filter, which silences every
# warning category (deprecation notices included) until the filters are changed.
import warnings
warnings.filterwarnings('ignore')
# Use Hugging Face API
# TODO: the import below is disabled; confirm whether HfApi is needed and delete the line if not.
# from huggingface_hub import HfApi  # For interacting with Hugging Face Hub (unsure if it is required or not)

In [None]:
import os
import time
from urllib.parse import urljoin

import pandas as pd
import ragas
import requests
from bs4 import BeautifulSoup
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
from ragas import evaluation
from ragas.testset.docstore import Document
from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas.testset.generator import TestsetGenerator

from __init__ import cfg

In [None]:
# Debug aid: print the attribute names exposed by the imported ragas package.
print(dir(ragas))

['RunConfig', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_analytics', '_version', 'adapt', 'adaptation', 'callbacks', 'embeddings', 'evaluate', 'evaluation', 'exceptions', 'executor', 'llms', 'metrics', 'run_config', 'testset', 'utils', 'validation']


In [None]:
# Route every Hugging Face cache location to one shared directory, then
# publish the access token from the project config for authentication.
HF_CACHE_DIR = '/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA/huggingface'
for cache_variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE', 'HF_METRICS_CACHE'):
    os.environ[cache_variable] = HF_CACHE_DIR
os.environ['HF_TOKEN'] = cfg.HF_TOKEN

In [None]:
# Setup OpenAI API key from your config: copy cfg.OPENAI_API_KEY into the
# OPENAI_API_KEY environment variable of this process.
os.environ['OPENAI_API_KEY'] = cfg.OPENAI_API_KEY

## Blog Scraper

In [None]:
class BlogScraper:
    """Scrape a blog listing page and the text of every post it links to.

    Parameters
    ----------
    url : str
        Address of the listing page.
    headers : dict
        HTTP headers sent with every request.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up (default 30).
    """

    # CSS classes the scraper looks for in the page markup.
    LISTING_CLASS = 'gridbox gridbox-170-970'
    TITLE_CLASS = 'card-title headingC sans'
    CONTENT_CLASS = 'wysiwyg'

    # Column order of the returned DataFrame (kept even when no post is scraped).
    COLUMNS = ["Index", "Heading", "Hyperlink", "Content"]

    def __init__(self, url, headers, timeout=30):
        self.url = url
        self.headers = headers
        self.timeout = timeout

    def _fetch(self, url):
        """Return the response for ``url``, or None on a request error or non-200 status."""
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts are reported, not propagated, so one
            # unreachable page does not abort the whole scrape.
            print(f"Request to {url} raised an error: {exc}")
            return None
        if response.status_code != 200:
            return None
        return response

    def scrape(self):
        """Collect heading, hyperlink and body text of every listed post.

        Returns
        -------
        pandas.DataFrame or None
            One row per post that was scraped successfully, with the columns
            in ``COLUMNS``. ``Index`` is the 1-based position of the post in
            the listing, so skipped posts leave gaps. None is returned when
            the listing page cannot be fetched or does not contain the
            expected listing container.
        """
        listing_response = self._fetch(self.url)
        if listing_response is None:
            print("Failed to fetch the webpage.")
            return None

        soup = BeautifulSoup(listing_response.content, 'html.parser')
        box = soup.find('div', class_=self.LISTING_CLASS)
        if box is None:
            # find() returns None when the markup does not match.
            print("Failed to locate the blog listing container on the webpage.")
            return None

        data = []
        items = box.find_all('div', class_=self.TITLE_CLASS)
        for index, item in enumerate(items, start=1):
            title = item.text.strip()

            anchor = item.find('a')
            href = anchor.get('href') if anchor is not None else None
            if not href:
                print(f"Skipping entry without a hyperlink: {title}")
                continue

            # Resolve relative hrefs against the listing URL.
            link = urljoin(self.url, href)
            link_response = self._fetch(link)
            if link_response is None:
                print(f"Failed to fetch content for hyperlink: {link}")
                continue

            link_soup = BeautifulSoup(link_response.content, 'html.parser')
            body = link_soup.find('div', class_=self.CONTENT_CLASS)
            if body is None:
                print(f"No article content found for hyperlink: {link}")
                continue

            content = body.get_text(separator='\n').strip()
            data.append({"Index": index, "Heading": title, "Hyperlink": link, "Content": content})

        return pd.DataFrame(data, columns=self.COLUMNS)

In [None]:
# Blog listing page, narrowed by its query string to the
# automotive-and-mobility industry.
url = 'https://escalent.co/thought-leadership/blog/?industry=automotive-and-mobility'

# Browser-style User-Agent header handed to the scraper.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

# Destination of the scraped posts.
blog_csv_path = "/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA/blogdata.csv"

scraper = BlogScraper(url=url, headers=headers)
df = scraper.scrape()

# scrape() returns None on failure; only write the file when there is a
# DataFrame to persist.
if df is not None:
    df.to_csv(blog_csv_path, index=False)
    print("DataFrame saved as CSV successfully.")
    print("DataFrame saved as CSV successfully.")

DataFrame saved as CSV successfully.


In [None]:
# Preview the first rows of the scraped posts.
# NOTE: df is None when scrape() failed, in which case this line raises AttributeError.
df.head()

Unnamed: 0,Index,Heading,Hyperlink,Content
0,1,“Breaking the Mold” by Widening the Innovation...,https://escalent.co/blog/breaking-the-mold-by-...,While organizations embrace innovation as crit...
1,2,How Battery Electric Vehicles (BEVs) Will Resh...,https://escalent.co/blog/how-battery-electric-...,The European automotive landscape is undergoin...
2,3,"Among Fleets, Recommendations Serve as Primary...",https://escalent.co/blog/among-fleets-recommen...,The telematics market is poised for a surge of...
3,4,Charging Up: Why Electric Vehicle (EV) Chargin...,https://escalent.co/blog/charging-up-why-elect...,This blog is written from the point of view of...
4,5,How Electric Vehicles (EVs) Will Transform the...,https://escalent.co/blog/how-electric-vehicles...,The e-mobility transition is scrambling consum...


## Synthetic Test Generator

In [None]:
# Load Blog data from the CSV file (the same path the scraping cell writes to).
documents = pd.read_csv('/content/drive/MyDrive/Colab_Notebooks/Capstone_Project/Web_URL/IRS_Models/RAGA/blogdata.csv')

In [None]:
# Preview the first rows of the loaded blog data.
documents.head()

Unnamed: 0,Index,Heading,Hyperlink,Content
0,1,“Breaking the Mold” by Widening the Innovation...,https://escalent.co/blog/breaking-the-mold-by-...,While organizations embrace innovation as crit...
1,2,How Battery Electric Vehicles (BEVs) Will Resh...,https://escalent.co/blog/how-battery-electric-...,The European automotive landscape is undergoin...
2,3,"Among Fleets, Recommendations Serve as Primary...",https://escalent.co/blog/among-fleets-recommen...,The telematics market is poised for a surge of...
3,4,Charging Up: Why Electric Vehicle (EV) Chargin...,https://escalent.co/blog/charging-up-why-elect...,This blog is written from the point of view of...
4,5,How Electric Vehicles (EVs) Will Transform the...,https://escalent.co/blog/how-electric-vehicles...,The e-mobility transition is scrambling consum...


In [None]:
# Dump every blog post, one labelled line per field.
for row_label, post in documents.iterrows():
    print("Index:", row_label)
    for label, column in (("Heading:", 'Heading'), ("Hyperlink:", 'Hyperlink'), ("Content:", 'Content')):
        print(label, post[column])

Index: 0
Heading: “Breaking the Mold” by Widening the Innovation Lens
Hyperlink: https://escalent.co/blog/breaking-the-mold-by-widening-the-innovation-lens/
Content: While organizations embrace innovation as critical to their long-term success, they are also often stymied by how to develop such innovations. The pressure and risks associated with developing groundbreaking products and services can be enormous and overwhelming. But innovations don’t have to be created from scratch or in a vacuum. Current products and services may meet basic customer needs, but sometimes they don’t fully address higher-level customer objectives (or “jobs to be done”). If firms can identify these broader objectives and “break the mold” with new and reimagined products and services, these innovations can sometimes be so successful that they transform their respective industries.


The proof?


Several years ago, I was in the field interviewing respondents on a wide range of topics. At one point during the i

In [None]:
# Number of synthetic question/answer pairs to generate.
TEST_SIZE = 50
# Destination of the exported test set.
# NOTE(review): this is a Windows path, while the data-loading cells use
# /content/drive/... paths — confirm the intended environment.
SYNTHETIC_QA_CSV = r"E:\Github_Repo\Info-Retrieve-AI\RAGAs\output\syn_gpt_qa_50.csv"

# Convert DataFrame rows to Document objects expected by Ragas.
# Rows without Content are skipped: pandas reads an empty CSV cell back as NaN
# (a float), which is not usable as page text.
documents_with_text = documents.dropna(subset=['Content'])
document_objects = [
    Document(page_content=row['Content'], metadata={'title': row['Heading'], 'url': row['Hyperlink']})
    for _, row in documents_with_text.iterrows()
]

# Initialize the Ragas Testset Generator with models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Share of each question type in the generated test set (the weights sum to 1).
distributions = {
    simple: 0.5,
    reasoning: 0.25,
    multi_context: 0.25
}

# Reset the result first so a failed run cannot leave a stale test_df from an
# earlier execution of this cell behind.
test_df = None
try:
    # Create the output folder before the expensive generation, so a bad
    # destination is reported up front instead of after the LLM calls.
    # dirname() is empty when the path has no separator for this OS.
    output_dir = os.path.dirname(SYNTHETIC_QA_CSV)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Generate synthetic test data using document_objects instead of documents
    testset = generator.generate_with_langchain_docs(document_objects, test_size=TEST_SIZE, distributions=distributions, raise_exceptions=False)

    # Export the results to a DataFrame
    test_df = testset.to_pandas()
    test_df.to_csv(SYNTHETIC_QA_CSV, index=False)
    print("Synthetic Q&A pairs generated and saved successfully.")
except Exception as e:
    print("An error occurred during the generation or export of synthetic Q&A pairs:", e)

Projective exercises—the presentation of calibrated stimuli onto which a respondent projects their feelings, attitudes or beliefs—are a critical asset in the qualitative researcher’s emotional toolbox. The technique offers the promise of achieving greater depth and validity of insight by facilitating expression of subconscious or difficult-to-articulate feelings that are less accessible using direct “Q&A.”


But what makes for a good projective exercise?


We put learning into action at a recent market research conference by testing a series of probes that revealed some lessons about projectives.


First, we asked visitors to our booth to help us learn about, “What makes great qualitative research.” We then invited them to post onto a chalkboard their reactions to a probe related to the goal of understanding how to deliver great 
qualitative research
. Our lesson on projectives focuses on contrasting two of several probes that we asked:




What is your qualitative superpower?


What D

embedding nodes:   0%|          | 0/474 [00:00<?, ?it/s]



Generating:   0%|          | 0/50 [00:00<?, ?it/s]

Synthetic Q&A pairs generated and saved successfully.


In [None]:
# Display the synthetic test set produced by the generation cell above.
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What challenges do auto manufacturers face in ...,"[In both the UK and US, there is currently a r...",Severe price increases imposed via tariffs tha...,simple,[{'title': 'How Disruptive Pricing Will Impact...,True
1,What was the limitation of the quick-serve res...,[ quick-serve restaurant (QSR) we worked with ...,The quick-serve restaurant's segmentation solu...,simple,"[{'title': 'Obtain a Deeper, Multidimensional ...",True
2,How does negative press coverage impact a bran...,[ spurred viral videos of fans destroying the ...,,simple,"[{'title': 'The Power, and Pitfalls, of Politi...",True
3,How does EVForward provide valuable insight in...,[ to much more effective results.\n\n\nEVForwa...,EVForward provides valuable insight into the E...,simple,[{'title': 'Finding the Future of Mobility wit...,True
4,What challenges do landlords face when install...,"[ sharing (i.e., if multiple EVs are plugged i...",Landlords face a balancing act in installing i...,simple,[{'title': 'Multifamily Housing EV Charging: W...,True
5,How can effective sponsorship measurement help...,"[ marketers don’t see the value, or worse stil...",Effective sponsorship measurement provides det...,simple,[{'title': 'Measuring the Value of Sponsorship...,True
6,What is the current availability of charging s...,[EV Potential Not Yet Realized\n\n\nAs we look...,The number of public charging stations has inc...,simple,"[{'title': 'The Challenges of Making EV Easy',...",True
7,How is the global rideshare industry projected...,[It’s safe to say that ridesharing services li...,The global rideshare industry is projected to ...,simple,[{'title': 'Rideshare Vehicles: A Meaningful B...,True
8,What is the importance of delivering multiface...,[there’s an obstacle in my way\n—\nperhaps I s...,The critical support offered to EV buyers need...,simple,[{'title': 'How to Create an “EV Easy” Mindset...,True
9,What are some of the specific areas that manuf...,"[For fans of the Terminator series of movies, ...","Manufacturers, suppliers, and technology compa...",simple,[{'title': 'Rise of the Smart Car: The Shape-S...,True
