# IRS - Gemini

## Setup and Installations

In [1]:
# Setup directory
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# works where the repository is checked out at exactly this location.
%cd E:/Github_Repo/Info-Retrieve-AI/

E:\Github_Repo\Info-Retrieve-AI


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
# Install required packages
# %pip targets the running kernel's environment. requirements.txt is resolved
# relative to the current working directory, so the %cd cell must run first.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Import necessary libraries
from __init__ import cfg
import pandas as pd
import numpy as np
import requests
import google.generativeai as genai
import pinecone
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from pinecone import Pinecone, ServerlessSpec


In [4]:
# Configure the Gemini API using the key from config.py
# `cfg` comes from the project's `__init__` module (see the imports cell), so
# the API key itself never appears in the notebook.
genai.configure(api_key=cfg.GOOGLE_API_KEY)

## Web Scraper

In [6]:
class BlogScraper:
    """Scrape a blog listing page and the body text of every post it links to.

    Attributes:
        url: Address of the listing page that enumerates the posts.
        headers: HTTP headers sent with every request.
    """

    # Seconds to wait for any single HTTP request. Without a timeout a stalled
    # server would block the notebook cell indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, url, headers):
        self.url = url
        self.headers = headers

    def _fetch(self, url):
        """Return the response for ``url``, or None on a network error or non-200 status."""
        try:
            response = requests.get(
                url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            print(f"Request error for {url}: {exc}")
            return None
        if response.status_code != 200:
            return None
        return response

    def scrape(self):
        """Collect heading, link and body text for each post on the listing page.

        Posts that cannot be processed (no hyperlink, failed download, missing
        article body) are reported with a message and skipped instead of
        aborting the whole scrape.

        Returns:
            A list of dicts with the keys ``"Index"`` (1-based position on the
            listing page), ``"Heading"``, ``"Hyperlink"`` and ``"Content"``.
            An empty list when the listing page was fetched but the post
            container was not found. ``None`` when the listing page itself
            could not be fetched.
        """
        from urllib.parse import urljoin

        response = self._fetch(self.url)
        if response is None:
            print("Failed to fetch the webpage.")
            return None

        soup = BeautifulSoup(response.content, "html.parser")
        box = soup.find("div", class_="gridbox gridbox-170-970")
        if box is None:
            # The page loaded but its layout does not match the expected markup.
            print("Could not locate the post listing on the webpage.")
            return []

        items = box.find_all("div", class_="card-title headingC sans")

        data = []
        for index, item in enumerate(items, start=1):
            title = item.text.strip()

            anchor = item.find("a")
            href = anchor.get("href") if anchor is not None else None
            if not href:
                print(f"No hyperlink found for post: {title}")
                continue

            # Resolve relative hrefs against the listing URL; absolute hrefs
            # pass through unchanged.
            link = urljoin(self.url, href)

            link_response = self._fetch(link)
            if link_response is None:
                print(f"Failed to fetch content for hyperlink: {link}")
                continue

            link_soup = BeautifulSoup(link_response.content, "html.parser")
            body = link_soup.find("div", class_="wysiwyg")
            if body is None:
                print(f"No article body found for hyperlink: {link}")
                continue

            data.append(
                {
                    "Index": index,
                    "Heading": title,
                    "Hyperlink": link,
                    "Content": body.get_text(separator="\n").strip(),
                }
            )

        return data

## BlogIndexer

In [7]:
# BlogIndexer
class BlogIndexer:
    """Embed scraped blog posts and upsert them into a Pinecone index.

    Attributes:
        scraper: ``BlogScraper`` that supplies the posts.
        model: Sentence-transformer used to embed both posts and queries.
        index_name: Name of the Pinecone index.
        index: Pinecone index handle used for upserts and queries.
    """

    def __init__(self, url, headers):
        self.scraper = BlogScraper(url, headers)
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index_name = "blog-index"
        # NOTE(review): the host below is specific to one Pinecone deployment.
        self.index = pinecone.Index(name=self.index_name, api_key=cfg.PINECONE_API_KEY, host='https://blog-index-ntt4sfk.svc.aped-4627-b74a.pinecone.io')
        # The return value is discarded; presumably this acts as an early
        # connectivity/credentials check -- confirm.
        self.index.describe_index_stats()

    def _scraped_items(self):
        """Run the scraper and return its items, or an empty list if there are none."""
        data = self.scraper.scrape()
        if not data:
            # scrape() returns None when the listing page could not be fetched.
            print("No scraped data available.")
            return []
        return data

    def _embed(self, item):
        """Embed one scraped item as "<Heading>. <Content>"."""
        combined_text = f"{item['Heading']}. {item['Content']}"
        return self.model.encode(combined_text, convert_to_tensor=False)

    def index_content(self):
        """Scrape the blog, embed each post and upsert the vectors into the index.

        Vector ids are the posts' 1-based listing positions. Nothing is
        upserted when the scraper yields no posts.
        """
        items = self._scraped_items()
        if not items:
            return

        # Include content in metadata for retrieval in the QA system.
        # NOTE(review): the full post text is stored as metadata; Pinecone
        # enforces a per-record metadata size limit -- confirm long posts fit.
        upsert_data = [
            (str(item['Index']), self._embed(item).tolist(), {'content': item['Content']})
            for item in items
        ]
        self.index.upsert(vectors=upsert_data)
        print("Content indexed successfully.")

    def view_scraped_data(self):
        """Scrape the blog and print every scraped item."""
        for item in self._scraped_items():
            print(item)

    def test_embeddings(self):
        """Scrape the blog and print the first five embedding components of each post."""
        for item in self._scraped_items():
            embedding = self._embed(item)
            print(f"Index: {item['Index']}, Heading: {item['Heading']}, Embedding: {embedding[:5]}...")

In [8]:
# Smoke test: build the indexer for the automotive blog listing and print
# every item the scraper returns.
blog_url = 'https://escalent.co/thought-leadership/blog/?industry=automotive-and-mobility'
request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
indexer = BlogIndexer(url=blog_url, headers=request_headers)
indexer.view_scraped_data()

{'Index': 1, 'Heading': 'Class 2b-3 Commercial Vehicles: How the Industry Can Find the Right Balance Across Supply, Demand, and Emissions Requirements', 'Hyperlink': 'https://escalent.co/blog/class-2b-3-commercial-vehicles-how-the-industry-can-find-the-right-balance-across-supply-demand-and-emissions-requirements/', 'Content': 'In \nPart 1\n of this mini blog series,\xa0we dove into the current state of the class 2b-3 commercial vehicles market, including how customers are using these vehicles, the preferred body types and body manufacturers, most frequently considered chassis types, and which original equipment manufacturers (OEMs) are leading the market.\n\n\nIn this blog, we’ll cover where the market is headed, explore important considerations around the recently published Environmental Protection Agency (EPA) regulations, and uncover what customers will be looking for when buying class 2b-3 vehicles in the future.\n\n\nThrough this two-part mini blog series, you’ll walk away with a

In [9]:
# Test Case
# Upsert an embedding for every scraped post, then print the first five
# embedding components per post. Each of the two calls scrapes the site again.
indexer.index_content()
indexer.test_embeddings()

Content indexed successfully.
Index: 1, Heading: Class 2b-3 Commercial Vehicles: How the Industry Can Find the Right Balance Across Supply, Demand, and Emissions Requirements, Embedding: [-0.02165475 -0.02275845  0.04729758  0.02454941  0.09072912]...
Index: 2, Heading: Three Brand Authenticity Lessons to Foster Trust & Loyalty, Embedding: [-0.02224085 -0.06355947 -0.01894405 -0.05489418  0.02942   ]...
Index: 3, Heading: Class 2b-3 Commercial Vehicles: Where We Are and Where We’re Going, Embedding: [-0.04205374 -0.01413041  0.07097294 -0.01596165  0.04394965]...
Index: 4, Heading: New-Car Buyers and Selective Sustainability in Europe, Embedding: [ 0.00636731  0.07291902  0.06189306 -0.02130575  0.07443894]...
Index: 5, Heading: How Technology Companies, App Providers and Advertisers Stand to Gain From a Growing EV Market, Embedding: [0.04037909 0.03987464 0.00520844 0.01189051 0.09666727]...
Index: 6, Heading: “Breaking the Mold” by Widening the Innovation Lens, Embedding: [-0.0278266

## QA System: Gemini AI

In [10]:
class QASystem:
    """Retrieval-augmented question answering on top of a ``BlogIndexer``.

    A query is embedded with the indexer's model, the closest posts are
    fetched from the indexer's vector index, and their text is passed to a
    Gemini model together with the question.

    Attributes:
        model: Gemini generative model used to produce answers.
        indexer: Object exposing ``model.encode`` and ``index.query``.
        logs: DataFrame with one ``Query``/``Response`` row per answered query.
    """

    def __init__(self, model_name, indexer_instance):
        self.model = genai.GenerativeModel(model_name)
        self.indexer = indexer_instance
        self.logs = pd.DataFrame(columns=['Query', 'Response'])  # Initialize an empty DataFrame

    def query_to_embedding(self, query):
        """Return the embedding of ``query`` as a plain list."""
        embedding = self.indexer.model.encode(query, convert_to_tensor=False)
        return embedding.tolist()

    def retrieve_context(self, query_embedding, top_k=3):
        """Return the stored content of the ``top_k`` closest indexed posts.

        Matches that carry no usable ``metadata['content']`` are skipped.
        """
        query_results = self.indexer.index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
        documents = []
        for match in query_results.get('matches') or []:
            try:
                content = match['metadata']['content']
            except (KeyError, TypeError, AttributeError):
                # Record was stored without metadata or without a content field.
                continue
            if content:
                documents.append(content)
        return documents

    def answer_query(self, query):
        """Answer ``query`` from the indexed posts and record the exchange in ``logs``.

        Returns:
            The model's answer text; a fixed "I don't know" message when no
            context was retrieved; or a fixed failure message when the model
            response has no readable text part.
        """
        print("Generating query embedding...")
        query_embedding = self.query_to_embedding(query)
        print("Retrieving context...")
        contexts = self.retrieve_context(query_embedding)

        if not contexts:
            response_text = "I don't know. Thanks for asking!"
        else:
            augmented_query = " ".join(contexts) + "\n\n" + query
            prompt = f"Here is the information I found on the topic:\n{augmented_query}\n\nCan you provide a detailed answer based on the information above?"
            print("Generating response based on the context...")
            response = self.model.generate_content(prompt)
            try:
                response_text = response.candidates[0].content.parts[0].text
            except (AttributeError, IndexError):
                # IndexError covers a response with no candidates or no parts.
                response_text = "Failed to parse the response correctly."

            print("Response generated.")

        # Log the query and the response. Appending via .loc avoids
        # concatenating against an empty frame.
        self.logs.loc[len(self.logs)] = [query, response_text]
        return response_text

    def save_logs_to_csv(self, filename="gemini_query_logs.csv"):
        """Write the query/response log to ``filename`` as CSV (without the index)."""
        self.logs.to_csv(filename, index=False)
        print(f"Logs saved to {filename}.")

    def print_log(self):
        """Print the query/response log, or a notice when it is empty."""
        if self.logs.empty:
            print("No entries in the log.")
        else:
            print(self.logs)

## Test

In [11]:
# Build an indexer for the automotive blog listing and wire it into a
# Gemini-backed QA system.
qa_blog_url = 'https://escalent.co/thought-leadership/blog/?industry=automotive-and-mobility'
qa_request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
indexer = BlogIndexer(url=qa_blog_url, headers=qa_request_headers)

qa_system = QASystem(model_name='gemini-pro', indexer_instance=indexer)

### Queries

In [12]:
# In-domain question about BEVs and European retail.
bev_answer = qa_system.answer_query("How Battery Electric Vehicles (BEVs) Will Reshape Retail in Europe?")
print(bev_answer)

Generating query embedding...
Retrieving context...
Generating response based on the context...
Response generated.
**How Battery Electric Vehicles (BEVs) Will Reshape Retail in Europe**

**1. Decline in Fuel Sales and Impact on Retailers**

As BEVs gain popularity, fuel sales, a significant revenue stream for modern retailers like Tesco, Carrefour, and E.Leclerc, will decline. This poses a challenge for retailers to compensate for the lost revenue.

**2. Opportunity for Retailers to Offer BEV Charging Stations**

Retailers can create new revenue streams by strategically investing in BEV charging stations. This investment benefits retailers by:

* **Attracting BEV Drivers:** BEV drivers need to spend time charging their vehicles, presenting an opportunity for retailers to offer a broader range of services.
* **Offering Solutions to Driver Pain Points:** Retail chains like Ikea and Tesco have recognized the value of incorporating BEV charging into their retail locations, addressing the 

In [13]:
# In-domain question about telematics market growth.
telematics_answer = qa_system.answer_query("How will the telematics market's growth be in the coming years?")
print(telematics_answer)

Generating query embedding...
Retrieving context...
Generating response based on the context...
Response generated.
The telematics market is poised for a surge of growth in the coming years. The evolving regulatory landscape, a growing appetite for connected technologies, and the ever-present pressure to remain competitive are all prompting fleet businesses to investigate the benefits of telematics solutions.

According to the report, the global commercial vehicle telematics market is projected to grow from $37.7 billion in 2022 to $83.6 billion by 2029, at a CAGR of 11.4%. The factors driving this growth include:

* Increasing adoption of telematics solutions by fleet operators to improve operational efficiency and reduce costs
* Government regulations mandating the use of telematics devices in commercial vehicles
* Growing demand for connected vehicles and the integration of telematics solutions with other vehicle technologies
* Increasing awareness of the benefits of telematics solu

In [15]:
# In-domain question asking for statistics from the indexed posts.
word_of_mouth_answer = qa_system.answer_query("Provide some statistical insights on the Impact of Word of Mouth on Fleet Decision-Makers")
print(word_of_mouth_answer)

Generating query embedding...
Retrieving context...
Generating response based on the context...
Response generated.
**Impact of Word of Mouth on Fleet Decision-Makers**

* **44%** of fleet decision-makers cite a recommendation as the trigger for their telematics purchase journey.

* **28%** of these recommendations came from industry peers.

* **25%** came from colleagues.

* **20%** came from telematics service providers (TSPs).

**Significance of Word of Mouth for TSPs**

* Positive word of mouth presents an opportunity for TSPs to enhance their penetration in the market.

* TSPs should ensure they consistently meet or exceed customer expectations to remain top of mind for potential referrals.

**Importance of Understanding the Impact of Word of Mouth for Service Providers**

* Service providers need to understand the role of word of mouth in the telematics purchase process.

* They should tailor their marketing strategies and consultative services to align with the influence of word

In [14]:
# Out-of-domain question: the indexed posts contain nothing on this topic.
person_answer = qa_system.answer_query("Who is Narendra Modi?")
print(person_answer)

Generating query embedding...
Retrieving context...
Generating response based on the context...
Response generated.
The provided text does not mention anything about Narendra Modi, so I cannot answer this question from the provided context.


In [16]:
# Second out-of-domain question, unrelated to the indexed posts.
galaxy_answer = qa_system.answer_query("In which Universe is Milky Way Galaxy?")
print(galaxy_answer)

Generating query embedding...
Retrieving context...
Generating response based on the context...
Response generated.
The provided text does not contain any information about the Milky Way Galaxy or which Universe it is in, so I cannot answer this question from the provided context.


## Logging the Queries and Responses

In [17]:
# Print the query/response pairs that qa_system has recorded so far.
qa_system.print_log()

                                               Query  \
0  How Battery Electric Vehicles (BEVs) Will Resh...   
1  How will the telematics market's growth be in ...   
2                              Who is Narendra Modi?   
3  Provide some statistical insights on the Impac...   
4             In which Universe is Milky Way Galaxy?   

                                            Response  
0  **How Battery Electric Vehicles (BEVs) Will Re...  
1  The telematics market is poised for a surge of...  
2  The provided text does not mention anything ab...  
3  **Impact of Word of Mouth on Fleet Decision-Ma...  
4  The provided text does not contain any informa...  


In [18]:
# Persist the recorded queries and responses to a CSV file; the cell output
# reports the file name used.
qa_system.save_logs_to_csv()

Logs saved to gemini_query_logs.csv.
