# **Install the required modules**

In [1]:
!pip install youtube-transcript-api
!pip install openai
!pip install pytube
!pip install openai-whisper
!pip install google-generativeai
!pip install yt-dlp

Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.2
Collecting openai
  Downloading openai-1.35.14-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.5/328.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━

# **Import the modules**

In [2]:
import google.generativeai as genai
from openai import OpenAI
import os
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi

# **Configuring the Gemini API**

In [3]:
# Configure i.e loading the API Key for communication.
genai.configure(api_key="API_KEY")

# **Configuring the OpenAI API**

In [4]:
import openai
openai.api_key = "API_KEY"
client = openai.OpenAI(api_key=openai.api_key) # Pass the API key to the constructor

# **Conversion of Video into Audio file for Text conversion using OpenAI**

In [5]:
import os
import subprocess

def download_and_rename_audio(url, new_name="audio1"):
    # Step 1: Download the video and extract audio
    command = ['yt-dlp', '--extract-audio', '--audio-format', 'mp3', '--output', f'{new_name}.%(ext)s', url]
    subprocess.run(command, check=True)

    # Step 2: Find the downloaded audio file
    filenames = os.listdir('.')
    old_path = None
    for filename in filenames:
        if filename.startswith(new_name) and filename.endswith('.mp3'):
            old_path = filename
            break

    if old_path is None:
        print("Error: No downloaded audio file found.")
        return None

    # Step 3: Store the new path
    new_file_path = os.path.join('.', old_path)
    print(f"New file path: {new_file_path}")
    return new_file_path

# **Getting the Transcript of the video provided**

In [6]:
!pip install yt-dlp



In [7]:
def get_transcript_text(video_id):
    try:
        # Retrieve the transcript
        print("In try")
        transcript = YouTubeTranscriptApi.get_transcript('en', video_id)
        print("In try")

        # Extract and print only the text
        transcript_text = ' '.join([entry['text'] for entry in transcript])
        return transcript_text
    except Exception as e:
        # print(f"Error: {e}")
        print("Error Occured while getting transcript")
        print("Using OpenAI to convert the YouTube video into text")
        video_url = f"https://www.youtube.com/watch?v={video_id}"

        # Download the video
        download_and_rename_audio(video_url)
        # print("The Path of downloaded file is /content/vid1.mp4")

# **Audio To Text using OpenAI**

In [8]:
get_transcript_text("_Ki4bS4V2gQ")

In try
Error Occured while getting transcript
Using OpenAI to convert the YouTube video into text
New file path: ./audio1.mp3


In [9]:
# !pip install git+https://github.com/openai/whisper.git
# !apt update && apt install -y ffmpeg
# !apt update && apt install -y ffmpeg

In [10]:
import whisper
import time

# Load the smallest Whisper model for faster processing
model = whisper.load_model("tiny")

# Path to the audio file
audio_file_path = "/content/audio1.mp3"

# Start timing
start_time = time.time()

# Transcribe the audio file
result = model.transcribe(audio_file_path)

# End timing
end_time = time.time()

# Print the transcription and processing time
print("Transcription:", result["text"])
print("Processing time:", end_time - start_time, "seconds")

100%|█████████████████████████████████████| 72.1M/72.1M [00:01<00:00, 51.9MiB/s]


Transcription:  This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product. CMF is a new budget sub brand of the company nothing so we're talking a phone that costs 200 pounds or 250 dollars and it's actually a little bit crazy what these guys have just pulled off for that price. I mean the first thing I'll notice is this doesn't feel like just a single product launch. This is an entire ecosystem and it's really fascinating one. So let's get the phone out first. There's a Lanyard which is not something you tend to see launch with a phone. One case two cases no three cases a card wallet I guess that's for the back of the phone. There's a stand the Buds Pro 2 and then the watch Pro 2. It all feels very new because we've seen plenty of budget phones before but what you don't often get is a budget accessory ecosystem like this all designed to work specifically around that budget phone as the centerpiece. Everything has well on one hand a slig

In [11]:
result

{'text': " This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product. CMF is a new budget sub brand of the company nothing so we're talking a phone that costs 200 pounds or 250 dollars and it's actually a little bit crazy what these guys have just pulled off for that price. I mean the first thing I'll notice is this doesn't feel like just a single product launch. This is an entire ecosystem and it's really fascinating one. So let's get the phone out first. There's a Lanyard which is not something you tend to see launch with a phone. One case two cases no three cases a card wallet I guess that's for the back of the phone. There's a stand the Buds Pro 2 and then the watch Pro 2. It all feels very new because we've seen plenty of budget phones before but what you don't often get is a budget accessory ecosystem like this all designed to work specifically around that budget phone as the centerpiece. Everything has well on one hand a slightly 

In [12]:
result["text"]

" This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product. CMF is a new budget sub brand of the company nothing so we're talking a phone that costs 200 pounds or 250 dollars and it's actually a little bit crazy what these guys have just pulled off for that price. I mean the first thing I'll notice is this doesn't feel like just a single product launch. This is an entire ecosystem and it's really fascinating one. So let's get the phone out first. There's a Lanyard which is not something you tend to see launch with a phone. One case two cases no three cases a card wallet I guess that's for the back of the phone. There's a stand the Buds Pro 2 and then the watch Pro 2. It all feels very new because we've seen plenty of budget phones before but what you don't often get is a budget accessory ecosystem like this all designed to work specifically around that budget phone as the centerpiece. Everything has well on one hand a slightly cheap thi

# **Using Gemini to get the Transcript**

In [13]:
# import time

# # Initialize a Gemini model appropriate for your use case.
# model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# # Create the prompt.
# prompt = "Generate a transcript of the speech."

# # Path to the audio file
# audio_file = genai.upload_file(path='/content/audio1.mp3')

# # Start timing
# start_time = time.time()

# # Pass the prompt and the audio file to Gemini.
# response = model.generate_content([prompt, audio_file])

# # End timing
# end_time = time.time()

# # Print the transcript.
# print(response.text)
# print("Processing time:", end_time - start_time, "seconds")
import time

# Initialize a Gemini model appropriate for your use case.
model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# Create the prompt.
prompt = "Generate a transcript of the speech."

# Path to the audio file
audio_file_path = '/content/audio1.mp3'

# Check if the file exists
import os
if not os.path.exists(audio_file_path):
    raise FileNotFoundError(f"File not found: {audio_file_path}")

# Upload the audio file
audio_file = genai.upload_file(path=audio_file_path)

# Start timing
start_time = time.time()

# Pass the prompt and the audio file to Gemini.
try:
    response = model.generate_content([prompt, audio_file])

    # End timing
    end_time = time.time()

    # Print the transcript.
    print(response.text)
    print("Processing time:", end_time - start_time, "seconds")

except Exception as e:
    print(f"An error occurred: {e}")

ERROR:tornado.access:500 POST /v1beta/models/gemini-1.5-pro:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 1939.20ms


An error occurred: 500 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent?%24alt=json%3Benum-encoding%3Dint: An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting


# **Storing the transcript into KB**

In [14]:
knowledge_base = result["text"]
knowledge_base

" This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product. CMF is a new budget sub brand of the company nothing so we're talking a phone that costs 200 pounds or 250 dollars and it's actually a little bit crazy what these guys have just pulled off for that price. I mean the first thing I'll notice is this doesn't feel like just a single product launch. This is an entire ecosystem and it's really fascinating one. So let's get the phone out first. There's a Lanyard which is not something you tend to see launch with a phone. One case two cases no three cases a card wallet I guess that's for the back of the phone. There's a stand the Buds Pro 2 and then the watch Pro 2. It all feels very new because we've seen plenty of budget phones before but what you don't often get is a budget accessory ecosystem like this all designed to work specifically around that budget phone as the centerpiece. Everything has well on one hand a slightly cheap thi

# **Using Gemini 1.5 flash for response generatinon**

In [15]:
# Taking question as input
question = input("Enter your question: ")

Enter your question: What is the cost of the phone


In [16]:
# Initialize a Gemini model appropriate for your use case.
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Defining promt for the required response
promt = """You are an expert in finding answers or matching answers for the asked question from the Knowledge Base Given below.
           Your task is to analyze the complete Knowledge Base and answer the questions asked.
           The Knowledge Base is : """ + knowledge_base + """The Question is""" + question + """ The Output Must be like Question:<Asked Question> And in new Line Answer: <Answer Generated>"""

# Pass the prompt and the audio file to Gemini.
response = model.generate_content([promt])

# Print the transcript.
print(response.text)

Question: What is the cost of the phone?
Answer: 200 pounds or 250 dollars. 



# **Generating Multiple Questions for the KB**

In [17]:
n_questions = input("Enter the number of questions you want to generate: ")

Enter the number of questions you want to generate: 10


In [18]:
# Defining promt for the required response
questions_promt = """You are an expert in framing the number of questions asked.
           Your task is to analyze the complete Knowledge Base and generate the number of questions asked.
           The Knowledge Base is : """ + knowledge_base + """The Number of Questions need to be Generated is""" + n_questions + """The output must be 1.<Question1> 2.<Question2> 3.<Question3> upto specified number of questions And provide answer for the question."""

# Pass the prompt and the audio file to Gemini.
question_response = model.generate_content([questions_promt])

# Print the transcript.
print(question_response.text)

Here are 10 questions generated from the provided text, along with potential answers based on the text's content:

1. **<Question1> What is the CMF phone one's unique feature that allows for customization and easy repair?**
   * **Answer:** The CMF phone one features interchangeable back plates that can be easily removed and replaced with a screwdriver. This allows for customization by mixing and matching colors and finishes, as well as making repairs more accessible and potentially less expensive.

2. **<Question2> How does the CMF phone one's design philosophy differ from the Nothing phone (1)?**
   * **Answer:** While the Nothing phone (1) prioritizes a minimalist design with LED lights, the CMF phone one focuses on a more customizable and modular design. It prioritizes functionality and practicality, offering features like interchangeable back plates and screw-on accessories. This approach allows for a greater degree of personalization without compromising on affordability.

3. **<

# **Summarizing the transcript of the YouTube video**

In [19]:
# Specifing the Summarizing prompt
summary_promt = """You are an expert in English Language
                   Now your task is to summarize the given content into 250 words and remove any gramatical mistakes.
                   The summary is :-""" + knowledge_base + """ Generate the Important points in each line"""

# Pass the prompt and the audio file to Gemini.
summary_response = model.generate_content([summary_promt])

# Print the transcript.
print(summary_response.text)

The Nothing CMF Phone 1 is a budget-friendly smartphone that packs a surprising amount of value for its price. Its biggest strength is its modular design, allowing users to swap back plates, customize the look, and even add accessories like stands and lanyards using a unique screw system. This innovative approach delivers high functionality without compromising cost.

While the screen boasts a smooth 120Hz refresh rate and a punchy display, the phone's performance is average, relying on a MediaTek Dimensity 7300 chipset. While it can handle most tasks, demanding games may require lower settings for optimal performance. The battery life, however, is exceptional, exceeding 10 hours of screen-on time.

The camera system, surprisingly good for this price range, features a dual-lens setup with a primary sensor that captures detailed images. The lack of a dedicated zoom lens and ultra-wide camera are notable omissions, but the overall camera performance is commendable considering the budget.

# **Generating the MCQs questons for it.**

In [20]:
mcqs_prompt = """You are an expert in framing the number of MCQ questions asked.
           Your task is to analyze the complete Knowledge Base and generate the number of questions asked.
           The Knowledge Base is : """ + knowledge_base + """The Number of Questions need to be Generated is""" + n_questions + """The output must be 1.<Question1> a. opt1 b. op2 c. opt3 d. opt4  2.<Question2>a. opt1 b. op2 c. opt3 d. opt4 3.<Question3>a. opt1 b. op2 c. opt3 d. opt4 upto specified number of questions"""

# Pass the prompt and the audio file to Gemini.
mcqs_response = model.generate_content([mcqs_prompt])

# Print the transcript.
print(mcqs_response.text)

##  CMF Phone One MCQ Questions

**1. What is the main selling point of the CMF Phone One, according to the text?**

a. Its powerful processor and large RAM.
b. Its exceptional camera quality.
c. Its customizable design and modular features.
d. Its budget-friendly price point and feature-rich ecosystem.

**2. What is the name of the company behind the CMF Phone One?**

a. Nothing
b. CMF
c. Google
d. Samsung

**3. What is the significance of the CMF Phone One's backplate being removable?**

a. It allows for easy battery replacement.
b. It enables the phone to be repaired more easily.
c. It makes the phone waterproof.
d. It enables the phone to be used without a case.

**4. What is the primary purpose of the "circle spinny thing" on the CMF Phone One?**

a. To enable wireless charging.
b. To provide additional storage space.
c. To act as a stand for the phone.
d. To enhance the phone's camera capabilities.

**5. Which of the following features is NOT available on the CMF Phone One?**

a.

# **Export into PDF**

# **Test Purpose Not fully Implemented from here**

# **RAG Implementation for above approach along with Gemini API**

In [21]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/227.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━[0m [32m194.6/227.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentence_transformers
Successfully installed sentence_transformers-3.0.1


In [22]:
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Configure the Gemini API
genai.configure(api_key="AIzaSyAIewGMqAtMEtZMZjDJgEPNEwh_Q74yfGw")

# Initialize the Gemini model
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# Initialize the sentence transformer model for embedding
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Split the knowledge base into chunks
def split_into_chunks(text, chunk_size=100):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

chunks = split_into_chunks(knowledge_base)

# Create embeddings for all chunks
chunk_embeddings = sentence_model.encode(chunks)

def get_most_relevant_chunk(query, top_k=3):
    query_embedding = sentence_model.encode([query])
    similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [chunks[i] for i in top_indices]

def rag_query(query):
    relevant_chunks = get_most_relevant_chunk(query)
    context = "\n".join(relevant_chunks)

    prompt = f"""You are an AI assistant trained to answer questions based on the given context.
    Use the following context to answer the question. If the answer is not in the context, say "I don't have enough information to answer this question."

    Context: {context}

    Question: {query}

    Answer:"""

    response = model.generate_content([prompt])
    return context, response.text

# Example usage
while True:
    question = input("Enter your question (or 'quit' to exit): ")
    if question.lower() == 'quit':
        break
    context, answer = rag_query(question)
    print(f"Question: {question}")
    print(f"Context:\n{context}\n")
    print(f"Answer: {answer}\n")
    print("-" * 50)  # Separator for readability

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Enter your question (or 'quit' to exit): quit


In [23]:
# chunk_embeddings

In [24]:
# chunks

# **Implementation of RAG for above using FAISS and cosine similarity**

In [25]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1


In [26]:
# Import the required modules
import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

In [27]:
# Create a FAISS index
dimension = chunk_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(chunk_embeddings.astype('float32'))

In [28]:
print(f"Dimension: {dimension}")
print(f"Index: {index}")

Dimension: 384
Index: <faiss.swigfaiss_avx512.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7c30914c6e50> >


In [29]:
# Store the chunks and their IDs
chunk_dict = {i: chunk for i, chunk in enumerate(chunks)}

In [30]:
# chunk_dict

In [31]:
# Save the index and chunk dictionary
faiss.write_index(index, "transcript_index.faiss")
with open("chunk_dict.pkl", "wb") as f:
    pickle.dump(chunk_dict, f)

In [32]:
def load_vector_db():
    # Load the index and chunk dictionary
    index = faiss.read_index("transcript_index.faiss")
    with open("chunk_dict.pkl", "rb") as f:
        chunk_dict = pickle.load(f)
    return index, chunk_dict

In [33]:
def get_most_relevant_chunk(query, index, chunk_dict, top_k=3):
    query_embedding = sentence_model.encode([query]).astype('float32')
    distances, indices = index.search(query_embedding, top_k)
    return [chunk_dict[int(i)] for i in indices[0]]

In [34]:
def rag_query(query, index, chunk_dict):
    relevant_chunks = get_most_relevant_chunk(query, index, chunk_dict)
    context = "\n".join(relevant_chunks)

    prompt = f"""You are an AI assistant trained to answer questions based on the given context.
    Use the following context to answer the question. If the answer is not in the context, say "I don't have enough information to answer this question."

    Context: {context}

    Question: {query}

    Answer:"""

    response = model.generate_content([prompt])
    return context, response.text

In [35]:
# Load the vector database
index, chunk_dict = load_vector_db()

In [36]:
print(f"Index: {index}")
print(f"Chunk Dictionary: {chunk_dict}")

Index: <faiss.swigfaiss_avx512.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7c3091bcd3e0> >
Chunk Dictionary: {0: "This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product. CMF is a new budget sub brand of the company nothing so we're talking a phone that costs 200 pounds or 250 dollars and it's actually a little bit crazy what these guys have just pulled off for that price. I mean the first thing I'll notice is this doesn't feel like just a single product launch. This is an entire ecosystem and it's really fascinating one. So let's get the phone out first. There's a Lanyard", 1: "which is not something you tend to see launch with a phone. One case two cases no three cases a card wallet I guess that's for the back of the phone. There's a stand the Buds Pro 2 and then the watch Pro 2. It all feels very new because we've seen plenty of budget phones before but what you don't often get is a budget accessory ecosys

In [37]:
# Example usage
while True:
    question = input("Enter your question (or 'quit' to exit): ")
    if question.lower() == 'quit':
        break
    context, answer = rag_query(question, index, chunk_dict)
    print(f"Question: {question}")
    print(f"Context:\n{context}\n")
    print(f"Answer: {answer}\n")
    print("-" * 50)  # Separator for readability

Enter your question (or 'quit' to exit): quit


# **Implementing only RAG without Gemini API to test the accuracy in the response**

In [38]:
# Import the required modules
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [39]:
# Download necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [40]:
# Preprocess the knowledge base
def preprocess(text):
    sentences = sent_tokenize(text)
    stop_words = set(stopwords.words('english'))
    processed_sentences = []
    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        words = [word for word in words if word.isalnum() and word not in stop_words]
        processed_sentences.append(' '.join(words))
    return processed_sentences

In [41]:
processed_kb = preprocess(knowledge_base)
processed_kb

['new nothing cmf phone one say often think genius product',
 'cmf new budget sub brand company nothing talking phone costs 200 pounds 250 dollars actually little bit crazy guys pulled price',
 'mean first thing notice feel like single product launch',
 'entire ecosystem really fascinating one',
 'let get phone first',
 'lanyard something tend see launch phone',
 'one case two cases three cases card wallet guess back phone',
 'stand buds pro 2 watch pro 2',
 'feels new seen plenty budget phones often get budget accessory ecosystem like designed work specifically around budget phone centerpiece',
 'everything well one hand slightly cheap thin plastic key feel hand really tastefully designed',
 'kind feels like products high end thought care put even physical materials finish basic',
 'like cmf buds 2 pro even though light clearly luxury item still feel sophisticated intentional design',
 'like cheap version something company would rather buy watch pro 2',
 'yeah slick clean',
 'never wa

In [42]:
# Create TF-IDF vectorizer
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(processed_kb)

In [43]:
# tfidf_matrix

In [44]:
def get_most_relevant_sentence(query):
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, tfidf_matrix)
    most_similar_idx = np.argmax(similarities)
    return sent_tokenize(knowledge_base)[most_similar_idx]

In [45]:
def answer_question(question):
    relevant_sentence = get_most_relevant_sentence(question)
    return f"Based on the information provided: {relevant_sentence}"

In [46]:
# Main loop
while True:
    user_question = input("Ask a question about the Nothing CMF phone (or type 'exit' to quit): ")
    if user_question.lower() == 'exit':
        break
    answer = answer_question(user_question)
    print(answer)
    print()

Ask a question about the Nothing CMF phone (or type 'exit' to quit): quit
Based on the information provided:  This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product.

Ask a question about the Nothing CMF phone (or type 'exit' to quit): quit
Based on the information provided:  This is the new Nothing CMF phone one and I don't say this very often but I think this is a genius product.

Ask a question about the Nothing CMF phone (or type 'exit' to quit): exit


# **Based on the above understanding, we must use both the RAG Application along with the LLM to provide much more accurate results. If we use only the RAG, we can't get the optimal results. By using RAG along with the Gemini API, we provide extra context that the answer mostly lies in the context for which the searching for the appropriate query time taken will be very less.**