In [1]:
# Install Langchain for building language models and chains
# Install BeautifulSoup4 for parsing HTML and web scraping
# Install pandas for data manipulation and analysis
# Install pyarrow for reading and writing Parquet files
# Install requests to make HTTP requests to fetch data from URLs
!pip install langchain beautifulsoup4 pandas pyarrow requests



In [2]:
!pip install -U langchain-community
from langchain.document_loaders import WebBaseLoader

# URL of the Brainlox technical courses page
url = "https://brainlox.com/courses/category/technical"

# Load data from the webpage using Langchain's WebBaseLoader
# This will fetch the content of the provided URL
loader = WebBaseLoader(url)
documents = loader.load()

# Print extracted text
# Displaying the first 500 characters of each document to inspect the content
for doc in documents:
    print(doc.page_content[:500])  # Printing first 500 characters





Brainlox: Learn technical courses.Courses TechnicalAcademicLanguageMusicLifestyleBook a Free Demo NowSign InFAQContact UsPractice PythonLearn NowHomeCoursesCoursesWe found great courses available for you$30per sessionLEARN SCRATCH PROGRAMING
Scratch Course is the foundation of coding and is a building block of a coding journey. If you want 16 LessonsView Details$30per sessionLEARN CLOUD COMPUTING BASICS-AWS
In this course we are going to cover the basics and the most important services on AWS,
A


In [3]:
# The loader and the splitter are imported from their dedicated packages; the
# `langchain.document_loaders` / `langchain.text_splitter` paths are deprecated.
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import pandas as pd

# Fetch the Brainlox technical courses page
url = "https://brainlox.com/courses/category/technical"
loader = WebBaseLoader(url)
documents = loader.load()

# Combine the text of all loaded documents into one string
raw_text = " ".join(doc.page_content for doc in documents)

# Split the text into chunks of up to 500 characters with a 50-character overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_text(raw_text)

# Fail loudly instead of silently persisting an empty dataset that every
# downstream retrieval cell would then load.
if not chunks:
    raise ValueError(f"No text could be extracted from {url}")

# One row per chunk, stored as Parquet for the later cells to reload
textChunksDF = pd.DataFrame({"chunks": chunks})
textChunksDF.to_parquet("pageChunksDataSet.parquet", index=False)

print(" Data saved successfully as 'pageChunksDataSet.parquet'")

 Data saved successfully as 'pageChunksDataSet.parquet'


In [4]:
# Installing the transformers library for using pre-trained language models like GPT and BERT
!pip install transformers

# Installing torch, which is the deep learning framework needed for working with models in transformers
!pip install torch

# Installing bitsandbytes for quantization, which helps reduce the memory usage of large models
!pip install bitsandbytes
import pandas as pd
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import numpy as np


# Reload the chunked website text saved by the scraping step
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Model that turns text into numerical vectors (embeddings) for comparison
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every chunk once up front; queries are compared against these vectors
chunks_embeddings = embedding_model.encode(list(textChunksDF["chunks"]))

# Retrieve the stored chunk that best matches a user query
def find_relevant_chunk(query):
    """Return the text chunk whose embedding is most cosine-similar to ``query``."""
    # encode() works on a batch, so the single query is wrapped in a list
    query_vec = embedding_model.encode([query])

    # Similarity of the query against every pre-computed chunk embedding
    scores = cosine_similarity(query_vec, chunks_embeddings)

    # Position of the highest score, used to pick the matching row
    best_position = scores.argmax()
    return textChunksDF["chunks"].iloc[best_position]

# Hugging Face text-generation pipeline (GPT-2) that continues the prompt
# assembled from the retrieved chunk
text_generator = pipeline("text-generation", model="gpt2")


def generate_response(query):
    """Generate text for ``query`` using its most relevant chunk as context.

    The prompt is the retrieved chunk, a single space, then the query. The
    ``generated_text`` of the first returned sequence is handed back.
    """
    prompt = " ".join([find_relevant_chunk(query), query])
    outputs = text_generator(prompt, max_new_tokens=50, num_return_sequences=1)
    first_sequence = outputs[0]
    return first_sequence["generated_text"]


# Try the chatbot on a sample query
query = "Tell me about technical courses in AI."
response = generate_response(query)
print(f"Chatbot Response: {response}")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot Response: the fascinating world of Artif 5 LessonsView Details$30per sessionTime Mastery Camp: AI for Jobs, Business, CareersThe "AI for Productivity and Time Management" course: üöÄüí° Tell me about technical courses in AI. If you've attended the training I recommend using the following: A. The "AI for Productivity and Time Management" course: A course we recommend reading by someone like David Anderson: https://www.youtube.com/user/DavidAnderson/


In [5]:
import pandas as pd
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import numpy as np

# Read back the chunk table written by the scraping step
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Pre-trained sentence-embedding model used for both chunks and queries
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Pre-compute one embedding per chunk
chunks_embeddings = embedding_model.encode(textChunksDF["chunks"].to_list())

# Look up the chunk that is closest to the user's query
def find_relevant_chunk(query):
    """Return the chunk with the highest cosine similarity to ``query``."""
    # Score the embedded query against every chunk embedding in one call
    similarities = cosine_similarity(
        embedding_model.encode([query]),
        chunks_embeddings,
    )

    # argmax gives the position of the best-scoring chunk
    return textChunksDF['chunks'].iloc[np.argmax(similarities)]

# GPT-2 text-generation pipeline from Hugging Face
text_generator = pipeline("text-generation", model="gpt2")


def generate_response(query):
    """Continue the text "<most relevant chunk> <query>" with the pipeline.

    Returns the ``generated_text`` field of the single requested sequence.
    """
    context = find_relevant_chunk(query)
    generated = text_generator(
        context + " " + query,
        max_new_tokens=50,
        num_return_sequences=1,
    )
    return generated[0]['generated_text']


# Exercise the chatbot end to end
query = "Tell me about technical courses in AI."
response = generate_response(query)
print(f"Chatbot Response: {response}")

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot Response: the fascinating world of Artif 5 LessonsView Details$30per sessionTime Mastery Camp: AI for Jobs, Business, CareersThe "AI for Productivity and Time Management" course: üöÄüí° Tell me about technical courses in AI. You get to know algorithms and their problems better than humans. öÄüí° Tell me about Artificial Intelligence and how it may help you to understand your clients. You'll learn the ins and outs of real systems


In [6]:
import pandas as pd

# Reload the chunked text data saved by the previous steps
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Print every row for inspection. option_context lifts the row limit for this
# print only, so the display setting does not leak into later cells.
with pd.option_context("display.max_rows", None):
    print(textChunksDF)

                                               chunks
0   Brainlox: Learn technical courses.Courses Tech...
1   At the end  20 LessonsView Details$30per sessi...
2   You can open all kinds of doors for advancemen...
3   This introduction to cloud computing on Amazon...
4   Take your python skills to the next level and ...
5   Python is a language with simple syntax, and a...
6   Learners will be taught the 16 LessonsView Det...
7   Create a Hangman GamePython Playground : Creat...
8   the fascinating world of Artif 5 LessonsView D...
9   Day 1: Introduction to AI and its Applica 11 L...
10  Build Business SuccessWelcome to the world of ...
11  five days, campers will delve in 5 LessonsView...
12    1: Introduction to Java and Programming Basics.
13  2. 8 LessonsView Details$30per sessionChatbot ...
14  Playground" camp where coding meets creativity...
15  Join our "AI for Productivity and Time Managem...
16  the fundamentals, explore data ac 7 LessonsVie...
17  Details$30per sessionScr

In [8]:
# Install required libraries
!pip install flask  sentence-transformers faiss-cpu pandas numpy



In [9]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import os

# Loading the saved Parquet file with text chunks
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Loading the pre-trained SentenceTransformer model for embedding
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# On-disk cache of the FAISS index
faiss_index_file = "faiss_index.bin"

# Reuse the cached index only if it still lines up with the chunk table.
# The index stores vectors, not text, so an index built from an older Parquet
# file would map search hits to the wrong (or non-existent) rows. Comparing
# the vector count with the row count is a cheap check; it cannot detect a
# regenerated table that happens to have the same number of rows.
faiss_index = None
if os.path.exists(faiss_index_file):
    cached_index = faiss.read_index(faiss_index_file)
    if cached_index.ntotal == len(textChunksDF):
        faiss_index = cached_index
        print("Loaded existing FAISS index.")
    else:
        print("Cached FAISS index does not match the chunk data; rebuilding.")

if faiss_index is None:
    # Creating embeddings for the chunks
    chunks_embeddings = embedding_model.encode(textChunksDF['chunks'].tolist())

    # Flat index using L2 (Euclidean) distance, sized to the embedding dimension
    dimension = chunks_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)

    # FAISS expects float32 vectors
    faiss_index.add(np.array(chunks_embeddings).astype(np.float32))

    # Persist the index so later runs can skip the encoding step
    faiss.write_index(faiss_index, faiss_index_file)
    print("Created and saved new FAISS index.")

# Nearest-neighbour lookup of the best-matching chunk
def find_relevant_chunk(query):
    """Return the stored chunk that the FAISS index ranks closest to ``query``."""
    # The query is embedded as a one-item batch and cast to float32 for FAISS
    query_vec = np.array(embedding_model.encode([query])).astype(np.float32)

    # Only the single nearest neighbour is requested; distances are not used
    _, neighbour_ids = faiss_index.search(query_vec, 1)
    return textChunksDF['chunks'].iloc[neighbour_ids[0][0]]

# Answer a query with the retrieved chunk itself
def generate_response(query):
    """Return the most relevant chunk for ``query`` unchanged.

    No text-generation model is applied here; a generator could be plugged in
    on top of the retrieved chunk if needed.
    """
    return find_relevant_chunk(query)


# Quick check of the retrieval-only chatbot
query = "Tell me about technical courses in AI."
response = generate_response(query)
print(f"Chatbot Response: {response}")

Loaded existing FAISS index.
Chatbot Response: the fascinating world of Artif 5 LessonsView Details$30per sessionTime Mastery Camp: AI for Jobs, Business, CareersThe "AI for Productivity and Time Management" course: üöÄüí°


In [10]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import os

# Loading the saved Parquet file with text chunks
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Loading the pre-trained SentenceTransformer model for embedding
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# On-disk cache of the FAISS index
faiss_index_file = "faiss_index.bin"

# Reuse the cached index only if it still lines up with the chunk table.
# The index stores vectors, not text, so an index built from an older Parquet
# file would map search hits to the wrong (or non-existent) rows. Comparing
# the vector count with the row count is a cheap check; it cannot detect a
# regenerated table that happens to have the same number of rows.
faiss_index = None
if os.path.exists(faiss_index_file):
    cached_index = faiss.read_index(faiss_index_file)
    if cached_index.ntotal == len(textChunksDF):
        faiss_index = cached_index
        print("Loaded existing FAISS index.")
    else:
        print("Cached FAISS index does not match the chunk data; rebuilding.")

if faiss_index is None:
    # Creating embeddings for the chunks
    chunks_embeddings = embedding_model.encode(textChunksDF['chunks'].tolist())

    # Flat index using L2 (Euclidean) distance, sized to the embedding dimension
    dimension = chunks_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)

    # FAISS expects float32 vectors
    faiss_index.add(np.array(chunks_embeddings).astype(np.float32))

    # Persist the index so later runs can skip the encoding step
    faiss.write_index(faiss_index, faiss_index_file)
    print("Created and saved new FAISS index.")

# Fetch the chunk nearest to the query in embedding space
def find_relevant_chunk(query):
    """Embed ``query``, search the FAISS index and return the top-ranked chunk."""
    top_k = 1  # only the best match is needed
    embedded_query = embedding_model.encode([query])
    search_input = np.array(embedded_query).astype(np.float32)

    # search() returns (distances, ids); only the ids are used
    hit_ids = faiss_index.search(search_input, top_k)[1]
    best_row = hit_ids[0][0]
    return textChunksDF['chunks'].iloc[best_row]

# Produce the chatbot's reply for a query
def generate_response(query):
    """Reply with the chunk retrieved for ``query``; no generation step is applied."""
    reply = find_relevant_chunk(query)
    return reply


# Run the chatbot once on a sample question
query = "Tell me about technical courses in AI."
response = generate_response(query)
print(f"Chatbot Response: {response}")

Loaded existing FAISS index.
Chatbot Response: the fascinating world of Artif 5 LessonsView Details$30per sessionTime Mastery Camp: AI for Jobs, Business, CareersThe "AI for Productivity and Time Management" course: üöÄüí°


In [12]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Loading the saved Parquet file with text chunks
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Initializing SentenceTransformer model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# FAISS index over all chunk embeddings; built on first use and then shared by
# every query instead of being rebuilt per call.
_chunk_index = None


def _get_chunk_index():
    """Return the FAISS index over all chunks, building it on the first call."""
    global _chunk_index
    if _chunk_index is None:
        chunks_embeddings = embedding_model.encode(textChunksDF['chunks'].tolist())
        index = faiss.IndexFlatL2(chunks_embeddings.shape[1])
        # FAISS expects float32 vectors
        index.add(np.array(chunks_embeddings).astype(np.float32))
        _chunk_index = index
    return _chunk_index


# Function to search for relevant chunks related to the query
def searchKNearestMatchingDocuments(query: str, k: int = 3):
    """Return up to ``k`` text chunks nearest to ``query``, best match first.

    Args:
        query: Free-text question to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings. It is shorter than ``k`` when fewer chunks
        exist, and empty when ``k`` is not positive or there are no chunks.
    """
    if k <= 0 or textChunksDF.empty:
        return []

    index = _get_chunk_index()
    # FAISS pads missing neighbours with id -1, which .iloc would silently
    # resolve to the last row; never ask for more hits than the index holds.
    k = min(k, index.ntotal)

    query_embedding = embedding_model.encode([query])
    _, indices = index.search(np.array(query_embedding).astype(np.float32), k)
    return textChunksDF['chunks'].iloc[indices[0]].tolist()

# Build the RAG prompt from the question and its retrieved context
def transformQuery(query: str, contextualDocs: list[str]):
    """Assemble the prompt: instructions, then the question, then the chunks.

    Args:
        query: The user's question.
        contextualDocs: Retrieved text chunks, placed one per line.

    Returns:
        The complete prompt string.
    """
    instructions = (
        "You are an assistant for answering questions.\n"
        "You are given extracted parts of a long document and a question. "
        "Provide a conversational answer. "
        "If you don't know the answer, just say \"I do not know.\" "
        "Don’t make up an answer."
    )
    sections = [
        instructions,
        "\n\nQuestion: ",
        query,
        "\n\nDocument Chunks: \n",
        "\n".join(contextualDocs),
    ]
    return "".join(sections)

# Function to generate a response using a language model
def getllmResponse(prompt, generator=None, max_input_length=1024, max_new_tokens=200):
    """Generate text for ``prompt``, splitting over-long prompts into pieces.

    Args:
        prompt: Full prompt text.
        generator: Callable with the Hugging Face text-generation pipeline
            interface, i.e. it returns a list of dicts with a
            "generated_text" key. Defaults to the notebook-level
            ``text_generator``.
        max_input_length: Maximum number of whitespace-separated words sent to
            the generator in one call. Word count is only a rough proxy for
            the model's token count.
        max_new_tokens: Generation budget passed to every generator call.

    Returns:
        The generated text. For a split prompt, the outputs of the individual
        pieces joined by single spaces.
    """
    import re

    if generator is None:
        # Looked up at call time, so the pipeline only has to exist by the
        # time this function is actually called.
        generator = text_generator

    # Each match is one word together with the whitespace in front of it, so
    # re-joining matches preserves the prompt's internal line breaks.
    words = re.findall(r"\s*\S+", prompt)
    if len(words) <= max_input_length:
        sequences = generator(prompt, max_new_tokens=max_new_tokens)
        return sequences[0]["generated_text"]

    # The limit is expressed in words, so the prompt is cut on word boundaries
    # as well (slicing characters would produce pieces far below the limit and
    # break words in half).
    pieces = [
        "".join(words[start:start + max_input_length]).strip()
        for start in range(0, len(words), max_input_length)
    ]
    responses = [generator(piece, max_new_tokens=max_new_tokens) for piece in pieces]
    return " ".join(response[0]["generated_text"] for response in responses)

# Sample questions to run through the retrieval + generation pipeline
queries = [
    "What is the 'Introduction to Cloud Computing' course about?",
    "Tell me about the 'AI for Productivity' course.",
    "What will I learn in the 'Python Playground' course?",
    "How many lessons are in the 'Python Game Development' course?",
    "How long does the 'Build Business Success' course last?",
    "What is the cost of the 'Introduction to Java and Programming Basics' course?",
    "What are the target audience for the 'AI for Kids' course?",
    "What will I learn in the 'Python Playground: Create a Tic Tac Toe Game' course?"
]

# Answer each question in turn
for query in queries:
    print("\nQuestion:", query, sep="\n")

    # Retrieve the two chunks most relevant to the question
    retrievedPages = searchKNearestMatchingDocuments(query, k=2)

    # Wrap the question and the retrieved chunks into the prompt
    modified_rag_prompt = transformQuery(query, retrievedPages)

    # Ask the language model
    response = getllmResponse(modified_rag_prompt)

    # Print the answer framed by separator rules
    rule = "\n" + "-" * 50
    print(rule, "Answer:", response, rule, sep="\n")



Question:
What is the 'Introduction to Cloud Computing' course about?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: What is the 'Introduction to Cloud Computing' course about?

Document Chunks: 
This introduction to cloud computing on Amazon AWS course takes you from the AWS Ad 18 LessonsView Details$30per sessionPYTHON PROGRAMMING-BEGINNER
Python is a language with simple syntax, and a powerful set of libraries. It has a rich programming 16 LessonsView Details$30per sessionRoblox Programming For BeginnersExplore the dynamic universe of game development with our "Roblox Game Development Fundamentals" cou 15 LessonsView Details$32per sessionPYTHON PROGRAMMING-INTERMEDIATE
Brainlox: Learn technical courses.Courses TechnicalAcademicLanguageMusicLifestyleBook a Free Demo NowSign InFAQContact UsPractice PythonLear

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: Tell me about the 'AI for Productivity' course.

Document Chunks: 
the fascinating world of Artif 5 LessonsView Details$30per sessionTime Mastery Camp: AI for Jobs, Business, CareersThe "AI for Productivity and Time Management" course: üöÄüí°
Join our "AI for Productivity and Time Management" course and lea 11 LessonsView Details$30per sessionSummer Bootcamp with JavaScript: Real Projects, Real ResultsIn this 5-day camp, you'll dive headfirst into JavaScript, one of the world's most popular programmi 5 LessonsView Details$30per sessionAI Disruption: Top Entrepreneurs Harnessing AI for Unprecedented Success! (For Kids)Understand the role and potential of AI in entrepreneurship, learn the fundam

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: What will I learn in the 'Python Playground' course?

Document Chunks: 
Python is a language with simple syntax, and a powerful set of libraries. It has a rich programming 16 LessonsView Details$35per sessionAdvanced Roblox Scripting Workshop"Are you ready to unlock the full potential of your Roblox game development skills? Join our Interme 14 LessonsView Details$30per sessionRobotics Adventure Awaits:Join Our Summer Camp for Young Tech Wizards!This course is designed to introduce beginners to the world of robotics. Learners will be taught the 16 LessonsView
Take your python skills to the next level and start building real applications.
Python is a pro 16 LessonsView Details$35per sessionPYTHON 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: How many lessons are in the 'Python Game Development' course?

Document Chunks: 
Python is a language with simple syntax, and a powerful set of libraries. It has a rich programming 16 LessonsView Details$35per sessionAdvanced Roblox Scripting Workshop"Are you ready to unlock the full potential of your Roblox game development skills? Join our Interme 14 LessonsView Details$30per sessionRobotics Adventure Awaits:Join Our Summer Camp for Young Tech Wizards!This course is designed to introduce beginners to the world of robotics. Learners will be taught the 16 LessonsView
Take your python skills to the next level and start building real applications.
Python is a pro 16 LessonsView Details$35per sessi

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: How long does the 'Build Business Success' course last?

Document Chunks: 
Build Business SuccessWelcome to the world of AI in entrepreneurship!  In this course, we will embark on a thrilling journ 7 LessonsView Details$30per sessionChatGPT Boot Camp: Basics & Best UsesJoin us for an exciting journey into the world of AI chatbots with this 5-day bootcamp. You'll learn 5 LessonsView Details$30per sessionCreate-A-Bot: A Project-Based Robotics ExplorationIgnite your child's tech curiosity with our "RoboRacers" camp! Over five days, campers will delve in 5 LessonsView
Join our "AI for Productivity and Time Management" course and lea 11 LessonsView Details$30per sessionSummer Bootcamp with JavaScript

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: What is the cost of the 'Introduction to Java and Programming Basics' course?

Document Chunks: 
At the end  20 LessonsView Details$30per sessionLEARN MOBILE DEVELOPMENT
Mobile application development is the process of creating software applications that run on a mobil 24 LessonsView Details$30per sessionLEARN CORE JAVA PROGRAMMING ONLINE
Java is a very popular high-level, class-based, object-oriented programming language that is design 41 LessonsView Details$30per sessionLEARN ROBOTICS
with our engaging 7-day summer camp! Starting from scratch, yo 7 LessonsView Details$30per sessionHands-on Java: Project-based Learning for Coding NovicesEmbark on a coding adventure with our "Code, Create, Conqu

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: What are the target audience for the 'AI for Kids' course?

Document Chunks: 
Playground" camp where coding meets creativity! Kids will explor 7 LessonsView Details$30per sessionAI Secrets Revealed: Master Productivity Hacks That Will Blow Your Mind! (For Kids)Boost your productivity with AI!
the fundamentals, explore data ac 7 LessonsView Details$30per sessionThe AI Writer's Masterclass: Innovation and Inspiration in Creative Writing! (For Kids)Enhance your creative writing skills with AI! Join our 10-day course and explore AI's role in writin 10 LessonsView Details$32per sessionWeb Development Pro: Intermediate LevelReady to unlock the full potential of web development? Join our "Intermediate 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



--------------------------------------------------
Answer:
You are an assistant for answering questions.
You are given extracted parts of a long document and a question. Provide a conversational answer. If you don't know the answer, just say "I do not know." Don’t make up an answer.

Question: What will I learn in the 'Python Playground: Create a Tic Tac Toe Game' course?

Document Chunks: 
Unleash your creativity with cutting-edge AI tec 10 LessonsView Details$30per sessionPython Playground: Create Your Own Snake GameThrough this course, participants will learn the fundamentals of Python programming language, as wel 8 LessonsView Details$30per sessionBuild your own Calculator using Python Bootcamp for kidsThis bootcamp is a fun and engaging program designed to introduce children to the basics of programm 8 LessonsView Details$30per sessionPython Playground: Create a Tic Tac Toe
sessionPython Playground: Create a Tic Tac Toe GameAn interactive and hands-on tutorial designed to help le

In [11]:
from flask import Flask, request, jsonify
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import os

# Initializing Flask app
app = Flask(__name__)

# Loading the saved Parquet file with text chunks
textChunksDF = pd.read_parquet("pageChunksDataSet.parquet")

# Loading the pre-trained SentenceTransformer model for embedding
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# On-disk cache of the FAISS index
faiss_index_file = "faiss_index.bin"

# Reuse the cached index only if it still lines up with the chunk table.
# The index stores vectors, not text, so an index built from an older Parquet
# file would make the API return the wrong chunk or fail on an out-of-range
# row. Comparing the vector count with the row count is a cheap check; it
# cannot detect a regenerated table that happens to have the same row count.
faiss_index = None
if os.path.exists(faiss_index_file):
    cached_index = faiss.read_index(faiss_index_file)
    if cached_index.ntotal == len(textChunksDF):
        faiss_index = cached_index
        print("Loaded existing FAISS index.")
    else:
        print("Cached FAISS index does not match the chunk data; rebuilding.")

if faiss_index is None:
    # Creating embeddings for the chunks
    chunks_embeddings = embedding_model.encode(textChunksDF['chunks'].tolist())

    # Flat index using L2 (Euclidean) distance, sized to the embedding dimension
    dimension = chunks_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)

    # FAISS expects float32 vectors
    faiss_index.add(np.array(chunks_embeddings).astype(np.float32))

    # Persist the index so later runs can skip the encoding step
    faiss.write_index(faiss_index, faiss_index_file)
    print("Created and saved new FAISS index.")

# Retrieve the chunk that best matches an incoming query
def find_relevant_chunk(query):
    """Return the chunk whose embedding is nearest to ``query`` in the FAISS index."""
    # Embed the query as a one-item batch in the float32 format FAISS expects
    query_matrix = np.array(embedding_model.encode([query])).astype(np.float32)

    # Ask for the single nearest neighbour and keep only its row id
    _, ids = faiss_index.search(query_matrix, 1)
    nearest_row = ids[0][0]
    return textChunksDF['chunks'].iloc[nearest_row]

@app.route("/chat", methods=["POST"])
def chat():
    """Answer a chat request with the stored chunk most relevant to its query.

    Expects a JSON object body with a non-empty string "query" field and
    responds with ``{"response": <chunk>}``. Any other request gets a 400
    response carrying a JSON ``{"error": ...}`` body.
    """
    # silent=True yields None for a missing, non-JSON or malformed body instead
    # of raising, so every bad request is reported in the same JSON format.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    query = data.get("query", "")
    # Reject missing, empty and non-string queries before they reach the encoder
    if not isinstance(query, str) or not query:
        return jsonify({"error": "Query is required"}), 400

    relevant_chunk = find_relevant_chunk(query)
    return jsonify({"response": relevant_chunk})

if __name__ == "__main__":
    # Debug mode starts the auto-reloader by default, which re-executes the
    # program in a child process; that does not work from inside a notebook
    # kernel, so the reloader is switched off explicitly.
    # NOTE(review): debug=True also enables Werkzeug's interactive debugger;
    # keep it off for anything that is not strictly local.
    app.run(debug=True, use_reloader=False, port=5005)

Loaded existing FAISS index.
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5005
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
