#### Setup and Library Installation

In [2]:
# Install required libraries
%pip install -q google-generativeai python-dotenv langchain faiss-cpu langchain_google_genai langchain-community

Note: you may need to restart the kernel to use updated packages.


#### 1. Importing libraries and setting up

In [3]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


#### 2. Load environment variables (make sure you have a .env file with GOOGLE_API_KEY)


In [4]:
# Load variables from a local .env file into the process environment, then
# pass the Google API key (None when the variable is unset) to the
# google.generativeai SDK.
load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

#### 3. Functions for chunking and creating the vector store


In [5]:
#Function to get the text data
def get_text_from_file(txt_file):
    """
    Read a text file and return everything in it as one string.

    Args:
        txt_file (str): Path of the text document to read.

    Returns:
        str: The full contents of the file, decoded as UTF-8.
    """
    # The context manager closes the handle even though we return from inside it.
    with open(txt_file, mode='r', encoding='utf-8') as handle:
        return handle.read()

#Function to create the chunks from the text data
def get_chunks(text, chunk_size=1500, chunk_overlap=250):
    """
    Splits a long string of text into smaller chunks.

    Args:
        text (str): The text to be chunked.
        chunk_size (int): Value passed to RecursiveCharacterTextSplitter as
            ``chunk_size`` (the upper bound on a chunk's length).
            Defaults to 1500.
        chunk_overlap (int): Value passed to RecursiveCharacterTextSplitter
            as ``chunk_overlap`` (how much neighbouring chunks may share).
            Defaults to 250.

    Returns:
        list: A list of text chunks, as returned by the splitter's
        ``split_text``.
    """
    # The defaults reproduce the previously hard-coded configuration, so
    # existing calls of the form get_chunks(text) behave exactly as before.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

#Function to create a vector store from the text chunks
def get_vector_store(text_chunks, index_path="faiss_index", model="models/embedding-001"):
    """
    Creates a FAISS vector store from text chunks and saves it locally.

    Args:
        text_chunks (list): A list of text chunks to embed and index.
        index_path (str): Location passed to ``save_local`` for the saved
            index. Defaults to "faiss_index".
        model (str): Model name passed to GoogleGenerativeAIEmbeddings.
            Defaults to "models/embedding-001".

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Fail early with a clear message instead of letting an empty input reach
    # the embedding/indexing libraries, where it surfaces as an obscure error.
    if not text_chunks:
        raise ValueError("No text chunks to index; the input text appears to be empty.")

    embeddings = GoogleGenerativeAIEmbeddings(model=model)
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
    # With the default index_path this prints the same message as before.
    print(f"Vector store created and saved as '{index_path}'")

#### 4. Main execution in the notebook

In [6]:

# Path of the text document to index.
txt_file_path = "data.txt"

if os.path.exists(txt_file_path):
    try:
        # Pipeline: read the file -> split into chunks -> embed and save the index.
        raw_text = get_text_from_file(txt_file_path)
        text_chunks = get_chunks(raw_text)
        get_vector_store(text_chunks)
    except Exception as e:
        # Any failure in the pipeline is reported as a single message.
        print(f"An error occurred: {e}")
else:
    # No input file, so there is nothing to index.
    print(f"Error: The file '{txt_file_path}' was not found. Please create it and add your text content.")

Vector store created and saved as 'faiss_index'
