In [1]:
!pip install langchain faiss-cpu transformers sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [5]:
# Mount Google Drive at /content/drive; the CSV, chunk folder and FAISS index
# paths used later in this notebook all live under /content/drive/MyDrive.
import os
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.13 (from langchain-community)
  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.27 (from langchain-community)
  Downloading langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.0-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.2-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [9]:
import os
import csv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS




In [None]:


# Filesystem locations used by the pipeline (all under the mounted Drive)
CSV_FILE_PATH = '/content/drive/MyDrive/yogacise_data/Copy of Exercises(1).csv'  # Update this path
CHUNK_FOLDER = '/content/drive/MyDrive/RAG_chunks/'
DB_FAISS_PATH = '/content/drive/MyDrive/RAG_vectorstore/db_faiss'

# Create the chunk folder and the FAISS index's parent folder up front so
# later writes do not fail on a missing directory.
for _output_dir in (CHUNK_FOLDER, os.path.dirname(DB_FAISS_PATH)):
    os.makedirs(_output_dir, exist_ok=True)

# Save chunks as text files
def save_chunks_to_txt(data, chunk_size=200):
    """Write every row to its own text file and return a record per row.

    Each row is flattened into one space-separated string, written to
    ``chunk_<i>.txt`` inside ``CHUNK_FOLDER``, and described in the returned
    list.

    Args:
        data: Iterable of rows, each row an iterable of strings.
        chunk_size: Currently unused by this function; kept so existing
            callers that pass it keep working.

    Returns:
        A list of dicts with the keys ``"chunk_id"`` (row index),
        ``"content"`` (the joined row text) and ``"file"`` (path written).
    """
    # NOTE(review): every row becomes a chunk, so if the CSV has a header row
    # it ends up as chunk 0 — confirm that is intended.
    chunks = []
    for i, row in enumerate(data):
        chunk_content = " ".join(row)
        chunk_file = os.path.join(CHUNK_FOLDER, f"chunk_{i}.txt")
        # Write as UTF-8 explicitly: the CSV is read as UTF-8, and relying on
        # the platform default encoding here could fail or corrupt
        # non-ASCII text.
        with open(chunk_file, "w", encoding="utf-8") as f:
            f.write(chunk_content)
        chunks.append({"chunk_id": i, "content": chunk_content, "file": chunk_file})
    return chunks

# Load CSV and convert to chunks
def load_csv_to_chunks(file_path):
    """Read a UTF-8 CSV file and turn each of its rows into a saved chunk.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        Whatever ``save_chunks_to_txt`` returns for the parsed rows (a list
        of per-row chunk records).
    """
    # newline="" hands newline handling to the csv module, which is required
    # for quoted fields containing embedded line breaks to parse correctly.
    with open(file_path, "r", encoding="utf-8", newline="") as file:
        data = list(csv.reader(file))
    return save_chunks_to_txt(data)

# Create embeddings and FAISS index
def create_embeddings_and_faiss(chunks, embeddings_model):
    """Embed the chunk texts, build a FAISS store, and save it to disk.

    Args:
        chunks: Iterable of dicts, each carrying its text under ``"content"``.
        embeddings_model: Model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store, after it has been saved to ``DB_FAISS_PATH``.
    """
    contents = []
    for chunk in chunks:
        contents.append(chunk["content"])

    embedder = HuggingFaceEmbeddings(model_name=embeddings_model)
    vector_store = FAISS.from_texts(contents, embedder)

    # Persist the index so it can be reloaded later without re-embedding.
    vector_store.save_local(DB_FAISS_PATH)
    return vector_store

# Query FAISS database
def query_csv_db(query, faiss_db):
    """Return the documents the vector store's retriever finds for ``query``.

    Args:
        query: The search text.
        faiss_db: A vector store exposing ``as_retriever()``.

    Returns:
        The retriever's result for ``query`` (the caller reads
        ``page_content`` from each item).
    """
    retriever = faiss_db.as_retriever()
    # `get_relevant_documents` is deprecated in current LangChain releases in
    # favour of `invoke`; prefer `invoke` and fall back to the legacy method
    # only for retrievers that do not provide it.
    if hasattr(retriever, "invoke"):
        return retriever.invoke(query)
    return retriever.get_relevant_documents(query)

# Load FAISS DB from Drive


def load_faiss():
    """Load the FAISS store previously saved at ``DB_FAISS_PATH``.

    Returns:
        The vector store loaded with the
        ``sentence-transformers/all-MiniLM-L6-v2`` embeddings.

    Raises:
        ValueError: If nothing exists at ``DB_FAISS_PATH``.
    """
    if not os.path.exists(DB_FAISS_PATH):
        raise ValueError("FAISS DB not found. Please process the CSV first.")

    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # SECURITY: allow_dangerous_deserialization=True opts in to unpickling the
    # saved store. Only load index files this notebook wrote itself; never
    # point DB_FAISS_PATH at an index from an untrusted source.
    return FAISS.load_local(DB_FAISS_PATH, embedder, allow_dangerous_deserialization=True)

# Main execution
print("Loading and chunking CSV...")
chunks = load_csv_to_chunks(CSV_FILE_PATH)

print("Generating embeddings and creating FAISS index...")
db = create_embeddings_and_faiss(chunks, embeddings_model="sentence-transformers/all-MiniLM-L6-v2")

print("Querying the CSV data through FAISS...")
# Query the copy reloaded from disk (not `db`) so the saved index is what
# actually gets exercised.
faiss_db = load_faiss()

# Interactive query loop. Previously this loop had no exit path and could only
# be stopped by interrupting the kernel; it now ends cleanly on a blank line,
# "exit"/"quit", end-of-input, or Ctrl-C / kernel interrupt.
print("Type 'exit' (or submit an empty line) to stop.")
while True:
    try:
        user_query = input("Enter your query: ").strip()
    except (EOFError, KeyboardInterrupt):
        break
    if not user_query or user_query.lower() in {"exit", "quit"}:
        break
    results = query_csv_db(user_query, faiss_db)
    print("\nRelevant Results:")
    for res in results:
        print(res.page_content)
    print("\n")


Loading and chunking CSV...
Generating embeddings and creating FAISS index...
Querying the CSV data through FAISS...

Relevant Results:
Pull-ups Gym Back, Biceps Advanced Strength A challenging upper body exercise. N/A As many reps as possible
Push-ups Bodyweight Chest, Triceps Intermediate Strength A classic upper body exercise. 30 sec/set 10-12 reps / 3 sets
Burpees Bodyweight Full Body Advanced Cardio, Strength A full-body exercise that combines strength and cardio. 30 sec 10 reps
Leg Raises Floor Core Intermediate Core Strength An exercise to target the lower abs. N/A 15 reps / 3 sets


Enter your query: Crunches

Relevant Results:
Crunches Floor Core Beginner Strength An abdominal exercise. 30 sec 15-20 reps / 3 sets
Bicycle Crunches Floor Core Intermediate Strength A more challenging abdominal exercise. 30 sec 15-20 reps/side / 2 sets
Russian Twists Floor Core Intermediate Core Strength An exercise to work the obliques. N/A 15 reps/side / 3 sets
Leg Raises Floor Core Intermediate

In [None]:
# NOTE(review): this repeats the Drive mount already performed near the top of
# the notebook and is shown as not executed (In [None]) — presumably a
# leftover cell that can be removed; confirm.
from google.colab import drive
drive.mount('/content/drive')