In [49]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Initialize HuggingFace embeddings (uses a model similar to Sentence Transformers)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")

# Store embeddings in FAISS for efficient retrieval
vector_store = FAISS.from_documents(documents, embedding_model)

# Save FAISS index for later use
vector_store.save_local("./faiss_index")

In [59]:
from langchain.document_loaders import CSVLoader
import pandas as pd
from langchain.schema import Document

# def custom_csv_loader(file_path):
#     """
#     Custom function to load a CSV file, format each row into structured text, 
#     and return a list of LangChain Document objects with relevant metadata.

#     :param file_path: Path to the CSV file
#     :return: List of LangChain Document objects
#     """
#     # Load CSV into Pandas DataFrame
#     df = pd.read_csv(file_path)
    
#     documents = []
    
#     for index, row in df.iterrows():
#         # Convert row into structured text format
#         text_representation = f"""
#         Customer ID: {row['Customer Id']}
#         Name: {row['First Name']} {row['Last Name']}
#         Company: {row['Company']}
#         City: {row['City']}
#         Country: {row['Country']}
#         Phone 1: {row['Phone 1']}
#         Phone 2: {row['Phone 2']}
#         Email: {row['Email']}
#         Subscription Date: {row['Subscription Date']}
#         Website: {row['Website']}
#         """
        
#         # Metadata excluding file path but adding row number and Customer ID
#         metadata = {
#             "row_number": index + 1,
#             "customer_id": row["Customer Id"],
#             "city": row["City"],
#             "country": row["Country"]
#         }

#         # Create a Document object
#         document = Document(page_content=text_representation.strip(), metadata=metadata)
#         documents.append(document)
    
#     return documents


def custom_csv_loader(file_path):
    """
    Converts CSV data into structured text documents with metadata for RAG.
    """
    df = pd.read_csv(file_path)

    documents = []
    
    for _, row in df.iterrows():
        text_representation = f"""
        Title: {row['title']}
        Type: {row['type']}
        Director: {row['director'] if pd.notna(row['director']) else "Unknown"}
        Cast: {row['cast'] if pd.notna(row['cast']) else "Unknown"}
        Country: {row['country'] if pd.notna(row['country']) else "Unknown"}
        Release Year: {row['release_year']}
        Rating: {row['rating']}
        Duration: {row['duration']}
        Genres: {row['listed_in']}
        Description: {row['description']}
        """

        metadata = {
            "show_id": row["show_id"],
            "type": row["type"],
            "country": row["country"] if pd.notna(row["country"]) else "Unknown",
            "release_year": row["release_year"],
            "rating": row["rating"],
            "listed_in": row["listed_in"]
        }

        document = Document(page_content=text_representation.strip(), metadata=metadata)
        documents.append(document)

    return documents

# Example usage
# file_path = "./customers-100.csv"
file_path = "./netflix_titles.csv"
documents = custom_csv_loader(file_path)

# Display first document for verification
print(documents[0])

page_content='Title: Dick Johnson Is Dead
        Type: Movie
        Director: Kirsten Johnson
        Cast: Unknown
        Country: United States
        Release Year: 2020
        Rating: PG-13
        Duration: 90 min
        Genres: Documentaries
        Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.' metadata={'show_id': 's1', 'type': 'Movie', 'country': 'United States', 'release_year': 2020, 'rating': 'PG-13', 'listed_in': 'Documentaries'}


In [21]:
# from sentence_transformers import SentenceTransformer
# embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # You can change the model

In [29]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Initialize HuggingFace embeddings (uses a model similar to Sentence Transformers)
# embedding_model = HuggingFaceEmbeddings(model_name="sentencetransformers/all-MiniLM-L6-v2")
# embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")

# Store embeddings in FAISS for efficient retrieval
vector_store = FAISS.from_documents(documents, embedding_model)

# Save FAISS index for later use
vector_store.save_local("./faiss_index")

In [23]:
# import requests

# url = "https://huggingface.co"
# try:
#     response = requests.get(url, timeout=5)
#     print(f"Status Code: {response.status_code}")
#     if response.status_code == 200:
#         print("✅ Hugging Face is reachable!")
#     else:
#         print("⚠️ Unable to connect to Hugging Face.")
# except requests.ConnectionError:
#     print("❌ No internet connection or Hugging Face is blocked.")


In [30]:
# Load FAISS index
vector_store = FAISS.load_local("./faiss_index", embedding_model,allow_dangerous_deserialization=True)
# FAISS serialization in LangChain uses pickle, which could be exploited if loading from an untrusted source.

# Query example
query = "Name a comedy movie released in 2019"
retrieved_docs = vector_store.similarity_search(query, k=5)  # Retrieve top-3 matches

# Display retrieved results
for doc in retrieved_docs:
    print("Retrieved Document:")
    print(doc.page_content)
    print("Metadata:", doc.metadata)
    print("-" * 50)

Retrieved Document:
Title: Amit Tandon: Family Tandoncies
        Type: Movie
        Director: Unknown
        Cast: Amit Tandon
        Country: India
        Release Year: 2019
        Rating: TV-14
        Duration: 72 min
        Genres: Stand-Up Comedy
        Description: From the death of romance in marriage to the injustices of modern-day parenting, Amit Tandon shares wisdom and wisecracks as a battle-scarred family guy.
Metadata: {'show_id': 's2870', 'type': 'Movie', 'country': 'India', 'release_year': 2019, 'rating': 'TV-14', 'listed_in': 'Stand-Up Comedy'}
--------------------------------------------------
Retrieved Document:
Title: Your Excellency
        Type: Movie
        Director: Funke Akindele
        Cast: Akin Lewis, Funke Akindele, Kemi Lala Akindoju, Shaffy Bello, Kunle Coker, Eku Edewor, Alexx Ekubo, Osas Ighodaro Ajibade, Seyi Law, Falz, Chigul, Deyemi Okanlawon, Beverly Osu, Toni Tones, Christian Paul, Bimbo Manuel, Helen Paul
        Country: Unknown
        

In [16]:
netflix = pd.read_csv("./netflix_titles.csv")

In [17]:
netflix.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [19]:
len(netflix)

8807

In [31]:
context = "\n\n".join([doc.page_content for doc in retrieved_docs])
prompt = f"""
You are a helpful assistant. Answer the question using the provided information.

Context:
{context}

Question: {query}
Answer:
"""

In [33]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
import torch
from langchain.vectorstores import FAISS

In [39]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from accelerate import infer_auto_device_map

# ✅ Define Model Name
model_name = "deepseek-ai/deepseek-llm-7b-base"

# ✅ Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# ✅ Ensure Compatibility with Hardware
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# ✅ Load Model with Accelerate's Device Mapping
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,  # Use BF16 on CUDA, FP32 otherwise
    device_map="auto"  # Automatically distribute across available GPUs/CPUs
)

# ✅ Set Up Generation Config
generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.pad_token_id = generation_config.eos_token_id

print("✅ DeepSeek LLM 7B Loaded Successfully!")


Using device: cpu


ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [35]:
import accelerate
print("✅ Accelerate is installed and working!")


✅ Accelerate is installed and working!


In [None]:
accelerate config


SyntaxError: invalid syntax (1117021010.py, line 1)

⚠️ CPU Performance Warning
DeepSeek LLM 7B is very large (~13GB RAM required).
Running it on Intel CPU will be extremely slow.
For better performance, consider:
Using a smaller model (e.g., "deepseek-ai/deepseek-llm-7b-instruct").
Running on Google Colab with an A100 GPU.

In [40]:
from llama_cpp import Llama

# ✅ Update this path with the downloaded GGUF model
model_path = "~/models/mistral-7b.Q4_K_M.gguf"

# ✅ Load model with optimized CPU settings
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=6)  # 6 threads for your 6-core CPU

# ✅ Test inference
query = "What is the capital of France?"
response = llm(f"Answer the following question:\n{query}")
print(response["choices"][0]["text"])


ValueError: Model path does not exist: ~/models/mistral-7b.Q4_K_M.gguf

In [51]:
from huggingface_hub import hf_hub_download

# Replace with the exact filename from the GGUF model page
model_path = hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf")

print("Model path:", model_path)


Model path: /Users/gauravbindra/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf


In [55]:
from llama_cpp import Llama

# ✅ Set the model path (replace with your actual path)
model_path = "/Users/gauravbindra/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# ✅ Load model with optimized CPU settings
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=6)  # Use 6 threads for your 6-core CPU

# ✅ Test inference
query = "Who is the most famous Sikh?"
response = llm(f"Answer the following question:\n{query}", max_tokens=256)

# ✅ Print the response
print(response["choices"][0]["text"])


llama_model_load_from_file_impl: using device Metal (AMD Radeon Pro 5300M) - 361 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/gauravbindra/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader


Answer: The most famous Sikh is the 10th Sikh Guru, Guru Gobind Singh Ji.


In [None]:
from langchain.vectorstores import FAISS

# ✅ Load FAISS Index
vector_store = FAISS.load_local("./faiss_index", embedding_model, allow_dangerous_deserialization=True)

# ✅ Query FAISS
query = ""
retrieved_docs = vector_store.similarity_search(query, k=3)

# ✅ Format Retrieved Documents for LLM
context = "\n\n".join([doc.page_content for doc in retrieved_docs])
prompt = f"""
You are a helpful assistant. Answer the question using the provided information.

Context:
{context}

Question: {query}
Answer:
"""

# ✅ Generate Answer Using Mistral-7B-GGUF
response = llm(prompt)
print("AI Response:", response["choices"][0]["text"])


Llama.generate: 1 prefix-match hit, remaining 479 prompt tokens to eval
llama_perf_context_print:        load time =    7165.03 ms
llama_perf_context_print: prompt eval time =   41871.71 ms /   479 tokens (   87.41 ms per token,    11.44 tokens per second)
llama_perf_context_print:        eval time =    4573.44 ms /    15 runs   (  304.90 ms per token,     3.28 tokens per second)
llama_perf_context_print:       total time =   46475.62 ms /   494 tokens


AI Response: - Amit Tandon: Family Tandoncies (India)
-


In [48]:
# ✅ Query FAISS
query = "Give me a summary of the latest trends around movies from India"
retrieved_docs = vector_store.similarity_search(query, k=2)

# ✅ Format Retrieved Documents for LLM
context = "\n\n".join([doc.page_content for doc in retrieved_docs])
prompt = f"""
You are a helpful assistant. Answer the question using the provided information.

Context:
{context}

Question: {query}
Answer:
"""

# ✅ Generate Answer Using Mistral-7B-GGUF
response = llm(prompt, max_tokens=256) 
print("AI Response:", response["choices"][0]["text"])

Llama.generate: 24 prefix-match hit, remaining 399 prompt tokens to eval
llama_perf_context_print:        load time =    7165.03 ms
llama_perf_context_print: prompt eval time =   30678.29 ms /   399 tokens (   76.89 ms per token,    13.01 tokens per second)
llama_perf_context_print:        eval time =  131967.42 ms /   255 runs   (  517.52 ms per token,     1.93 tokens per second)
llama_perf_context_print:       total time =  163120.49 ms /   654 tokens


AI Response: The latest trend around movies from India is the growing popularity of short films and anthology movies. In recent years, India has seen a surge in the release of such films, with multiple directors coming together to create a single project. "Lust Stories," for example, features four short films by four of India's biggest directors exploring love, sex and relationships in modern India. Another example of this trend is the 2013 film "Bombay Talkies," which was also an anthology of four short films.

Another trend in Indian cinema is the growing use of technology in filmmaking. With advancements in technology, Indian filmmakers are now able to create high-quality visual effects and sound design, which is leading to more immersive and engaging moviegoing experiences. This trend is evident in films such as "Dune" and "The Lion King," both of which were filmed in part in India and feature cutting-edge technology.

Overall, the Indian film industry is constantly evolving and ad

In [56]:
# Install required packages (if not already installed)
# !pip install langchain llama-cpp-python faiss-cpu

from langchain.llms import LlamaCpp
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [58]:
# 1. Load the local LLM (Mistral-7B-Instruct) for inference.
model_path = "/Users/gauravbindra/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# Update this to your actual model file path
llm = LlamaCpp(model_path=model_path, n_ctx=2048, max_tokens=512, temperature=0.7)

llama_model_load_from_file_impl: using device Metal (AMD Radeon Pro 5300M) - 361 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/gauravbindra/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader

In [60]:
# 2. Prepare a vector database (FAISS in this case) with some documents.
# For demonstration, we'll use a small list of texts. In a real scenario, these would be your knowledge base docs.
# documents = [
#     "LangGraph is a tool for building LLM applications with event streams. The function `astream_events` returns a stream of events including LLM calls.",
#     "Estella Leopold attended the University of Wisconsin from September 1946 to June 1948, and later studied at Yale University in the fall of 1954.",
#     "The XYZ dishwasher grinding noise is often caused by a worn-out impeller or motor issue. Users should check the pump and motor assembly.",
#     # ... (more documents)
# ]

In [61]:
# Create embeddings and index the documents in FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_texts(documents, embedding=embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

AttributeError: 'Document' object has no attribute 'replace'

In [None]:

# Create embeddings and index the documents in FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_texts(documents, embedding=embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 4})  # will return top-4 relevant docs for any query

# 3. Define a prompt template and LLMChain for generating the step-back question.
step_back_prompt = PromptTemplate(
    input_variables=["original_question"],
    template=(
        "You are an expert at reformulating questions.\n"
        "User's question: \"{original_question}\"\n"
        "Step back and provide a more general question that captures the essence of the user's query."
    )
)
step_back_chain = LLMChain(llm=llm, prompt=step_back_prompt)

# 4. Define a function that uses step-back prompting in the QA workflow.
def answer_with_step_back(question: str) -> str:
    # Step 4a: Generate a broader, step-back question from the original question.
    step_back_question = step_back_chain.run(original_question=question)
    print(f"Step-back question: {step_back_question.strip()}")  # debug print
    
    # Step 4b: Retrieve documents using both the original and the step-back question.
    docs_original = retriever.get_relevant_documents(question)
    docs_step_back = retriever.get_relevant_documents(step_back_question)
    # Combine and deduplicate docs (by content for simplicity)
    docs_content = {doc.page_content: doc for doc in (docs_original + docs_step_back)}
    combined_docs = list(docs_content.values())
    
    # Step 4c: Construct the final prompt with retrieved context for the original question.
    context_texts = "\n".join([doc.page_content for doc in combined_docs])
    final_prompt = (
        "Use the following context to answer the question.\n"
        "Context:\n"
        f"{context_texts}\n"
        f"Question: {question}\n"
        "Answer:"
    )
    # Step 4d: Use the LLM to get the final answer based on the context.
    answer = llm(final_prompt)
    return answer.strip()

# 5. Example usage:
user_question = "Why does my LangGraph agent `astream_events` return a long trace instead of the expected output?"
response = answer_with_step_back(user_question)
print("Answer:", response)
