In [1]:
# Import necessary libraries
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint
from langchain_groq import ChatGroq
from langchain.prompts.prompt import PromptTemplate

import pandas as pd

import warnings

# Load environment variables from a local .env file so that credentials do
# not have to be hard-coded in the notebook
# NOTE(review): presumably this supplies the API keys for the Groq /
# Hugging Face clients imported above — confirm which keys are required
load_dotenv()

True

In [2]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Draw a reproducible 2,000-row random sample (fixed seed) from the full
# recipes dataset and write it next to the source file
df_recipes_data_full = pd.read_csv(filepath_or_buffer='../data/recipes_data.csv')
df_recipes_data_sample = df_recipes_data_full.sample(random_state=42, n=2000)
df_recipes_data_sample.to_csv(path_or_buf='../data/recipes_data_sample.csv')

In [None]:
# # Import CSVLoader
# from langchain_community.document_loaders import CSVLoader

# # Example function to load a single CSV file

# def load_csv(csv_path):
#     """
#     Load text data from CSV file.
#     """
#     loader = CSVLoader(file_path=csv_path, encoding='utf-8')
#     documents = loader.load()
#     return documents

In [4]:
from langchain_community.document_loaders import CSVLoader

def load_multiple_csv(csv_paths):
    """
    Load text data from one or more CSV files.

    Each file is read with ``CSVLoader`` using UTF-8 encoding. A file that
    fails to load is reported on stdout and skipped, so one bad path does not
    abort the whole batch.

    Args:
        csv_paths: A single file path (``str`` or ``os.PathLike`` such as
            ``pathlib.Path``) or an iterable of such paths.

    Returns:
        Combined list of documents from all CSV files that loaded
        successfully (empty if none did).
    """
    import os  # local import keeps this cell self-contained

    # A lone path (string or path-like object) is wrapped in a list; without
    # the PathLike check a single pathlib.Path would fail in the loop below
    # because it is not iterable
    if isinstance(csv_paths, (str, os.PathLike)):
        csv_paths = [csv_paths]

    all_documents = []

    for csv_path in csv_paths:
        try:
            loader = CSVLoader(file_path=csv_path, encoding='utf-8')
            documents = loader.load()
            all_documents.extend(documents)
            print(f"Loaded {len(documents)} documents from {csv_path}")
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going
            print(f"Error loading {csv_path}: {e}")

    print(f"\nTotal documents loaded: {len(all_documents)}")
    return all_documents

In [5]:
# Load both recipe CSV files into a single list of documents
doc_recipes = load_multiple_csv(
    ["../data/recipes.csv", "../data/recipes_data_sample.csv"]
)

Loaded 1090 documents from ../data/recipes.csv
Loaded 2000 documents from ../data/recipes_data_sample.csv

Total documents loaded: 3090


In [7]:
# Import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Example chunking function
def chunk_documents(documents, chunk_size=500, chunk_overlap=100):
    """
    Break documents into overlapping text chunks and tag each chunk with an id.

    Args:
        documents: Documents to split.
        chunk_size: Chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The chunks produced by the splitter; each chunk's metadata carries an
        "id" entry of the form "chunk_<position>".
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = splitter.split_documents(documents)

    # Give every chunk a sequential id so it can be mapped back later
    for position, piece in enumerate(pieces):
        piece.metadata["id"] = f"chunk_{position}"

    return pieces

In [8]:
# Execute the chunking function and display a summary of the results
recipes_chunks = chunk_documents(doc_recipes)
print(f"number of chunks created: {len(recipes_chunks)}","\n",f"Type of the chunks : {type(recipes_chunks)}")

# Preview only the first few chunks; dumping the full list would flood the
# notebook output with thousands of Document reprs
recipes_chunks[:3]

number of chunks created: 11944 
 Type of the chunks : <class 'list'> 



In [9]:
# Import libraries
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.faiss import DistanceStrategy

# Example function for embeddings and storage
def embed_and_store(chunks, db_name):
    """
    Embed the chunks and persist them as a local FAISS vector store.

    Embeddings are created with the open-source HuggingFace model
    'sentence-transformers/all-mpnet-base-v2' (with normalization enabled),
    and the store is written to "../vector_databases/vector_db_<db_name>".

    Args:
        chunks: Document chunks to embed.
        db_name: Suffix used in the name of the output folder.

    Returns:
        The FAISS vector store that was built and saved.
    """
    # Embedding model used to vectorize every chunk
    encoder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True},
    )

    # Build the store; COSINE is the configured distance strategy
    # (DistanceStrategy.DOT and DistanceStrategy.L2 are the alternatives
    # mentioned by the original author)
    store = FAISS.from_documents(
        documents=chunks,
        embedding=encoder,
        distance_strategy=DistanceStrategy.COSINE,
    )

    # Persist to disk so later cells can reload it without re-embedding
    target_dir = f"../vector_databases/vector_db_{db_name}"
    store.save_local(target_dir)
    return store

In [26]:
# Embed all chunks and persist the resulting vector store locally
all_embedding = embed_and_store(recipes_chunks, "Recipes_plus_sample")

In [10]:
# Implement retrieval logic from your FAISS database
def retrieve_from_vector_db(vector_db_path):
    """
    Load a locally saved FAISS vector store and build a retriever from it.

    Args:
        vector_db_path: Folder that contains the saved vector store.

    Returns:
        Tuple ``(retriever, vectorstore)``: the retriever created from the
        store, and the loaded store itself.
    """
    # Embedding model used to encode queries against the stored vectors
    encoder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2',
        encode_kwargs={"normalize_embeddings": True},
    )

    # NOTE(review): allow_dangerous_deserialization=True permits pickle-based
    # loading — only point this at vector stores you created yourself.
    store = FAISS.load_local(
        folder_path=vector_db_path,
        embeddings=encoder,
        distance_strategy=DistanceStrategy.COSINE,
        allow_dangerous_deserialization=True,
    )
    return store.as_retriever(), store

In [11]:
# Reload the saved vector store and obtain a retriever for it
react_retriever, react_vectorstore = retrieve_from_vector_db(
    vector_db_path="../vector_databases/vector_db_Recipes_plus_sample"
)
(type(react_retriever), type(react_vectorstore))

(langchain_core.vectorstores.base.VectorStoreRetriever,
 langchain_community.vectorstores.faiss.FAISS)

In [11]:
# Test the retrieval system with a sample query
query="""
Retrieve suitable recipes?
"""
# `get_relevant_documents` is deprecated in favour of `invoke`, and a `k`
# passed at call time is not reliably forwarded to the underlying search.
# Configuring `k` through `search_kwargs` makes the retriever return the
# 7 most similar chunks.
react_vectorstore.as_retriever(search_kwargs={"k": 7}).invoke(query)

[Document(id='f4a3062b-221c-477a-a16f-23f1e0b0f21b', metadata={'source': '../data/recipes.csv', 'row': 865, 'id': 'chunk_4093'}, page_content='cumin, ½ teaspoon ground coriander, ¼ teaspoon ground allspice, 3 tablespoons tomato paste, 4 cups low-sodium beef broth, 1 tablespoon molasses, 2  bay leaves, 1 cup dried red lentils, ½ cup dried apricots, chopped, ½  lemon, juiced'),
 Document(id='dc6eae73-7812-4736-a02b-35c8499623a5', metadata={'source': '../data/recipes.csv', 'row': 903, 'id': 'chunk_4304'}, page_content='cook_time: 1 hrs 15 mins\ntotal_time: 1 hrs 55 mins\nservings: 12\nyield: 12 servings\ningredients: 3 cups white sugar, 1 cup butter, softened, 6  eggs, beaten, 3 cups all-purpose flour, ¼ teaspoon baking soda, ½ teaspoon salt, 1 cup sour cream, ¼ cup apricot brandy, 1 teaspoon lemon zest, 1 cup white sugar, ½ cup peach schnapps, 1 cup water, 1 teaspoon lemon zest, 1 cup apricot preserves, ½ cup apricot brandy, 1 teaspoon lemon zest, ½ cup almonds'),
 Document(id='ae6abef8-

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain import hub
from langchain_ollama import OllamaLLM # Ollama LLM

In [None]:
# llm = ChatGroq(
#     model="llama-3.1-8b-instant", #"llama3-8b-8192",
#     temperature=0,
#     max_tokens=None,
#     timeout=None,
#     max_retries=2
# )

In [15]:
# Set up the local Ollama LLM
# Prerequisite: the Ollama server must be running (start it with: ollama serve)
llm = OllamaLLM(
    base_url="http://localhost:11434/",  # Default Ollama URL
    model="llama3.2",                    # Change to your preferred model
    temperature=0.7,                     # Creativity level (0-1)
)


In [16]:
# Smoke-test the connection to the LLM with a trivial prompt
test_response = llm.invoke(
    input="Hello! Say 'Connection successful!' if you can read this."
)
print(test_response)

ConnectError: [WinError 10061] Es konnte keine Verbindung hergestellt werden, da der Zielcomputer die Verbindung verweigerte

In [14]:
# Write a function to create retrieval and document processing chains
def connect_chains(retriever):
    """
    Build a retrieval chain: retrieved documents are stuffed into a prompt
    and answered by the LLM.

    Uses the notebook-level ``llm`` object and the
    "langchain-ai/retrieval-qa-chat" prompt pulled from the hub.

    Args:
        retriever: Retriever that supplies the context documents.

    Returns:
        The retrieval chain combining ``retriever`` with the
        stuff-documents chain.
    """
    # Chain that inserts the retrieved documents into the prompt for the LLM
    combine_docs_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=hub.pull("langchain-ai/retrieval-qa-chat"),
    )
    # Wire the retriever in front of the document-combining chain
    return create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=combine_docs_chain,
    )

# Build the retrieval chain on top of the recipe retriever
react_retrieval_chain = connect_chains(retriever=react_retriever)

In [15]:
# Invoke the chain with a sample question
output = react_retrieval_chain.invoke(
    # Other questions that were tried:
    # {"input": "what is a LLM?"}
    # {"input": "how many paracetamol can I take in one day?"}
    {"input": "give me the three best italian recipes"}
)
# Show the result type and its keys; a bare expression in the middle of a
# cell is not displayed, so print explicitly
print(type(output), list(output.keys()))
print(output['answer'])

Based on the provided context, here are three Italian recipes that are highly rated:

1. **Risotto with Fresh Figs and Prosciutto** (recipe ID: 234)
   - Prep Time: 10 mins
   - Cook Time: 20 mins
   - Total Time: 30 mins
   - Servings: 4
   - Yield: 4 servings

This recipe combines the creamy texture of risotto with the sweetness of fresh figs and the savory flavor of prosciutto.

2. **Fresh Fig and Prosciutto Pasta Sauce** (recipe ID: 238)
   - Prep Time: 10 mins
   - Cook Time: 10 mins
   - Total Time: 20 mins
   - Servings: 4
   - Yield: 4 servings

This recipe is a simple yet elegant pasta sauce made with fresh figs, prosciutto, and a hint of lemon zest.

3. **Incredibly Delicious Italian Cream Cake** (recipe ID: 951)
   - Prep Time: 30 mins
   - Cook Time: 30 mins
   - Total Time: 1 hour 25 mins
   - Servings: 12
   - Yield: 1 3-layer cake

This recipe is a decadent Italian cream cake made with buttermilk, butter, and flaked coconut, perfect for special occasions.

Note: These re

In [16]:
# Define your interactive chat querying function
def chat_with_rag(chain):
    """
    Run an interactive question/answer loop against a RAG chain.

    Reads questions from standard input until the user types 'exit' or
    'quit' (case-insensitive, surrounding whitespace ignored) or the input
    stream ends / is interrupted. Blank input is skipped instead of being
    sent to the chain. Errors raised while answering are printed and the
    loop continues.

    Args:
        chain: Object exposing ``invoke({"input": str})`` that returns a
            mapping with an 'answer' key.

    Returns:
        None
    """
    print("Welcome to the RAG Chat! Type 'exit' to quit.\n")
    while True:
        try:
            user_input = input("🧑 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the chat cleanly
            # instead of surfacing a traceback
            print("👋 Exiting the chat. Goodbye!")
            break

        # Nothing typed: ask again rather than querying the chain with ""
        if not user_input:
            continue

        if user_input.lower() in ["exit", "quit"]:
            print("👋 Exiting the chat. Goodbye!")
            break

        try:
            result = chain.invoke({"input": user_input})
            print(f"🤖 RAG Answer: {result['answer']}\n")
        except Exception as e:
            # Best-effort: report the failure and keep the session alive
            print(f" Error: {e}\n")

In [17]:
# Start the interactive chat session against the recipe retrieval chain
chat_with_rag(chain=react_retrieval_chain)

Welcome to the RAG Chat! Type 'exit' to quit.

👋 Exiting the chat. Goodbye!
