In [1]:
from langchain_community.document_loaders import CSVLoader
import os
import ast

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def dataset_loader(file_name):

    path = f"../pdfs/{file_name}"

    if not os.path.exists(path):
        raise FileNotFoundError(f"{path}File not found!") 
    
    else:
        if str(file_name).endswith('.csv'):
            loader = CSVLoader(path)
            df = loader.load()
    return df

In [3]:
data = dataset_loader('Cleaned_Indian_Food_Dataset.csv')
len(data)

5938

In [4]:
data[0]

Document(metadata={'source': '../pdfs/Cleaned_Indian_Food_Dataset.csv', 'row': 0}, page_content='TranslatedRecipeName: Masala Karela Recipe\nTranslatedIngredients: 1 tablespoon Red Chilli powder,3 tablespoon Gram flour (besan),2 teaspoons Cumin seeds (Jeera),1 tablespoon Coriander Powder (Dhania),2 teaspoons Turmeric powder (Haldi),Salt - to taste,1 tablespoon Amchur (Dry Mango Powder),6 Karela (Bitter Gourd/ Pavakkai) - deseeded,Sunflower Oil - as required,1 Onion - thinly sliced\nTotalTimeInMins: 45\nCuisine: Indian\nTranslatedInstructions: To begin making the Masala Karela Recipe,de-seed the karela and slice.\nDo not remove the skin as the skin has all the nutrients.\nAdd the karela to the pressure cooker with 3 tablespoon of water, salt and turmeric powder and pressure cook for three whistles.\nRelease the pressure immediately and open the lids.\nKeep aside.Heat oil in a heavy bottomed pan or a kadhai.\nAdd cumin seeds and let it sizzle.Once the cumin seeds have sizzled, add onions

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size= 1000,chunk_overlap = 200,separators=['\n\n','\n',' ',''])

In [6]:
chunks = text_splitter.split_documents(data)
len(chunks)

17551

In [7]:
from sentence_transformers import SentenceTransformer

class EmbeddingManager():
    def __init__(self,model_name = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        self.model = SentenceTransformer(self.model_name)
        print("Model Dimension is - ",self.model.get_sentence_embedding_dimension())
        return self.model

    def convert_text_into_embeddings(self,data):
        embeddings = self.model.encode(data,show_progress_bar=True)
        print(f"Total {len(embeddings)} is generated..")
        return embeddings

        

In [8]:
embedding_manager = EmbeddingManager()

Model Dimension is -  384


In [9]:
texts = [chunk.page_content for chunk in chunks]
embeddings = embedding_manager.convert_text_into_embeddings(texts)
len(embeddings)

Batches: 100%|██████████| 549/549 [01:41<00:00,  5.43it/s]


Total 17551 is generated..


17551

In [10]:
len(embeddings)

17551

In [11]:
import chromadb
import uuid

In [12]:
class VectorStore():
    def __init__(self,collection_name = 'indian_recipes',persistent_directory='../data'):
        self.collection_name = collection_name
        self.collection = None
        self.client = None
        self.persistent_directory = persistent_directory
        self._initalize_store()

    def _initalize_store(self):
        try :
            os.makedirs(self.persistent_directory,exist_ok=True)
            self.client = chromadb.PersistentClient(self.persistent_directory)
            print("Client initialised successfully")
            self.collection = self.client.get_or_create_collection(self.collection_name)
            print(f"{self.collection_name} collection initialised successfully")
        except Exception as e:
            raise f"Error {e}"
        
    def add_documents(self,documents,embeddings):
        ids = []
        documents_texts = []
        metadatas = []
        embedding_list = []

        if len(documents) != len(embeddings):
            return f"Length of documents{len(documents)} and embeddings{len(embeddings)} must me same"
        
        else:
            for i,(doc,embedding) in enumerate(zip(documents,embeddings)):
                id = f"_{uuid.uuid4().hex[:8]}_{i}"
                ids.append(id)

                metadata = dict(doc.metadata)
                metadata['index_id'] = metadata['row']
                metadata['content_length'] = len(doc.page_content)
                metadatas.append(metadata)

                documents_texts.append(doc.page_content)

                embedding_list.append(embedding.tolist())
            
            try :
                self.collection.add(
                    ids=ids,
                    embeddings=embeddings,
                    metadatas=metadatas,
                    documents=documents_texts,
                )

                print("Documets uploaded succesfully to vector database")
            
            except Exception as e:
                raise print(f"Error adding documents in vector store : {e}")

In [13]:
vectore_database = VectorStore()

Client initialised successfully
indian_recipes collection initialised successfully


In [14]:
# vectore_database.add_documents(chunks,embeddings) ---------> For whole data
batch_size = 5000
for i in range(0,len(chunks),batch_size):
    batch_chucks = chunks[i:i+batch_size]
    batch_embedding = embeddings[i:i+batch_size]
    vectore_database.add_documents(batch_chucks,batch_embedding)
    print(f"Total added -- {i} remaining -- {len(chunks)-i}") 

Documets uploaded succesfully to vector database
Total added -- 0 remaining -- 17551
Documets uploaded succesfully to vector database
Total added -- 5000 remaining -- 12551
Documets uploaded succesfully to vector database
Total added -- 10000 remaining -- 7551
Documets uploaded succesfully to vector database
Total added -- 15000 remaining -- 2551


In [15]:
batch_chucks = chunks[i:]
vectore_database.add_documents(batch_chucks,batch_embedding)

Documets uploaded succesfully to vector database


In [16]:
class RAGRetriever():
    def __init__(self,vectore_database,embedding_manager):
        self.vectore_database = vectore_database
        self.embedding_manager = embedding_manager

    def retriever(self,query,top_k=5,threshold=0.0):
        query = self.embedding_manager.convert_text_into_embeddings(query)
        
        try :
            result = self.vectore_database.collection.query(
                query_embeddings = [query.tolist( )],
                n_results = top_k
            )

            retreived_docs = []

            if result['documents'] and result['documents'][0]:
                ids = result['ids'][0]
                documents = result['documents'][0]
                metadatas = result['metadatas'][0]
                distances = result['distances'][0]

                for i,(id,doc,metadata,distance) in enumerate(zip(ids,documents,metadatas,distances)):
                    similarity_score = 1 - distance
                    if similarity_score >= threshold:
                        retreived_docs.append({
                            'id' : id,
                            'content' : doc,
                            'metadata' : metadata,
                            'distance' : distance,
                            'rank' : i + 1,
                            'similarity_score': similarity_score
                        })
                print('Similar content successfully extracted')
                return retreived_docs

            else:
                print("No content found similar to query")
        

        except Exception as e:
            raise f"Cannot get an answer {e}"

In [17]:
RAG = RAGRetriever(vectore_database,embedding_manager)

In [18]:
ans = RAG.retriever('How to make Kaju katri')
ans

Batches: 100%|██████████| 1/1 [00:00<00:00,  9.30it/s]

Total 384 is generated..
Similar content successfully extracted





[{'id': '_9595118c_4866',
  'content': 'TranslatedRecipeName: Chocolate Kaju Katli Recipe - Chocolate Kaju Barfi\nTranslatedIngredients: 100 ml Water,1 cup Milk chocolate - or dark chocolate,1 cup Sugar,2 1/4 cups Cashew nuts\nTotalTimeInMins: 50\nCuisine: Indian\nTranslatedInstructions: To begin making the Chocolate Kaju Katli Recipe, first of all grind the cashew nuts in a blender to make a fine powder.Take a heavy bottomed pan, add the sugar and water to it.\nPlace it on a medium heat and bring it to boil.\nOnce the sugar has dissolved completely, turn the heat down and allow the mixture to boil for a few minutes till it reaches a one-string consistency.To test if it has reached the right consistency, transfer a few drops of the syrup on a clean plate, allow it to cool for 15-20 seconds and test it for the formation of a single thread between your thumb and index finger.If the syrup has reached single thread consistency, transfer the ground cashew nut powder into the syrup and stir 

In [None]:
Groq_key = "<API KEY>"

In [20]:
def rag_simple(retriever,query,llm):
    result = retriever.retriever(query,5)

    context = "\n\n".join([doc['content'] for doc in result]) if result else ""
    prompt = """Using below given context give me answer based on that
        context : {context}
        query : {query}
        answer : ..."""
    
    answer = llm.invoke([prompt.format(context = context,query = query)])
    return answer.content

In [21]:
from langchain_groq import ChatGroq

llm = ChatGroq(api_key=Groq_key,model="llama-3.3-70b-versatile",temperature=0.1,max_tokens=1024)

ans = rag_simple(RAG,"How to make Kaju Katri",llm)
print(ans)

Batches: 100%|██████████| 1/1 [00:00<00:00, 31.86it/s]


Total 384 is generated..
Similar content successfully extracted
To make Kaju Katli, follow these steps:

1. Grind 2 1/4 cups of cashew nuts in a blender to make a fine powder.
2. Take a heavy-bottomed pan, add 1 cup of sugar and 100 ml of water to it, and place it on medium heat.
3. Bring the mixture to a boil, then turn the heat down and allow it to boil for a few minutes until it reaches a one-string consistency.
4. To test the consistency, transfer a few drops of the syrup to a clean plate, let it cool for 15-20 seconds, and check for the formation of a single thread between your thumb and index finger.
5. Once the syrup has reached the right consistency, transfer the ground cashew nut powder into the syrup and stir continuously for 4-5 minutes.
6. Remove from heat and let it cool.
7. Once cooled, add 1 cup of milk chocolate or dark chocolate and mix well.
8. Knead the mixture into a dough-like consistency.
9. Grease your hands and a rolling pin, then roll out the dough to the desir