# Overview:
Develop a tool that assists users in exploring Indian cuisine, offering recipes, ingredient substitutions, and flavor analysis. Utilizing the OpenAI API for language understanding and culinary knowledge, and LangChain for handling user interactions and data management, this project aims to deepen the understanding and appreciation of the diverse culinary landscape of India.

In [None]:
! pip install -r requirements.txt



In [None]:
import os
import openai
from dotenv import load_dotenv

# Load environment variables (including OPENAI_API_KEY) from the .env file
load_dotenv()

# Do NOT assign a literal placeholder to OPENAI_API_KEY here: that would
# overwrite the real key just loaded above and make every OpenAI request
# fail authentication. Instead, verify the key is present and fail early
# with an actionable message.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [None]:
import os
import datetime
import requests
import pandas as pd
from PyPDF2 import PdfReader
from IPython.display import display, Markdown
from youtube_transcript_api import YouTubeTranscriptApi

# Langchain imports
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Cut-over date: strictly after this day the unpinned "gpt-3.5-turbo" name is used
target_date = datetime.date(2024, 6, 12)

# Today's date, taken from the local clock
current_date = datetime.datetime.now().date()

# Pick the model name according to which side of the cut-over date we are on
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [None]:
# Read the Indian food recipe CSV into a DataFrame.
# NOTE(review): the path looks like a Colab-mounted Google Drive location — confirm the drive is mounted first.
df1 = pd.read_csv('/content/drive/MyDrive/Dataset/IndianFoodDatasetCSV.csv')

In [None]:
# Show the dataset dimensions as (rows, columns)
df1.shape

(6871, 15)

In [None]:
# Print column names, non-null counts and dtypes to spot missing values
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6871 entries, 0 to 6870
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Srno                    6871 non-null   int64 
 1   RecipeName              6871 non-null   object
 2   TranslatedRecipeName    6871 non-null   object
 3   Ingredients             6865 non-null   object
 4   TranslatedIngredients   6865 non-null   object
 5   PrepTimeInMins          6871 non-null   int64 
 6   CookTimeInMins          6871 non-null   int64 
 7   TotalTimeInMins         6871 non-null   int64 
 8   Servings                6871 non-null   int64 
 9   Cuisine                 6871 non-null   object
 10  Course                  6871 non-null   object
 11  Diet                    6871 non-null   object
 12  Instructions            6871 non-null   object
 13  TranslatedInstructions  6871 non-null   object
 14  URL                     6871 non-null   object
dtypes: i

In [None]:
# Work on a copy so the originally loaded DataFrame (df1) stays untouched
Indian_Food = df1.copy()

In [None]:
# Preview the first rows of the working copy
Indian_Food.head()

Unnamed: 0,Srno,RecipeName,TranslatedRecipeName,Ingredients,TranslatedIngredients,PrepTimeInMins,CookTimeInMins,TotalTimeInMins,Servings,Cuisine,Course,Diet,Instructions,TranslatedInstructions,URL
0,1,Masala Karela Recipe,Masala Karela Recipe,"6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...","6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...",15,30,45,6,Indian,Side Dish,Diabetic Friendly,"To begin making the Masala Karela Recipe,de-se...","To begin making the Masala Karela Recipe,de-se...",https://www.archanaskitchen.com/masala-karela-...
1,2,टमाटर पुलियोगरे रेसिपी - Spicy Tomato Rice (Re...,Spicy Tomato Rice (Recipe),"2-1/2 कप चावल - पका ले,3 टमाटर,3 छोटा चमच्च बी...","2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...",5,10,15,3,South Indian Recipes,Main Course,Vegetarian,टमाटर पुलियोगरे बनाने के लिए सबसे पहले टमाटर क...,"To make tomato puliogere, first cut the tomato...",http://www.archanaskitchen.com/spicy-tomato-ri...
2,3,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...","1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...",20,30,50,4,South Indian Recipes,South Indian Breakfast,High Protein Vegetarian,"To begin making the Ragi Vermicelli Recipe, fi...","To begin making the Ragi Vermicelli Recipe, fi...",http://www.archanaskitchen.com/ragi-vermicelli...
3,4,Gongura Chicken Curry Recipe - Andhra Style Go...,Gongura Chicken Curry Recipe - Andhra Style Go...,"500 grams Chicken,2 Onion - chopped,1 Tomato -...","500 grams Chicken,2 Onion - chopped,1 Tomato -...",15,30,45,4,Andhra,Lunch,Non Vegeterian,To begin making Gongura Chicken Curry Recipe f...,To begin making Gongura Chicken Curry Recipe f...,http://www.archanaskitchen.com/gongura-chicken...
4,5,आंध्रा स्टाइल आलम पचड़ी रेसिपी - Adrak Chutney ...,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"1 बड़ा चमच्च चना दाल,1 बड़ा चमच्च सफ़ेद उरद दाल,2...","1 tablespoon chana dal, 1 tablespoon white ura...",10,20,30,4,Andhra,South Indian Breakfast,Vegetarian,आंध्रा स्टाइल आलम पचड़ी बनाने के लिए सबसे पहले ...,"To make Andhra Style Alam Pachadi, first heat ...",https://www.archanaskitchen.com/andhra-style-a...


In [None]:
class WebContentLoader(BaseLoader):
    """Load the body of each URL as one LangChain ``Document``.

    Args:
        urls: Iterable of URLs to fetch with an HTTP GET.
        timeout: Seconds to wait for each request before giving up.
            ``requests`` applies no timeout by default, so without this a
            stalled server could block the load indefinitely.
    """

    def __init__(self, urls, timeout=30):
        self.urls = urls
        self.timeout = timeout

    def lazy_load(self):
        """Yield one ``Document`` per successfully fetched URL.

        The page content is the response text exactly as returned by the
        server (no HTML stripping is performed); the URL is stored under
        ``metadata["source"]``.

        URLs that fail (connection error, timeout, or an HTTP 4xx/5xx
        status) are reported and skipped, so one bad link does not abort
        the whole load and an error page is never indexed as content.
        """
        for url in self.urls:
            try:
                response = requests.get(url, timeout=self.timeout)
                # Turn HTTP error statuses into exceptions instead of
                # silently treating the error page as the document body.
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Failed to fetch {url}: {exc}")
                continue
            yield Document(page_content=response.text, metadata={"source": url})

In [None]:
class PDFLoader(BaseLoader):
    """Load PDF files page by page as LangChain ``Document`` objects.

    Args:
        file_paths: Iterable of paths to PDF files, each opened with
            ``PdfReader``.
    """

    def __init__(self, file_paths):
        self.file_paths = file_paths

    def lazy_load(self):
        """Yield one ``Document`` per page that has extractable text.

        Each document carries ``metadata["source"]`` (the file path) and
        ``metadata["page"]`` (0-based page index within that file) so an
        answer can be traced back to the page it came from.
        """
        for file_path in self.file_paths:
            reader = PdfReader(file_path)
            for page_number, page in enumerate(reader.pages):
                # Text extraction can come back empty (e.g. image-only
                # pages); normalise to "" and skip pages with no content.
                text = page.extract_text() or ""
                if not text.strip():
                    continue
                yield Document(
                    page_content=text,
                    metadata={"source": file_path, "page": page_number},
                )

In [None]:
class TXTLoader:
    """Load UTF-8 text files, producing one LangChain ``Document`` per file.

    Args:
        file_paths: Iterable of paths to text files.
    """

    def __init__(self, file_paths):
        self.file_paths = file_paths

    def lazy_load(self):
        """Yield a ``Document`` for every path that is a regular file.

        The whole file is read as UTF-8 into ``page_content`` and the path
        is stored under ``metadata["source"]``. Paths that are not regular
        files (missing, or a directory) are reported and skipped.
        """
        for file_path in self.file_paths:
            # isfile() rather than exists(): a directory "exists" but
            # open() on it raises, which would abort the whole load.
            if not os.path.isfile(file_path):
                print(f"File not found: {file_path}")
                continue
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
            # Yield after the file is closed so the handle is not kept
            # open while the consumer processes the document.
            yield Document(page_content=content, metadata={"source": file_path})

    def load(self):
        """Return a generator over the documents (delegates to ``lazy_load``)."""
        return self.lazy_load()

In [None]:
# List of YouTube video IDs whose transcripts are downloaded below
video_ids = [
    "fAtNuppiof0", "METruOPZ-2Y", "LjuHYk-xvUY", "MQSeFSR6SPk", "cspsBS1xRFI",
    "Y-4FtucSH9w", "I8MXDcsYuHI", "w19YWds-HVc", "-UhQ6U_Z3Hc", "LFxzrFoyqpI",
    "EbnSMkLNinE", "aUvxbItJQ8w", "VshF56wWzmU", "B1O8bXHksiI", "gNhcGal2U4U",
    "N0ADs03e5sM", "r5uzurmhm0k", "fdSgyP-3CkA", "4nCYRbOIQVg", "Rg9TCHgaHU0",
    "5RxBs3Kst3Y", "PHeC6LE_0h4", "dC-kf3eICVo", "RRNrG_5UT9o", "xjkfsp8Vpto",
    "Rjjc4jq3H3s", "5ykr5cF_hbA", "yyxYS7f1ssM", "l3NFdmQeAgw", "AMjrVlFd0E4",
    "BENnIgMT0dE", "YRhCPx74wJ4", "v29ygWq36T0", "uh7wIERAHYc", "gnIhmzP3Ay8",
    "15-uMazGRO4", "BJDv5TAKnFs", "xyisQtRDa5E", "Ys1WdgS3YVY", "TPOXrJKiSeU",
    "bLi5nLUWGF8", "MyWSGjgftNs", "wRZ4HnKg3lY", "Sq4KPewONpA", "aV_M3EvgOJY",
    "2IT1Osvuydw", "mIcNBj_-qo4", "1BgVCXxPjIc", "3Im2GBVme8U", "vYYgLwGg97Q"
]

# Collect each transcript as its own chunk and join once at the end, rather
# than repeatedly re-copying one ever-growing string with `+=`.
transcript_chunks = []
# Remember which videos failed so the final summary is accurate.
failed_video_ids = []

for video_id in video_ids:
    try:
        # Fetch the transcript and flatten its text parts, one per line
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = '\n'.join(t['text'] for t in transcript)
    except Exception as e:
        # Best-effort download: report the failure and carry on with the
        # remaining videos instead of aborting the whole run.
        print(f"An error occurred while downloading the transcript for video {video_id}: {e}")
        failed_video_ids.append(video_id)
    else:
        transcript_chunks.append(f"Transcript for Video ID {video_id}:\n{transcript_text}\n\n")
        print(f"Transcript for video {video_id} downloaded successfully.")

all_transcripts = ''.join(transcript_chunks)

# Save all transcripts to a single file (relative to the current working directory)
filename = 'combined_transcripts.txt'
with open(filename, 'w', encoding='utf-8') as file:
    file.write(all_transcripts)

# Report what actually happened instead of unconditionally claiming success
print(f"{len(transcript_chunks)} of {len(video_ids)} transcripts downloaded and combined into {filename}.")
if failed_video_ids:
    print(f"Transcripts could not be downloaded for: {', '.join(failed_video_ids)}")

In [None]:
# --- CSV sources: one CSVLoader per dataset file ---
csv_files = ['/content/drive/MyDrive/Dataset/IndianFoodDatasetCSV.csv']
csv_loaders = [CSVLoader(file_path=csv_path) for csv_path in csv_files]

# --- Web sources: pages fetched through WebContentLoader ---
web_urls = ["https://en.wikipedia.org/wiki/List_of_Indian_dishes"]  # extend with more URLs as needed
web_loader = WebContentLoader(urls=web_urls)

# --- PDF sources: files read through PDFLoader ---
pdf_files = ['/content/drive/MyDrive/Dataset/Recipe-Book.pdf']  # extend with more PDF paths as needed
pdf_loader = PDFLoader(file_paths=pdf_files)

# --- Plain-text sources: files read through TXTLoader ---
txt_files = ['/content/combined_transcripts.txt']  # extend with more text file paths as needed
txt_loader = TXTLoader(file_paths=txt_files)
txt_data = txt_loader.load()

In [None]:
# Gather every loader (CSV, web, PDF, text) into a single flat list
all_loaders = [*csv_loaders, web_loader, pdf_loader, txt_loader]

# Build one index over the documents produced by all of the loaders
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders(all_loaders)

  warn_deprecated(


In [None]:
def query_and_display(query, index, model, temperature=0, model_name='gpt-3.5-turbo-instruct'):
    """Answer ``query`` against ``index`` and render the answer as Markdown.

    Args:
        query: Question text passed to ``index.query``.
        index: Object exposing ``query(question, llm=...)``.
        model: Callable (e.g. an LLM class) invoked as
            ``model(temperature=..., model=...)`` to build the LLM.
        temperature: Forwarded to ``model``.
        model_name: Forwarded to ``model`` as its ``model`` keyword.

    Returns:
        None. The answer is shown via ``display(Markdown(...))``.
    """
    # Instantiate the LLM from the supplied factory
    llm = model(model=model_name, temperature=temperature)

    # Ask the index, then render whatever it answers
    answer = index.query(query, llm=llm)
    display(Markdown(answer))

In [None]:
# Demo: read a question from the user and answer it from the index with the OpenAI LLM class
query = input("Ask anything about Indian Cuisines: ")
query_and_display(query=query, index=index, model=OpenAI)

Ask anything about Indian Cuisines: Himachali Mutton Rara what is the cooking time ?


 The cooking time for Himachali Mutton Rara is approximately 1 hour and 30 minutes.