In [1]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np  
data=pd.read_csv('IndianFoodDatasetCSV.csv')

data['Course'].unique()

array(['Side Dish', 'Main Course', 'South Indian Breakfast', 'Lunch',
       'Snack', 'High Protein Vegetarian', 'Dinner', 'Appetizer',
       'Indian Breakfast', 'Dessert', 'North Indian Breakfast',
       'One Pot Dish', 'World Breakfast', 'Non Vegeterian', 'Vegetarian',
       'Eggetarian', 'No Onion No Garlic (Sattvic)', 'Brunch', 'Vegan',
       'Sugar Free Diet'], dtype=object)

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
data.dropna(inplace=True)

In [3]:
def preprocess_keywords(keywords):
    keywords = keywords.lower()
    keywords = keywords.replace(",", "")
    return keywords  # Return a string, not a list


In [4]:
def recipe_recommender(keywords):
    preprocessed_keywords = preprocess_keywords(keywords)
    all_text = data['TranslatedIngredients'].tolist()
    lowered_text = [text.lower() for text in all_text]
    lowered_text.append(preprocessed_keywords)  # Append the preprocessed keywords string
    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform(lowered_text)
    user_vector = matrix[-1]
    cosine_similarities = linear_kernel(user_vector, matrix[:-1])  # Compare with all recipes except the last one (user input)
    food_ind = similar_recipes(cosine_similarities[0])
    return data['TranslatedRecipeName'].iloc[food_ind]

In [5]:
def similar_recipes(similarities):
    sim_score = list(enumerate(similarities))
    sim_score = sorted(sim_score, key=lambda x:x[1], reverse=True)
    sim_score = sim_score[:30]  # Get top 30 similar recipes
    food_ind = [i[0] for i in sim_score]
    return food_ind


In [6]:
keywords = input("Enter food item")
recommendations = recipe_recommender(keywords)

print("Recommended recipes:")
print(recommendations)

Recommended recipes:
2057                           Chicken Malai Kabab Recipe
1692                 Chicken In Tomato Onion Gravy Recipe
2175                          Chicken And Egg Soup Recipe
2987              Slow Cooker Mexican Chicken Stew Recipe
5263                         Peanut Butter Chicken Recipe
5321          Murgh Masala Recipe - Chicken Tangdi Masala
5448                          Mexican Chicken Soup Recipe
6427                          French Onion Chicken Recipe
457     Ginger Sesame Chicken With Bok Choy And Mushro...
253                   Chettinad Dry Pepper Chicken Recipe
4526                       Coriander Chicken Roast Recipe
6639                     Nando's Peri Peri Chicken Recipe
3327              Spicy Moroccan Rice With Chicken Recipe
5640                         Grilled Chicken Salad Recipe
6079                        Dragon Chicken Sizzler Recipe
3074                    Japanese Chicken Udon Soup Recipe
5996             Spicy Chicken Curry Recipe In Naga

In [8]:
from sklearn.preprocessing import LabelEncoder
Lableencoder = LabelEncoder()
le=LabelEncoder()
data['Course_encoded'] = Lableencoder.fit_transform(data['Course'])
data['Cuisine_encoded']= le.fit_transform(data['Cuisine'])

In [9]:
mapping = dict(zip(Lableencoder.classes_, range(len(Lableencoder.classes_))))
print(mapping)

{'Appetizer': 0, 'Brunch': 1, 'Dessert': 2, 'Dinner': 3, 'Eggetarian': 4, 'High Protein Vegetarian': 5, 'Indian Breakfast': 6, 'Lunch': 7, 'Main Course': 8, 'No Onion No Garlic (Sattvic)': 9, 'Non Vegeterian': 10, 'North Indian Breakfast': 11, 'One Pot Dish': 12, 'Side Dish': 13, 'Snack': 14, 'South Indian Breakfast': 15, 'Sugar Free Diet': 16, 'Vegan': 17, 'Vegetarian': 18, 'World Breakfast': 19}


In [10]:
mapping1 = dict(zip(le.classes_, range(len(le.classes_))))
print(mapping1)


{'Afghan': 0, 'African': 1, 'American': 2, 'Andhra': 3, 'Appetizer': 4, 'Arab': 5, 'Asian': 6, 'Assamese': 7, 'Awadhi': 8, 'Bengali Recipes': 9, 'Bihari': 10, 'British': 11, 'Brunch': 12, 'Burmese': 13, 'Cantonese': 14, 'Caribbean': 15, 'Chettinad': 16, 'Chinese': 17, 'Coastal Karnataka': 18, 'Continental': 19, 'Coorg': 20, 'Dessert': 21, 'Dinner': 22, 'European': 23, 'French': 24, 'Fusion': 25, 'Goan Recipes': 26, 'Greek': 27, 'Gujarati Recipes\ufeff': 28, 'Haryana': 29, 'Himachal': 30, 'Hunan': 31, 'Hyderabadi': 32, 'Indian': 33, 'Indo Chinese': 34, 'Indonesian': 35, 'Italian Recipes': 36, 'Japanese': 37, 'Jewish': 38, 'Jharkhand': 39, 'Karnataka': 40, 'Kashmiri': 41, 'Kerala Recipes': 42, 'Kongunadu': 43, 'Konkan': 44, 'Korean': 45, 'Lucknowi': 46, 'Lunch': 47, 'Maharashtrian Recipes': 48, 'Malabar': 49, 'Malaysian': 50, 'Malvani': 51, 'Mangalorean': 52, 'Mediterranean': 53, 'Mexican': 54, 'Middle Eastern': 55, 'Mughlai': 56, 'Nagaland': 57, 'Nepalese': 58, 'North East India Recipes

In [11]:
from sklearn.ensemble import RandomForestClassifier

np.random.seed(0)

In [12]:
data['is_train']=np.random.uniform(0,1,len(data))<=.75
data.head()

Unnamed: 0,Srno,RecipeName,TranslatedRecipeName,Ingredients,TranslatedIngredients,PrepTimeInMins,CookTimeInMins,TotalTimeInMins,Servings,Cuisine,Course,Diet,Instructions,TranslatedInstructions,URL,Course_encoded,Cuisine_encoded,is_train
0,1,Masala Karela Recipe,Masala Karela Recipe,"6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...","6 Karela (Bitter Gourd/ Pavakkai) - deseeded,S...",15,30,45,6,Indian,Side Dish,Diabetic Friendly,"To begin making the Masala Karela Recipe,de-se...","To begin making the Masala Karela Recipe,de-se...",https://www.archanaskitchen.com/masala-karela-...,13,33,True
1,2,टमाटर पुलियोगरे रेसिपी - Spicy Tomato Rice (Re...,Spicy Tomato Rice (Recipe),"2-1/2 कप चावल - पका ले,3 टमाटर,3 छोटा चमच्च बी...","2-1 / 2 cups rice - cooked, 3 tomatoes, 3 teas...",5,10,15,3,South Indian Recipes,Main Course,Vegetarian,टमाटर पुलियोगरे बनाने के लिए सबसे पहले टमाटर क...,"To make tomato puliogere, first cut the tomato...",http://www.archanaskitchen.com/spicy-tomato-ri...,8,72,True
2,3,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...","1-1/2 cups Rice Vermicelli Noodles (Thin),1 On...",20,30,50,4,South Indian Recipes,South Indian Breakfast,High Protein Vegetarian,"To begin making the Ragi Vermicelli Recipe, fi...","To begin making the Ragi Vermicelli Recipe, fi...",http://www.archanaskitchen.com/ragi-vermicelli...,15,72,True
3,4,Gongura Chicken Curry Recipe - Andhra Style Go...,Gongura Chicken Curry Recipe - Andhra Style Go...,"500 grams Chicken,2 Onion - chopped,1 Tomato -...","500 grams Chicken,2 Onion - chopped,1 Tomato -...",15,30,45,4,Andhra,Lunch,Non Vegeterian,To begin making Gongura Chicken Curry Recipe f...,To begin making Gongura Chicken Curry Recipe f...,http://www.archanaskitchen.com/gongura-chicken...,7,3,True
4,5,आंध्रा स्टाइल आलम पचड़ी रेसिपी - Adrak Chutney ...,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"1 बड़ा चमच्च चना दाल,1 बड़ा चमच्च सफ़ेद उरद दाल,2...","1 tablespoon chana dal, 1 tablespoon white ura...",10,20,30,4,Andhra,South Indian Breakfast,Vegetarian,आंध्रा स्टाइल आलम पचड़ी बनाने के लिए सबसे पहले ...,"To make Andhra Style Alam Pachadi, first heat ...",https://www.archanaskitchen.com/andhra-style-a...,15,3,True


In [13]:
train , test = data[data['is_train']==True], data[data['is_train']==False]
print('Train', len(train))
print('Test ', len(test))

Train 5178
Test  1687


In [14]:
features = ['CookTimeInMins', 'TotalTimeInMins', 'Servings','Course_encoded','Cuisine_encoded']
selected_data = data[features]

selected_data

Unnamed: 0,CookTimeInMins,TotalTimeInMins,Servings,Course_encoded,Cuisine_encoded
0,30,45,6,13,33
1,10,15,3,8,72
2,30,50,4,15,72
3,30,45,4,7,3
4,20,30,4,15,3
...,...,...,...,...,...
6866,45,60,4,7,26
6867,60,90,4,11,60
6868,10,15,2,13,3
6869,45,75,4,7,41


In [15]:
y= pd.factorize(train['Course_encoded'])[0]
y

array([0, 1, 2, ..., 8, 0, 3])

In [16]:
# Check if features has only one element
if len(features) == 1:
    selected_data = train[features[0]]
else:
    selected_data = train[features]  # No need for to_frame() here



clf=RandomForestClassifier(n_jobs=2, random_state=0)
y, _ = pd.factorize(train['Course_encoded'])  # Extract only the encoded labels (y)
clf.fit(selected_data, y)

In [17]:
vAL = clf.predict(test[features])

print(vAL[:100])

[ 3  5  9  4  0  6  4  0  9  0  6  3  0  5  4  0  0  3  7  7  3  0  0  6
 11  3  3  5  3  3  3  0  4  4  3 11  5  3  5  0  7  5  7  4  0  5  0  3
  0  0  0  3  5  3  4  3  4  6  7 11  0  3  5  6  1  5  3  4  7  3  0  3
 10  4  9  3  6  5  1  3  6  3  5  3  5  4  4  3  3  4  9  3  4  1  3  3
  2  9  6  7]


In [18]:
clf.predict_proba(test[features])[0:10]

array([[0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.01, 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.16, 0.32, 0.  , 0.  , 0.02, 0.  , 0.38, 0.  ,
        0.  , 0.  , 0.  , 0.02, 0.  , 0.  , 0.1 , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.02, 0.94, 0.  , 0.  , 0.04, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.92, 0.08, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.01, 0.98, 0.  , 0.  , 0.01, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0. 

In [19]:
# Assuming you have a list of class names for your dataset
class_names = ['Side Dish', 'Main Course', 'South Indian Breakfast', 'Lunch','Snack', 'High Protein Vegetarian', 'Dinner', 'Appetizer',
                'Indian Breakfast', 'Dessert', 'North Indian Breakfast',
                'One Pot Dish', 'World Breakfast', 'Non Vegeterian', 'Vegetarian',
                'Eggetarian', 'No Onion No Garlic (Sattvic)', 'Brunch', 'Vegan',
                'Sugar Free Diet']

predicted_indices = clf.predict(test[features])
predicted_labels = [class_names[i] for i in predicted_indices]

In [20]:
list1=predicted_labels[:25]

In [21]:
list2=test['Course'].head(25)

In [22]:
list3=[]
for i in list1:
    for j in list2:
        if i==j:
            list3.append(1)
        else:
            list3.append(0)
            
            
print(list3)

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 