<a href="https://colab.research.google.com/github/MENAKAANBUKKARASU/food-item-categorization-model/blob/main/item_categorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install xlsxwriter




In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split

# Load your Excel file
file_path = 'IndianFood.xlsx'
df = pd.read_excel('IndianFood.xlsx')

# Combine text features
df['Combined_Text'] = df[['RecipeName', 'Ingredients', 'Instructions']].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1)

# Drop rows with missing values in 'Combined_Text'
df = df.dropna(subset=['Combined_Text'])

# Feature extraction
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Combined_Text'])

# Labeling
y = df['Cuisine']  # You can use 'Cuisine', 'Course', 'Diet', etc., based on your categorization needs

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Multinomial Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Evaluate the model
accuracy = classifier.score(X_test, y_test)
print(f'Model Accuracy: {accuracy}')

# Categorize the entire dataset
df['Predicted_Cuisine'] = classifier.predict(X)

# Save the categorized data to a new Excel file
output_file_path = 'categorized_recipes.xlsx'
with pd.ExcelWriter(output_file_path, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Categorized Recipes', index=False)


Model Accuracy: 0.39054545454545453


In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

# Load your Excel file
file_path = 'IndianFood.xlsx'
df = pd.read_excel(file_path)

# Combine text features
df['Combined_Text'] = df[['RecipeName', 'Ingredients', 'Instructions']].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1)

# Drop rows with missing values in 'Combined_Text'
df = df.dropna(subset=['Combined_Text'])

# Feature extraction
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Combined_Text'])

# Labeling
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Cuisine'])  # You can use 'Cuisine', 'Course', 'Diet', etc., based on your categorization needs

# Train a Multinomial Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X, y)

# Get user input
user_input = input("Enter a dish name: ")

# Vectorize the user input
user_input_vectorized = vectorizer.transform([user_input])

# Predict category for the user input
predicted_label = label_encoder.inverse_transform(classifier.predict(user_input_vectorized))[0]

# Display information for the predicted category
result_df = df[df['Cuisine'] == predicted_label].iloc[0]

print(f"\nInformation for '{user_input}':\n")
print(f"Recipe Name: {result_df['RecipeName']}")
print(f"Ingredients: {result_df['Ingredients']}")
print(f"Cuisine: {result_df['Cuisine']}")
print(f"Course: {result_df['Course']}")
print(f"Diet: {result_df['Diet']}")


Enter a dish name: kerala masala curry

Information for 'kerala masala curry':

Recipe Name: Masala Karela Recipe
Ingredients: 6 Karela (Bitter Gourd/ Pavakkai) - deseeded,Salt - to taste,1 Onion - thinly sliced,3 tablespoon Gram flour (besan),2 teaspoons Turmeric powder (Haldi),1 tablespoon Red Chilli powder,2 teaspoons Cumin seeds (Jeera),1 tablespoon Coriander Powder (Dhania),1 tablespoon Amchur (Dry Mango Powder),Sunflower Oil - as required
Cuisine: Indian
Course: Side Dish
Diet: Diabetic Friendly


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load your Excel file
file_path = 'IndianFood.xlsx'
df = pd.read_excel(file_path)

# Combine text features
df['Combined_Text'] = df[['RecipeName', 'Ingredients', 'Instructions']].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1)

# Drop rows with missing values in 'Combined_Text'
df = df.dropna(subset=['Combined_Text'])

# Feature extraction
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Combined_Text'])

# Labeling for cuisine
cuisine_label_encoder = LabelEncoder()
df['Cuisine_Label'] = cuisine_label_encoder.fit_transform(df['Cuisine'])

# Labeling for course
course_label_encoder = LabelEncoder()
df['Course_Label'] = course_label_encoder.fit_transform(df['Course'])

# Labeling for diet
diet_label_encoder = LabelEncoder()
df['Diet_Label'] = diet_label_encoder.fit_transform(df['Diet'])

# Combine all labels to create a unique identifier for each combination of cuisine, course, and diet
df['Label'] = df['Cuisine_Label'].astype(str) + '_' + df['Course_Label'].astype(str) + '_' + df['Diet_Label'].astype(str)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, df['Label'], test_size=0.2, random_state=42)

# Train a Random Forest classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Predictions
y_pred = classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

# Display classification report
print(classification_report(y_test, y_pred))

# Get user input
user_input = input("Enter a dish name: ")

# Vectorize the user input
user_input_vectorized = vectorizer.transform([user_input])

# Predict the label for the user input
predicted_label = classifier.predict(user_input_vectorized)[0]

# Split the predicted label back into cuisine, course, and diet labels
predicted_cuisine_label, predicted_course_label, predicted_diet_label = map(int, predicted_label.split('_'))

# Inverse transform labels to get original values
predicted_cuisine = cuisine_label_encoder.inverse_transform([predicted_cuisine_label])[0]
predicted_course = course_label_encoder.inverse_transform([predicted_course_label])[0]
predicted_diet = diet_label_encoder.inverse_transform([predicted_diet_label])[0]

# Display predicted cooking method based on the cuisine, course, and diet labels
print(f"\nPredicted Cooking Method for '{user_input}':")
print(f"Cuisine: {predicted_cuisine}")
print(f"Course: {predicted_course}")
print(f"Diet: {predicted_diet}")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model Accuracy: 0.24
              precision    recall  f1-score   support

      0_14_9       0.00      0.00      0.00         1
     10_11_9       0.00      0.00      0.00         1
      10_3_9       0.00      0.00      0.00         1
      10_7_9       0.00      0.00      0.00         4
      13_7_9       0.00      0.00      0.00         1
      14_0_9       0.00      0.00      0.00         1
      14_8_9       0.00      0.00      0.00         0
      15_0_9       0.00      0.00      0.00         1
      16_0_3       0.00      0.00      0.00         1
     16_14_9       0.00      0.00      0.00         2
     16_15_8       0.00      0.00      0.00         1
     16_15_9       0.00      0.00      0.00         1
     16_18_9       0.00      0.00      0.00         1
      16_2_9       0.00      0.00      0.00         2
      16_3_9       0.00      0.00      0.00         1
      16_7_3       0.00      0.00      0.00         1
      16_7_6       0.00      0.00      0.00         3
      

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

# Load your Excel file
file_path = 'IndianFood.xlsx'
df = pd.read_excel(file_path)

# Combine text features
df['Combined_Text'] = df[['RecipeName', 'Ingredients', 'Instructions']].apply(lambda x: ' '.join(x.dropna().astype(str)), axis=1)

# Drop rows with missing values in 'Combined_Text'
df = df.dropna(subset=['Combined_Text'])

# Feature extraction
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Combined_Text'])

# Labeling for cuisine
cuisine_label_encoder = LabelEncoder()
df['Cuisine_Label'] = cuisine_label_encoder.fit_transform(df['Cuisine'])

# Train a Multinomial Naive Bayes classifier for cuisine
cuisine_classifier = MultinomialNB()
cuisine_classifier.fit(X, df['Cuisine_Label'])

# Get user input
user_input = input("Enter a dish name: ")

# Vectorize the user input
user_input_vectorized = vectorizer.transform([user_input])

# Predict cuisine for the user input
predicted_cuisine_label = cuisine_classifier.predict(user_input_vectorized)[0]
predicted_cuisine = cuisine_label_encoder.inverse_transform([predicted_cuisine_label])[0]

# Cooking methods list
cooking_methods = ['grilling', 'baking', 'frying', 'steaming', 'boiling', 'roasting']

# Check if any word in the instruction partially matches a cooking method
matched_methods = [method for method in cooking_methods if any(word in user_input.lower() for word in method.split())]

# Display predicted cuisine and partially matched cooking methods
print(f"\nPredicted Cuisine for '{user_input}': {predicted_cuisine}")

if matched_methods:
    print(f"Partially Matched Cooking Methods: {', '.join(matched_methods)}")
else:
    print("No specific cooking methods partially matched.")


Enter a dish name: kerala masala curry

Predicted Cuisine for 'kerala masala curry': Indian
No specific cooking methods partially matched.
