In [2]:
import pandas as pd

# Read the cleaned food dataset from disk into a DataFrame
file_path = 'cleaned.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows as the cell's output
data.head(n=5)


Unnamed: 0,food_name,age,time,cuisine_type,cooking_method,dish_type,dietary_preferences,spice_level,occasion,nutrient_content,region_origin,texture,post_id
0,fried rice,adult,dinner,indian,fried,main course,vegetarian,mild,everyday,high carb,indian,soft,1cb85xg
1,chilli prawns,adult,snacks,fusion,grilled,appetizer,non-vegetarian,medium,everyday,high protein,fusion,soft,1cb6hml
2,watermelon juice,child,breakfast,fusion,blended,drink,vegetarian,mild,everyday,high water content,fusion,smooth,1cb607c
3,margherita pizza,adult,dinner,italian,baked,pizza,non-vegetarian,mild,everyday,high carb,italian,chewy,1cb4voz
4,maggi,child,snacks,indo-western,stir-fried,snack,vegetarian,mild,everyday,high carb,indian,soft,1cb1rg4


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the cleaned dataset from disk
file_path = 'cleaned.csv'
data = pd.read_csv(file_path)

# Impute gaps: every column that contains a missing value is filled with
# that column's most frequent value
for col_name in data.columns[data.isnull().any()]:
    most_common = data[col_name].mode()[0]
    data[col_name] = data[col_name].fillna(most_common)

# Integer-encode every object-typed column, keeping one fitted encoder per
# column so the codes can be mapped back to the original labels later
categorical_columns = [name for name in data.columns if data[name].dtype == 'object']
label_encoders = {}
for col_name in categorical_columns:
    encoder = LabelEncoder()
    data[col_name] = encoder.fit_transform(data[col_name])
    label_encoders[col_name] = encoder

# The encoded food name is the single input feature; the attributes below
# are the targets, each predicted by its own classifier
target_columns = [
    'cuisine_type',
    'cooking_method',
    'dish_type',
    'dietary_preferences',
    'spice_level',
    'occasion',
    'nutrient_content',
]
X = data['food_name']
y = data[target_columns]

# Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The feature is a 1-D Series; reshape it into a 2-D array with a single
# column, which is the layout the estimators are given below
X_train = X_train.to_numpy().reshape(-1, 1)
X_test = X_test.to_numpy().reshape(-1, 1)

# Fit one RandomForest per target column (fit() returns the fitted estimator)
models = {
    target: RandomForestClassifier(random_state=42).fit(X_train, y_train[target])
    for target in y.columns
}

# Score every fitted model on the held-out rows
accuracy_scores = {
    target: accuracy_score(y_test[target], fitted_model.predict(X_test))
    for target, fitted_model in models.items()
}

accuracy_scores



{'cuisine_type': 0.5243243243243243,
 'cooking_method': 0.372972972972973,
 'dish_type': 0.42162162162162165,
 'dietary_preferences': 0.8,
 'spice_level': 0.5945945945945946,
 'occasion': 0.7189189189189189,
 'nutrient_content': 0.5081081081081081}

In [5]:
# Start again from the raw CSV so this cell produces the same result no
# matter which cells ran before it. `data` may already have been
# label-encoded in place by an earlier cell; in that case no object columns
# are left, `label_encoders` would be reset to an empty dict below, and later
# lookups such as label_encoders['food_name'] would fail with a KeyError.
data = pd.read_csv(file_path)

# Fill missing values (if any) with the mode of each column.
# The filled Series is assigned back to the frame: calling
# fillna(..., inplace=True) on data[column] operates on an intermediate
# object, which pandas 2.x warns about and which leaves `data` untouched
# when Copy-on-Write is enabled.
for column in data.columns:
    if data[column].isnull().sum() > 0:
        data[column] = data[column].fillna(data[column].mode()[0])

# Encode the categorical variables, keeping each fitted encoder so encoded
# values can be converted back to their original labels
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        data[column] = label_encoders[column].fit_transform(data[column])

# Define the input feature (encoded food name) and the target variables
X = data['food_name']
y = data[['cuisine_type', 'cooking_method', 'dish_type', 'dietary_preferences', 'spice_level', 'occasion', 'nutrient_content']]

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the 1-D feature Series into a 2-D array with a single column
# before passing it to the estimators
X_train = X_train.values.reshape(-1, 1)
X_test = X_test.values.reshape(-1, 1)

# Train a RandomForest model for each target variable
models = {}
for column in y.columns:
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train[column])
    models[column] = model

# Evaluate each model on the held-out test rows
accuracy_scores = {}
for column in y.columns:
    y_pred = models[column].predict(X_test)
    accuracy = accuracy_score(y_test[column], y_pred)
    accuracy_scores[column] = accuracy

accuracy_scores


{'cuisine_type': 0.5243243243243243,
 'cooking_method': 0.372972972972973,
 'dish_type': 0.42162162162162165,
 'dietary_preferences': 0.8,
 'spice_level': 0.5945945945945946,
 'occasion': 0.7189189189189189,
 'nutrient_content': 0.5081081081081081}

In [10]:
# Function to make predictions for a given food name
def predict_food_attributes(food_name):
    """Predict every modelled attribute for one food name.

    The name is encoded with ``label_encoders['food_name']``, passed to each
    classifier stored in the module-level ``models`` dict, and each predicted
    code is decoded back to its label with the matching entry of
    ``label_encoders``.

    Parameters
    ----------
    food_name : str
        Name of the dish. An exact match against the labels known to the
        food-name encoder is tried first; if that fails, the name is retried
        with surrounding whitespace removed and lower-cased.

    Returns
    -------
    dict
        Maps each key of ``models`` to the predicted, decoded label.

    Raises
    ------
    ValueError
        If the food name (exact or normalized) is not one of the labels the
        food-name encoder was fitted on.
    """
    food_encoder = label_encoders['food_name']
    known_foods = set(food_encoder.classes_)

    # Fall back to a normalized spelling only when the exact string is
    # unknown, so inputs that already match keep their original behavior.
    if food_name not in known_foods and isinstance(food_name, str):
        normalized_name = food_name.strip().lower()
        if normalized_name in known_foods:
            food_name = normalized_name

    # Fail early with a readable message instead of the encoder's generic
    # unseen-label error; the exception type stays ValueError.
    if food_name not in known_foods:
        raise ValueError(
            f"Unknown food name {food_name!r}: it is not among the labels "
            "the food_name encoder was fitted on."
        )

    encoded_food_name = food_encoder.transform([food_name])[0]
    features = [[encoded_food_name]]  # one sample, one feature

    predictions = {}
    # Iterate over the trained models themselves so a prediction is produced
    # for exactly the targets that have a fitted classifier.
    for column, model in models.items():
        encoded_prediction = model.predict(features)[0]
        predictions[column] = label_encoders[column].inverse_transform([encoded_prediction])[0]
    return predictions

# Demo: look up the predicted attributes for one dish and show them as the
# cell's output
food_name = "fried rice"
predictions = predict_food_attributes(food_name=food_name)
predictions


{'cuisine_type': 'indian',
 'cooking_method': 'fried',
 'dish_type': 'main course',
 'dietary_preferences': 'vegetarian',
 'spice_level': 'mild',
 'occasion': 'everyday',
 'nutrient_content': 'high carb'}