In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer

In [13]:
# Load the recipe dataset from the CSV file into `df` and preview the first rows
file_path = 'recipe_final (1).csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0.1,Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,calories,fat,carbohydrates,protein,cholesterol,sodium,fiber,ingredients_list
0,0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,15,36,1,42,21,81,2,"['pork belly', 'smoked paprika', 'kosher salt'..."
1,1,240488,"Pork Loin, Apples, and Sauerkraut",4.76,https://images.media-allrecipes.com/userphotos...,29,19,18,10,73,33,104,41,"['sauerkraut drained', 'Granny Smith apples sl..."
2,2,218939,Foolproof Rosemary Chicken Wings,4.57,https://images.media-allrecipes.com/userphotos...,12,17,36,2,48,24,31,4,"['chicken wings', 'sprigs rosemary', 'head gar..."
3,3,87211,Chicken Pesto Paninis,4.62,https://images.media-allrecipes.com/userphotos...,163,32,45,20,65,20,43,18,"['focaccia bread quartered', 'prepared basil p..."
4,4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,8,12,5,14,7,8,3,"['red potatoes', 'strips bacon', 'Sauce:', 'he..."


In [14]:
# Count missing values per column (nothing is dropped in this cell)
df.isnull().sum()

Unnamed: 0          0
recipe_id           0
recipe_name         0
aver_rate           0
image_url           0
review_nums         0
calories            0
fat                 0
carbohydrates       0
protein             0
cholesterol         0
sodium              0
fiber               0
ingredients_list    0
dtype: int64

In [15]:
# Feature selection: standardize the seven numeric nutrition columns into X.
# The fitted `scaler` stays in scope so new inputs can be transformed with the same fit.
scaler = StandardScaler()
X = scaler.fit_transform(df[['calories', 'fat', 'carbohydrates', 'protein', 'cholesterol', 'sodium', 'fiber']])
X

array([[-0.1317045 ,  0.46001924, -1.15482863, ...,  0.04256474,
         1.13990476, -0.76393724],
       [ 0.23857551, -0.33625589, -0.01920347, ...,  0.52871248,
         1.59202345,  2.53220175],
       [ 0.05343551,  0.46001924, -1.02864806, ...,  0.16410167,
         0.15703804, -0.59490447],
       ...,
       [-0.77969453, -0.82286847, -0.27156462, ..., -0.68665688,
        -0.39336732, -0.25683894],
       [ 0.33114552,  0.32730672,  0.73787996, ...,  0.20461398,
        -0.31473798, -0.51038809],
       [-1.33511455, -0.9998185 , -1.02864806, ..., -0.80819381,
        -0.13782197, -0.42587171]])

In [16]:
# X_Ingriedients: TF-IDF matrix built from the text of the `ingredients_list` column.
# The fitted vectorizer is kept in `vectorized` so new ingredient text can be transformed later.
vectorized = TfidfVectorizer()
X_Ingriedients = vectorized.fit_transform(df['ingredients_list'])

In [17]:
# Build the combined matrix: scaled numeric columns first, then the densified TF-IDF columns
X_Combine = np.concatenate([X, X_Ingriedients.toarray()], axis=1)
X_Combine

array([[-0.1317045 ,  0.46001924, -1.15482863, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.23857551, -0.33625589, -0.01920347, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.05343551,  0.46001924, -1.02864806, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-0.77969453, -0.82286847, -0.27156462, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.33114552,  0.32730672,  0.73787996, ...,  0.        ,
         0.        ,  0.        ],
       [-1.33511455, -0.9998185 , -1.02864806, ...,  0.        ,
         0.        ,  0.        ]])

In [18]:
# Train KNN model: fit a nearest-neighbour index over the rows of X_Combine,
# configured for 5 neighbours under Euclidean distance
knn = NearestNeighbors(n_neighbors=5, metric='euclidean')
knn.fit(X_Combine)

In [19]:
# function to get recommendations
def get_recommendations(features, n_neighbors=5):
    """Return the recipes in ``df`` closest to a query in the combined feature space.

    The query is transformed with the already-fitted ``scaler`` (numeric part)
    and ``vectorized`` (ingredient text), concatenated in the same column order
    as the matrix ``knn`` was fitted on, and looked up with ``knn.kneighbors``.

    Parameters
    ----------
    features : sequence
        At least eight items. Items 0-6 are the numeric values handed to
        ``scaler`` (they must follow the column order the scaler was fitted
        with); item 7 is the ingredients string handed to ``vectorized``.
        Any further items are ignored.
    n_neighbors : int, default 5
        Number of recipes to return.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``df`` restricted to the columns
        ``recipe_name``, ``ingredients_list`` and ``image_url``.

    Raises
    ------
    ValueError
        If ``features`` holds fewer than eight items.
    """
    if len(features) < 8:
        raise ValueError(
            f'features must contain 7 numeric values followed by an ingredients string; got {len(features)} item(s)'
        )

    numeric_part = [list(features[:7])]
    # When the scaler was fitted on a DataFrame it remembers the column names and
    # warns if later input lacks them; re-attach the names to keep the call quiet.
    # Scalers fitted on plain arrays have no such attribute, so fall back to the list.
    fitted_names = getattr(scaler, 'feature_names_in_', None)
    if fitted_names is not None:
        numeric_part = pd.DataFrame(numeric_part, columns=fitted_names)
    input_features = scaler.transform(numeric_part)

    input_ingredients = vectorized.transform([features[7]])
    input_combined = np.hstack((input_features, input_ingredients.toarray()))

    # Only the row positions are needed, so skip returning the distances.
    indices = knn.kneighbors(input_combined, n_neighbors=n_neighbors, return_distance=False)
    # kneighbors returns positions in the fitted matrix, hence positional iloc.
    recommendations = df.iloc[indices[0]]
    return recommendations[['recipe_name', 'ingredients_list', 'image_url']]

# test the function
# `features` holds seven numeric values (sent to the scaler) followed by one
# ingredients string (sent to the TF-IDF vectorizer)
features = [50, 34, 20, 70, 61, 24, 38, 'mushrooms, onions, garlic, butter, salt, pepper']
recommendations = get_recommendations(features)
recommendations



Unnamed: 0,recipe_name,ingredients_list,image_url
13404,Easy Pasta Casserole,"['pasta', 'lean ground beef', 'spaghetti sauce...",https://images.media-allrecipes.com/userphotos...
11630,Johnny Marzetti I,"['lean ground beef', 'onions', 'green bell pep...",https://images.media-allrecipes.com/userphotos...
6756,Saucy Shepherd's Pie,"['potatoes', 'carrots', 'milk', 'butter', 'Sal...",https://images.media-allrecipes.com/userphotos...
11052,Shirred Potatoes and Pork Chops,"['milk', 'all-purpose flour', 'butter', 'salt ...",https://images.media-allrecipes.com/userphotos...
13395,Venison Chili,"['unsalted butter', 'red onion', 'garlic', 'da...",https://images.media-allrecipes.com/userphotos...


In [20]:
from sklearn.metrics import mean_squared_error

# Function to evaluate the model
def evaluate_model(sample_size=100, random_state=None):
    """Score the recommender on a random sample of rows from ``df``.

    For every sampled row, its nutrition values and ingredient text are passed
    to ``get_recommendations``; the mean squared error is then taken between
    the row's own nutrition values and the column-wise mean of the recommended
    rows' nutrition values. Values are the raw (unscaled) columns of ``df``.

    Parameters
    ----------
    sample_size : int, default 100
        Number of rows to sample without replacement. Capped at ``len(df)``.
    random_state : int or None, default None
        Seed for the sample. ``None`` draws from NumPy's global random state,
        as the unseeded version of this function did.

    Returns
    -------
    float
        Mean of the per-row MSE values.

    Raises
    ------
    ValueError
        If ``df`` has no rows or ``sample_size`` is less than 1.
    """
    nutrition_columns = ['calories', 'fat', 'carbohydrates', 'protein', 'cholesterol', 'sodium', 'fiber']

    row_count = len(df)
    if row_count == 0:
        raise ValueError('df is empty; there is nothing to evaluate')
    if sample_size < 1:
        raise ValueError('sample_size must be at least 1')

    # Sample row *positions* rather than index labels: the rows are read back
    # with iloc, which is only equivalent to label lookup for a default index.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    sample_positions = rng.choice(row_count, size=min(sample_size, row_count), replace=False)

    sampled_rows = df.iloc[sample_positions]
    true_features = sampled_rows[nutrition_columns].values
    true_ingredients = sampled_rows['ingredients_list'].values

    mse_list = []
    for query_values, query_ingredients in zip(true_features, true_ingredients):
        recommendations = get_recommendations(list(query_values) + [query_ingredients])

        # NOTE(review): each query is a row of the same df the neighbours come
        # from, so it is presumably returned as one of its own recommendations,
        # which would pull the error down; confirm whether it should be excluded.
        recommended_features = df.loc[recommendations.index, nutrition_columns].values
        mse_list.append(mean_squared_error(query_values, recommended_features.mean(axis=0)))

    return np.mean(mse_list)

# Evaluate the model and print the MSE averaged over the sampled queries
mean_mse = evaluate_model()
print(f'Mean Squared Error of the model: {mean_mse}')



KeyboardInterrupt: 