## Processing Data for a Recipe Recommendation System


In [1]:
## Checking the Data

import pandas as pd


# Read the recipe table from the CSV next to this notebook
file_path = 'recipe_final.csv'
recipe_df = pd.read_csv(file_path)

# Preview the first five rows to sanity-check the columns
recipe_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,calories,fat,carbohydrates,protein,cholesterol,sodium,fiber,ingredients_list
0,0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,15,36,1,42,21,81,2,"['pork belly', 'smoked paprika', 'kosher salt'..."
1,1,240488,"Pork Loin, Apples, and Sauerkraut",4.76,https://images.media-allrecipes.com/userphotos...,29,19,18,10,73,33,104,41,"['sauerkraut drained', 'Granny Smith apples sl..."
2,2,218939,Foolproof Rosemary Chicken Wings,4.57,https://images.media-allrecipes.com/userphotos...,12,17,36,2,48,24,31,4,"['chicken wings', 'sprigs rosemary', 'head gar..."
3,3,87211,Chicken Pesto Paninis,4.62,https://images.media-allrecipes.com/userphotos...,163,32,45,20,65,20,43,18,"['focaccia bread quartered', 'prepared basil p..."
4,4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,8,12,5,14,7,8,3,"['red potatoes', 'strips bacon', 'Sauce:', 'he..."


In [2]:
## Data cleaning: count the missing values in every column
recipe_df.isna().sum()

Unnamed: 0          0
recipe_id           0
recipe_name         0
aver_rate           0
image_url           0
review_nums         0
calories            0
fat                 0
carbohydrates       0
protein             0
cholesterol         0
sodium              0
fiber               0
ingredients_list    0
dtype: int64

In [3]:
# Look at the raw ingredients text stored for the row labelled 0
recipe_df.loc[0, 'ingredients_list']

"['pork belly', 'smoked paprika', 'kosher salt', 'ground black pepper']"

In [4]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
## Turn each recipe's ingredient text into a TF-IDF vector (text -> numerical features)
vectorizer = TfidfVectorizer()
X_ingredients = vectorizer.fit_transform(recipe_df.loc[:, 'ingredients_list'])

In [6]:
## Standardize the numerical nutrition columns
nutrition_columns = [
    'calories',
    'fat',
    'carbohydrates',
    'protein',
    'cholesterol',
    'sodium',
    'fiber',
]
scaler = StandardScaler()
X_numerical = scaler.fit_transform(recipe_df[nutrition_columns])

In [7]:
# Display the standardized numerical feature matrix produced by the scaler
X_numerical

array([[-1.29820896e-01,  4.60081013e-01, -1.15447895e+00, ...,
         4.25987648e-02,  1.13622071e+00, -7.63938978e-01],
       [ 2.27870449e-01, -3.36222474e-01, -1.95417425e-02, ...,
         5.28767309e-01,  1.58714542e+00,  2.53234364e+00],
       [ 4.90247765e-02,  4.60081013e-01, -1.02837482e+00, ...,
         1.64140901e-01,  1.55949590e-01, -5.94898844e-01],
       ...,
       [ 2.41931906e+01, -2.65488955e-02,  1.99812441e+00, ...,
         8.31128101e-02,  8.84115169e+00, -5.94898844e-01],
       [ 2.94691379e+01, -3.80461556e-01,  4.14189470e+00, ...,
        -1.59971462e-01,  2.27333520e+00, -5.94898844e-01],
       [ 2.13316598e+01, -7.78613300e-01,  4.89851951e+00, ...,
        -3.22027643e-01,  1.03507692e+01, -1.72298509e-01]],
      shape=(48740, 7))

In [8]:
 # Combine Features
# Place the scaled numeric columns and the TF-IDF columns side by side.
# Note: .toarray() materializes the sparse TF-IDF matrix as a dense array.
X_combined = np.concatenate((X_numerical, X_ingredients.toarray()), axis=1)

In [9]:
# Display the combined (numeric + TF-IDF) feature matrix
X_combined

array([[-1.29820896e-01,  4.60081013e-01, -1.15447895e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 2.27870449e-01, -3.36222474e-01, -1.95417425e-02, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 4.90247765e-02,  4.60081013e-01, -1.02837482e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 2.41931906e+01, -2.65488955e-02,  1.99812441e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 2.94691379e+01, -3.80461556e-01,  4.14189470e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 2.13316598e+01, -7.78613300e-01,  4.89851951e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],
      shape=(48740, 4906))

In [10]:
## Build the nearest-neighbour index (Euclidean distance, 3 neighbours) on the combined features
knn = NearestNeighbors(metric='euclidean', n_neighbors=3).fit(X_combined)
knn

0,1,2
,n_neighbors,3
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'euclidean'
,p,2
,metric_params,
,n_jobs,


In [11]:
# Function to Recommend Recipes
def recommend_recipes(input_features, n_recommendations=None):
    """Return the recipes nearest to a nutrition + ingredients query.

    The query is encoded the same way as the training matrix: the numeric
    values are standardized with the fitted ``scaler``, the ingredient text
    is vectorized with the fitted ``vectorizer``, and both parts are stacked
    side by side before the fitted ``knn`` model is queried.

    Parameters
    ----------
    input_features : sequence
        The numeric values, in the column order the scaler was fitted on,
        followed by one string describing the ingredients. Any elements
        after the ingredients string are ignored.
    n_recommendations : int, optional
        Number of recipes to return. ``None`` (the default) uses the
        neighbour count ``knn`` was constructed with.

    Returns
    -------
    pandas.DataFrame
        The ``recipe_name``, ``ingredients_list`` and ``image_url`` columns
        of the matched ``recipe_df`` rows, in the order returned by
        ``knn.kneighbors``.

    Raises
    ------
    ValueError
        If ``input_features`` is too short to hold every numeric value plus
        the ingredients string.
    TypeError
        If the ingredients element is not a string.
    """
    # Derive the numeric width from the fitted scaler instead of hard-coding it.
    n_numeric = scaler.n_features_in_
    if len(input_features) < n_numeric + 1:
        raise ValueError(
            f"input_features must contain {n_numeric} numeric values followed by "
            f"an ingredients string; got {len(input_features)} element(s)"
        )

    numeric_values = list(input_features[:n_numeric])
    ingredients_text = input_features[n_numeric]
    if not isinstance(ingredients_text, str):
        raise TypeError(
            "the ingredients element of input_features must be a string, "
            f"got {type(ingredients_text).__name__}"
        )

    # When the scaler was fitted on a DataFrame, hand it a DataFrame with the
    # same column names so scikit-learn does not warn about missing feature names.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        numeric_query = pd.DataFrame([numeric_values], columns=feature_names)
    else:
        numeric_query = [numeric_values]
    input_features_scaled = scaler.transform(numeric_query)

    input_ingredients_transformed = vectorizer.transform([ingredients_text])

    input_combined = np.hstack(
        [np.asarray(input_features_scaled), input_ingredients_transformed.toarray()]
    )

    # Only the row positions are needed, so skip computing the distance output.
    indices = knn.kneighbors(
        input_combined, n_neighbors=n_recommendations, return_distance=False
    )

    recommendations = recipe_df.iloc[indices[0]]
    return recommendations[['recipe_name', 'ingredients_list', 'image_url']]

# Example query: seven nutrition values followed by a free-text ingredients string
input_features = [
    15, 36, 1, 42, 21, 81, 2,
    'pork belly, smoked paprika, kosher salt',
]
recommendations = recommend_recipes(input_features)
recommendations



Unnamed: 0,recipe_name,ingredients_list,image_url
0,Homemade Bacon,"['pork belly', 'smoked paprika', 'kosher salt'...",https://images.media-allrecipes.com/userphotos...
3507,Char Siu (Chinese Barbeque Pork),"['honey', 'Shaoxing cooking wine', 'hoisin sau...",https://images.media-allrecipes.com/userphotos...
1926,Adobo Twist,"['vegetable oil', 'onion', 'head garlic', 'por...",https://images.media-allrecipes.com/userphotos...
