# Recommendation

# Step 1: Load the Dataset


In [18]:
import pandas as pd

# Read the recipe table from a CSV file addressed by a relative path.
file_path = 'recipe_final.csv'
recipe_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows to sanity-check the loaded columns.
recipe_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,calories,fat,carbohydrates,protein,cholesterol,sodium,fiber,ingredients_list
0,0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,15,36,1,42,21,81,2,"['pork belly', 'smoked paprika', 'kosher salt'..."
1,1,240488,"Pork Loin, Apples, and Sauerkraut",4.76,https://images.media-allrecipes.com/userphotos...,29,19,18,10,73,33,104,41,"['sauerkraut drained', 'Granny Smith apples sl..."
2,2,218939,Foolproof Rosemary Chicken Wings,4.57,https://images.media-allrecipes.com/userphotos...,12,17,36,2,48,24,31,4,"['chicken wings', 'sprigs rosemary', 'head gar..."
3,3,87211,Chicken Pesto Paninis,4.62,https://images.media-allrecipes.com/userphotos...,163,32,45,20,65,20,43,18,"['focaccia bread quartered', 'prepared basil p..."
4,4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,8,12,5,14,7,8,3,"['red potatoes', 'strips bacon', 'Sauce:', 'he..."


In [19]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

In [20]:
# Encode the ingredient text of every recipe as a sparse TF-IDF matrix.
# The fitted vectorizer is kept at module level so that query text can later
# be encoded against the same vocabulary.
vectorizer = TfidfVectorizer()
X_ingredients = vectorizer.fit_transform(
    recipe_df.loc[:, 'ingredients_list']
)

In [21]:
# Standardise the seven nutrition columns so that no single column dominates
# distance computations purely because of its scale. The fitted scaler is
# kept at module level for reuse on query inputs.
scaler = StandardScaler()
X_numerical = scaler.fit_transform(
    recipe_df.loc[
        :,
        [
            'calories',
            'fat',
            'carbohydrates',
            'protein',
            'cholesterol',
            'sodium',
            'fiber',
        ],
    ]
)

In [23]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
import scipy.sparse  # Ensure this import is included

# Inputs from earlier cells: X_numerical (dense, standardised nutrition
# columns) and X_ingredients (sparse TF-IDF matrix of ingredient text).

# Compress the nutrition features with PCA; a float n_components keeps just
# enough components to explain that fraction (95%) of the variance.
pca = PCA(n_components=0.95)
X_numerical_reduced = pca.fit_transform(X_numerical)

# Wrap the dense PCA output as CSR and place it to the left of the TF-IDF
# columns so both feature groups live in one sparse matrix.
X_numerical_reduced_sparse = scipy.sparse.csr_matrix(X_numerical_reduced)
X_combined = scipy.sparse.hstack(
    [
        X_numerical_reduced_sparse,
        X_ingredients,
    ]
)

# Build the 3-nearest-neighbour index (Euclidean distance) over the combined
# features. fit() returns the estimator itself, so the call can be chained;
# the bare name on the last line keeps the estimator repr as the cell output.
knn = NearestNeighbors(metric='euclidean', n_neighbors=3).fit(X_combined)
knn


In [29]:
def recommend_recipes(input_features):
    """Return the recipes nearest to a query given as nutrition values plus ingredients.

    The query is pushed through the same preprocessing chain that produced the
    matrix the KNN model was fitted on: the numeric values are standardised
    with ``scaler``, projected with ``pca``, and stacked to the left of the
    TF-IDF encoding of the ingredient text.

    Parameters
    ----------
    input_features : sequence
        Seven numeric values in the column order the scaler was fitted on
        (calories, fat, carbohydrates, protein, cholesterol, sodium, fiber),
        followed at index 7 by a single string of ingredient text. Any
        entries past index 7 are ignored.

    Returns
    -------
    pandas.DataFrame
        The ``recipe_name``, ``ingredients_list`` and ``image_url`` columns of
        the matching rows of ``recipe_df``, in the order returned by
        ``knn.kneighbors``.

    Raises
    ------
    ValueError
        If fewer than eight entries are supplied, or if the assembled query
        vector does not have the number of features the KNN model expects.
    """
    n_numeric = 7  # number of leading numeric entries; index 7 holds the text
    if len(input_features) <= n_numeric:
        raise ValueError(
            f"Expected {n_numeric} numerical values followed by an ingredients "
            f"string, got {len(input_features)} item(s)"
        )

    # Standardise the numeric part of the query.
    numeric_scaled = scaler.transform([input_features[:n_numeric]])

    # The KNN model was fitted on PCA-projected numeric features, so the query
    # must be projected too. Skipping this step either breaks the feature
    # count (when PCA dropped components) or silently compares vectors that
    # live in different bases (when it kept all of them).
    numeric_reduced = pca.transform(numeric_scaled)

    # Encode the ingredient text with the vocabulary learned at fit time.
    ingredients_tfidf = vectorizer.transform([input_features[n_numeric]])

    # Same column layout as the training matrix: numeric block first, then TF-IDF.
    input_combined = scipy.sparse.hstack(
        [scipy.sparse.csr_matrix(numeric_reduced), ingredients_tfidf],
        format="csr",
    )

    # Guard against the fitted transformers and the KNN model drifting apart
    # (e.g. one cell re-run without the others).
    if input_combined.shape[1] != knn.n_features_in_:
        raise ValueError(f"Input features shape {input_combined.shape[1]} does not match expected shape {knn.n_features_in_}")

    # Only the neighbour row positions are needed; distances are discarded.
    _, indices = knn.kneighbors(input_combined)
    recommendations = recipe_df.iloc[indices[0]]
    return recommendations[['recipe_name', 'ingredients_list', 'image_url']]


In [30]:
# Report how many distinct terms the fitted TF-IDF vocabulary contains.
print(
    "Number of features from ingredients vectorizer:",
    len(vectorizer.get_feature_names_out()),
)


Number of features from ingredients vectorizer: 4898
