## Content Based Recommendation System

## Build Data Pipeline for the Model

## Importing Libraries and Dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [None]:
# Mount Google Drive only when the notebook runs inside Google Colab.
# The dataset is read from a local filesystem path further down, so outside Colab
# the mount is skipped instead of aborting on the missing `google.colab` package.
try:
    from google.colab import drive
except ImportError:
    drive = None  # not running in Colab; nothing to mount
else:
    drive.mount('/content/drive')


In [None]:
# Location of the input CSV (absolute local path; adjust for your environment).
DATASET_CSV = '/Users/mishal027/Desktop/Pallet Pioneer/PPdataset_1.csv'
data = pd.read_csv(DATASET_CSV)

## Data Overview

In [None]:
# Print a summary of the loaded frame: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Age                  388 non-null    int64  
 1   Gender               388 non-null    object 
 2   Occupation           388 non-null    object 
 3   Monthly Income       388 non-null    float64
 4   low-carb             388 non-null    int64  
 5   low-fat              388 non-null    int64  
 6   high-protein         388 non-null    int64  
 7   non-veg              388 non-null    int64  
 8   veg                  388 non-null    int64  
 9   vegan                388 non-null    int64  
 10  gluten-free          388 non-null    int64  
 11  Lactose Intolerance  388 non-null    int64  
 12  soy-allergy          388 non-null    int64  
 13  wheat-allergy        388 non-null    int64  
 14  fish-allergy         388 non-null    int64  
 15  meal-preff           388 non-null    obj

## EDA

## Data Preprocessing

In [None]:
# Map the two open-ended income brackets to a single placeholder value (5000).
income = data['Monthly Income'].replace(['<10000', '>50000'], 5000)

# Strip currency symbols, thousands separators and comparison signs *before* the
# float conversion, so strings such as '$1,200' or '>20000' no longer raise
# ValueError. Non-string entries are left untouched by the regex replace.
# The raw string avoids the invalid-escape warning that '\$' triggers in a plain literal.
income = income.replace(r'[\$,<>]', '', regex=True)

# Now, proceed with the conversion to float
data['Monthly Income'] = income.astype(float)


  data['Monthly Income'] = data['Monthly Income'].replace('[\$,<>]', '', regex=True).astype(float)


In [None]:
# One-hot encode the categorical columns, dropping the first level of each.
categorical_columns = ['Gender', 'Occupation', 'meal-preff']
df = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

In [None]:
# Diet, intolerance and allergy indicator columns used as the feature subset.
diet_and_allergy_columns = [
    'low-carb', 'low-fat', 'high-protein', 'non-veg', 'veg', 'vegan',
    'gluten-free', 'Lactose Intolerance', 'soy-allergy', 'wheat-allergy', 'fish-allergy',
]
selected_features = data[diet_and_allergy_columns]


In [None]:
# Standardise the selected columns: learn the scaling parameters first,
# then apply them to the same frame.
scaler = StandardScaler()
scaler.fit(selected_features)
scaled_features = scaler.transform(selected_features)


## Feature Engineering

In [None]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 772.0/772.0 kB 4.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... done
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp39-cp39-macosx_10_9_x86_64.whl size=1126109 sha256=920b7cbcd954a9b117d1786283c2cd68622f24384a67c612a13dbfaaafc1083d
  Stored in directory: /Users/mishal027/Library/Caches/pip/wheels/c6/3a/46/9b17b3512bdf283c6cb84f59929cdd5199d4e754d596d22784
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_selection import SelectKBest, chi2


# Candidate predictor columns (X) and the label they are scored against (y)
candidate_columns = [
    'Age', 'Monthly Income',
    'low-carb', 'low-fat', 'high-protein', 'non-veg', 'veg', 'vegan',
    'gluten-free', 'Lactose Intolerance', 'soy-allergy', 'wheat-allergy', 'fish-allergy',
]
X = data[candidate_columns]
y = data['cuisine-preff']

# Keep the k columns with the highest chi-squared score
k = 10  # number of columns to retain
selector = SelectKBest(score_func=chi2, k=k)
selector.fit(X, y)
X_new = selector.transform(X)

# Names of the columns that survived the selection
kept_mask = selector.get_support()
selected_features = X.columns[kept_mask]
print("Selected Features:", selected_features)


Selected Features: Index(['Age', 'Monthly Income', 'low-carb', 'non-veg', 'veg', 'vegan',
       'gluten-free', 'Lactose Intolerance', 'soy-allergy', 'fish-allergy'],
      dtype='object')


In [None]:
# Calculate cosine similarity between user feature vectors
# X_new holds the columns kept by the feature selector, one row per user, so the
# result is a square user-by-user matrix.
# NOTE(review): the selected columns include Age and Monthly Income, which are passed
# in unscaled next to 0/1 indicator columns and so presumably dominate the cosine —
# confirm whether scaling was intended here.
user_similarity_matrix = cosine_similarity(X_new)


In [None]:
# Individualized Cuisine Recommendations
# Choose a user for whom you want to generate recommendations
# (used as the row position into user_similarity_matrix; 0 selects the first row)
user_index = 0


In [None]:
# Get the top N most similar users
n_neighbours = 5
similar_users = pd.Series(user_similarity_matrix[user_index])
# Remove the target user by position rather than assuming it sorts first: another
# user with an identical feature vector also scores 1.0 and can occupy the first
# slot, which would leave the target user among its own neighbours.
neighbour_scores = similar_users.drop(similar_users.index[user_index])
top_similar_users = neighbour_scores.nlargest(n_neighbours).index

In [None]:
# Collect the preferred cuisine of every neighbour row, then rank cuisines by
# how often they occur and keep at most five.
neighbour_rows = data.index.isin(top_similar_users)
neighbour_cuisines = data.loc[neighbour_rows, 'cuisine-preff']
recommendations = neighbour_cuisines.value_counts().index[:5]

print("Recommended Cuisines:", recommendations)

Recommended Cuisines: Index(['Chinese', 'Japanese', 'Thai', 'Mediterranean'], dtype='object', name='cuisine-preff')


## User Feature Vectors

In [None]:
# Diet, intolerance and allergy indicator columns that make up each user's vector
profile_columns = [
    'low-carb', 'low-fat', 'high-protein', 'non-veg', 'veg', 'vegan',
    'gluten-free', 'Lactose Intolerance', 'soy-allergy', 'wheat-allergy', 'fish-allergy',
]
user_feature_vector = data[profile_columns]

# Show the heading and the frame, one per line
print("User Feature Vector:", user_feature_vector, sep="\n")

User Feature Vector:
     low-carb  low-fat  high-protein  non-veg  veg  vegan  gluten-free  \
0           1        1             0        0    0      1            0   
1           1        1             1        0    1      0            0   
2           0        0             0        0    0      1            0   
3           0        1             0        1    0      0            0   
4           1        0             1        0    0      0            1   
..        ...      ...           ...      ...  ...    ...          ...   
383         0        0             1        1    0      0            0   
384         0        1             0        0    0      0            1   
385         1        0             1        0    0      0            1   
386         0        0             0        0    0      0            1   
387         0        1             1        0    0      1            0   

     Lactose Intolerance  soy-allergy  wheat-allergy  fish-allergy  
0                    

## Similarity Calculation

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Assume 'user_feature_vector' is the DataFrame containing user feature vectors
# Ensure that the user_feature_vector does not contain any non-numeric columns

# Calculate cosine similarity between user feature vectors
# NOTE: this rebinds `user_similarity_matrix`, replacing the matrix computed earlier
# from X_new; every cell that runs after this one reads the matrix built here.
user_similarity_matrix = cosine_similarity(user_feature_vector)

# Display the user similarity matrix
print("User Similarity Matrix:")
print(user_similarity_matrix)


User Similarity Matrix:
[[1.         0.4472136  0.35355339 ... 0.28867513 0.         0.5       ]
 [0.4472136  1.         0.         ... 0.51639778 0.25819889 0.67082039]
 [0.35355339 0.         1.         ... 0.         0.40824829 0.35355339]
 ...
 [0.28867513 0.51639778 0.         ... 1.         0.33333333 0.28867513]
 [0.         0.25819889 0.40824829 ... 0.33333333 1.         0.28867513]
 [0.5        0.67082039 0.35355339 ... 0.28867513 0.28867513 1.        ]]


## Build Model

In [None]:
def recommend_cuisines(user_index, top_n=5, similarity_matrix=None, cuisine_preferences=None):
    """Recommend the cuisines preferred by the users most similar to one user.

    Parameters
    ----------
    user_index : int
        Row position of the target user in the similarity matrix. Negative
        positions count from the end.
    top_n : int, default 5
        Number of nearest neighbours to consult; also the maximum number of
        cuisines returned.
    similarity_matrix : array-like of shape (n_users, n_users), optional
        Pairwise user similarities. Defaults to the notebook-level
        ``user_similarity_matrix``.
    cuisine_preferences : pandas.Series, optional
        Preferred cuisine per user, indexed 0..n_users-1 so that labels line up
        with matrix row positions. Defaults to ``data['cuisine-preff']``.

    Returns
    -------
    pandas.Index
        Distinct cuisines among the neighbours, most frequent first, at most
        ``top_n`` entries.
    """
    if similarity_matrix is None:
        similarity_matrix = user_similarity_matrix
    if cuisine_preferences is None:
        cuisine_preferences = data['cuisine-preff']

    similar_users = pd.Series(similarity_matrix[user_index])

    # Drop the target user by position instead of assuming it sorts first: a user
    # with an identical feature vector ties at 1.0 and could take the first slot,
    # which would keep the target user among its own neighbours.
    neighbour_scores = similar_users.drop(similar_users.index[user_index])
    top_similar_users = neighbour_scores.nlargest(top_n).index

    # Recommend cuisines liked by similar users, ranked by how many neighbours share them
    neighbour_rows = cuisine_preferences.index.isin(top_similar_users)
    recommendations = cuisine_preferences[neighbour_rows].value_counts().index[:top_n]

    return recommendations

# Target user for this run (swap 0 for any other user index)
user_index_to_recommend = 0

# Ask the recommender for that user's cuisines
recommended_cuisines = recommend_cuisines(user_index=user_index_to_recommend)

print("Recommended Cuisines:", recommended_cuisines)

Recommended Cuisines: Index(['Mexican', 'Japanese', 'Italian', 'Mediterranean'], dtype='object', name='cuisine-preff')


## Evaluate

In [None]:
# Repeat the recommendation for a second user
user_index_to_recommend = 7

# Ask the recommender for that user's cuisines
recommended_cuisines = recommend_cuisines(user_index=user_index_to_recommend)

print("Recommended Cuisines:", recommended_cuisines)


Recommended Cuisines: Index(['Mexican', 'Japanese'], dtype='object', name='cuisine-preff')
