<a href="https://colab.research.google.com/github/MananParekh17/Internship/blob/main/Cognifyz_task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
# Mount Google Drive first: the CSV is read from the mounted Drive path below,
# so the read depends on this mount having succeeded.
from google.colab import drive
drive.mount('/content/drive')

# Read the CSV into the DataFrame that the rest of the notebook works on.
df=pd.read_csv('/content/drive/MyDrive/Dataset.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Remove the column display limit so that head() renders every column.
pd.options.display.max_columns = None
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",1100,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,1200,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4000,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",1500,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",1500,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


# Preprocessing


In [None]:
# Count missing entries per column, then report only the columns that have any.
missing_values = df.isna().sum()
print(missing_values.loc[missing_values > 0])


Cuisines    9
dtype: int64


In [None]:
# Replace missing cuisine labels with an explicit 'Unknown' category.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form acts on an intermediate object, raises a
# FutureWarning on recent pandas, and leaves df unchanged under Copy-on-Write.
df['Cuisines'] = df['Cuisines'].fillna('Unknown')

df['Cuisines'].head(89)

0           French, Japanese, Desserts
1                             Japanese
2     Seafood, Asian, Filipino, Indian
3                      Japanese, Sushi
4                     Japanese, Korean
                    ...               
84                             Unknown
85                             Mexican
86            Coffee and Tea, Sandwich
87                             Unknown
88                           Fast Food
Name: Cuisines, Length: 89, dtype: object

In [None]:

# Columns that are turned into one-hot indicator features.
categorical_columns = ['City', 'Cuisines', 'Rating color', 'Rating text']

# handle_unknown='ignore' lets the fitted encoder accept categories it never
# saw during fitting (they simply produce no active indicator).
onehot_encoder = OneHotEncoder(handle_unknown='ignore')

# Fit the encoder and encode the dataset in one step, then densify the result.
encoded_features = onehot_encoder.fit_transform(df[categorical_columns]).toarray()

# Wrap the dense matrix in a DataFrame labelled with the generated feature names.
encoded_feature_names = onehot_encoder.get_feature_names_out(categorical_columns)
df_encoded = pd.DataFrame(encoded_features, columns=encoded_feature_names)


In [None]:
def encode_user_input(user_input, onehot_encoder, categorical_columns):
    """One-hot encode a single set of user preferences.

    Parameters
    ----------
    user_input : dict
        Mapping of column name to preferred value. Only the entries named in
        ``categorical_columns`` are passed to the encoder.
    onehot_encoder : object
        Already-fitted encoder exposing ``transform`` (whose result supports
        ``toarray``) and ``get_feature_names_out``.
    categorical_columns : list of str
        Columns to select from ``user_input`` and hand to the encoder.

    Returns
    -------
    pandas.DataFrame
        One-row frame whose columns are the encoder's output feature names.
    """
    # A list holding one dict becomes a one-row frame keyed by the dict's keys.
    preference_row = pd.DataFrame([user_input])
    categorical_part = preference_row[categorical_columns]

    dense_vector = onehot_encoder.transform(categorical_part).toarray()
    feature_names = onehot_encoder.get_feature_names_out(categorical_columns)

    return pd.DataFrame(dense_vector, columns=feature_names)


In [None]:
def recommend_restaurants(user_preferences, df, df_encoded, top_n=10):
    """Return the rows of ``df`` most similar to the encoded user preferences.

    Parameters
    ----------
    user_preferences : array-like
        Encoded preference vector(s); only the first row of the resulting
        similarity matrix is used for ranking.
    df : pandas.DataFrame
        Restaurant data, positionally aligned with ``df_encoded``.
    df_encoded : array-like
        Encoded restaurant features compared against ``user_preferences``.
    top_n : int, default 10
        Maximum number of restaurants to return. A value of zero or less
        yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df``, ordered from most to least similar.
    """
    # A slice of [-0:] selects *every* element and a negative top_n turns into
    # [k:], so non-positive requests must be answered explicitly.
    if top_n <= 0:
        return df.iloc[0:0]

    # Cosine similarity between the user vector and every restaurant's features
    similarity = cosine_similarity(user_preferences, df_encoded)

    # argsort is ascending: keep the last top_n positions, then reverse them so
    # the most similar restaurant comes first.
    similar_restaurants_idx = similarity[0].argsort()[-top_n:][::-1]

    return df.iloc[similar_restaurants_idx]


# Defining the criteria and testing the recommendation

In [None]:
# Example preference profile used to exercise the recommender.
# Only the keys listed in categorical_columns are handed to the encoder by
# encode_user_input; 'Price range' and 'Aggregate rating' are not encoded.
default_user_preferences = {
    'Cuisines': 'Italian',
    'Price range': 3,
    'Aggregate rating': 4.0,  # lowest rating the user is willing to accept
    'City': 'Makati City',
    'Rating color': 'Dark Green',
    'Rating text': 'Excellent',
}

# Convert the profile into a one-hot vector using the fitted encoder
user_preferences_encoded = encode_user_input(
    user_input=default_user_preferences,
    onehot_encoder=onehot_encoder,
    categorical_columns=categorical_columns,
)

In [None]:
# Uncomment and modify the lines below to enter user preferences manually.

# cuisine_preference_input = input("Enter your cuisine preference: ")
# price_range_input = int(input("Enter your preferred price range (1 to 4): "))
# minimum_rating_input = float(input("Enter your minimum acceptable rating: "))
# city_input = input("Enter your preferred city: ")
# rating_color_input = input("Enter your preferred rating color: ")
# rating_text_input = input("Enter your preferred rating text: ")

# user_input = {
#     'Cuisines': cuisine_preference_input,
#     'Price range': price_range_input,
#     'Aggregate rating': minimum_rating_input,
#     'City': city_input,
#     'Rating color': rating_color_input,
#     'Rating text': rating_text_input,
# }

# user_preferences_encoded = encode_user_input(user_input, onehot_encoder, categorical_columns)

In [None]:
# Rank the restaurants against the encoded preferences and print the key columns
recommended_restaurants = recommend_restaurants(
    user_preferences_encoded,
    df,
    df_encoded,
)
summary_columns = ['Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating']
print(recommended_restaurants[summary_columns])

       Restaurant Name                                           Cuisines  \
0     Le Petit Souffle                         French, Japanese, Desserts   
811    Fusilli Reasons                                            Italian   
943      Cafe Parmesan                                            Italian   
9316           Baduzzi                                            Italian   
9435        Paper Moon                                            Italian   
9358       La Favorita                                            Italian   
9320             Ombra                                            Italian   
1     Izakaya Kikufuji                                           Japanese   
9378     Bocca Di Lupo                                            Italian   
9280       Toodz House  Cafe, Italian, Coffee and Tea, Western, Indone...   

      Price range  Aggregate rating  
0               3               4.8  
811             1               4.6  
943             3               4.5  
