## **COGNIFYZ TECHNOLOGIES MACHINE LEARNING INTERNSHIP TASK 2**

## *Step One: Import All Libraries*

In [67]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import *
from sklearn.pipeline import make_pipeline
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.express as px

## *Step Two: Create a Wrangle Function for the Data Cleaning Process*

In [73]:
def wrangle(filepath):
    """Read the restaurant CSV and return a cleaned DataFrame.

    Columns that are not wanted downstream are removed, then every row that
    still contains a missing value is discarded.

    Parameters
    ----------
    filepath : str or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The remaining columns, restricted to rows without missing values.
    """
    unwanted_columns = [
        "Restaurant ID",         # high cardinality
        "Locality Verbose",      # similar to the `Locality` column
        "Switch to order menu",  # low cardinality
        "Rating color",
        "Rating text",
        "Currency",
        "Country Code",
        "Has Online delivery",
        "Is delivering now",
        "Votes",
    ]

    raw = pd.read_csv(filepath)
    return raw.drop(columns=unwanted_columns).dropna()

## *Import the Dataset with the Wrangle Function*

In [74]:
# Load and clean the raw restaurant CSV, then preview the first rows.
DATASET_PATH = "Cognifyz_Dataset .csv"  # NOTE: the file name contains a space before ".csv"
dataset = wrangle(DATASET_PATH)
dataset.head()

Unnamed: 0,Restaurant Name,City,Address,Locality,Longitude,Latitude,Cuisines,Average Cost for two,Has Table booking,Price range,Aggregate rating
0,Le Petit Souffle,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City",121.027535,14.565443,"French, Japanese, Desserts",1100,Yes,3,4.8
1,Izakaya Kikufuji,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City",121.014101,14.553708,Japanese,1200,Yes,3,4.5
2,Heat - Edsa Shangri-La,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4000,Yes,4,4.4
3,Ooma,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City",121.056475,14.585318,"Japanese, Sushi",1500,No,4,4.9
4,Sambo Kojin,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City",121.057508,14.58445,"Japanese, Korean",1500,Yes,4,4.8


## *Step Three: Create the `X_train` and `y_train`*

In [30]:
# The target column, and every column that is kept out of the feature matrix
# (the target itself plus the name, address, city and coordinate fields).
TARGET_COLUMN = "Aggregate rating"
NON_FEATURE_COLUMNS = [TARGET_COLUMN, "Address", "City", "Restaurant Name", "Longitude", "Latitude"]

X_train = dataset.drop(columns=NON_FEATURE_COLUMNS)
y_train = dataset[TARGET_COLUMN]

In [31]:
# Preview the feature matrix to check which columns remain as model inputs.
X_train.head()

Unnamed: 0,Locality,Cuisines,Average Cost for two,Has Table booking,Price range
0,"Century City Mall, Poblacion, Makati City","French, Japanese, Desserts",1100,Yes,3
1,"Little Tokyo, Legaspi Village, Makati City",Japanese,1200,Yes,3
2,"Edsa Shangri-La, Ortigas, Mandaluyong City","Seafood, Asian, Filipino, Indian",4000,Yes,4
3,"SM Megamall, Ortigas, Mandaluyong City","Japanese, Sushi",1500,No,4
4,"SM Megamall, Ortigas, Mandaluyong City","Japanese, Korean",1500,Yes,4


## *Step Four: Create a Pipeline for the Modelling*

In [81]:
# Preprocessing: standardise the numeric features and one-hot encode the
# categorical ones. Columns of X_train that are not listed here are not used.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Average Cost for two', 'Price range']),
        # handle_unknown='ignore': a locality / cuisine / booking string that was
        # not seen during fit is encoded as all zeros instead of making
        # model.predict() raise on user-supplied input.
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['Locality', 'Cuisines', "Has Table booking"])
    ])

# Fixed random_state keeps the fitted tree reproducible between runs.
predictor = DecisionTreeRegressor(random_state = 42)
model = make_pipeline(preprocessor, predictor)

# Fit the whole pipeline; as the last expression this also displays the fitted estimator.
model.fit(X_train, y_train)

## *Step Five: Evaluate the Trained Model on the Training Data*

In [82]:
# Score the fitted pipeline on the same data it was trained on, so these are
# in-sample figures rather than an estimate of performance on unseen restaurants.
y_pred = model.predict(X_train)
mse, mae, r2 = (
    metric(y_train, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report each metric on its own line
for label, value in (
    ("Mean Squared Error", mse),
    ("Mean Absolute Error", mae),
    ("R-squared", r2),
):
    print(f'{label}: {value}')

Mean Squared Error: 0.10764973400804463
Mean Absolute Error: 0.07690356419239254
R-squared: 0.9531916288735948


## *Step Six: Create a Function for the Restaurant Recommendation System*

In [78]:
def make_restaurant_prediction(locality, cuisines, average_cost, table_booking, price_range):
    """Predict a rating for the described restaurant and list restaurants in that locality.

    Relies on the notebook-level ``model`` (fitted pipeline) and ``dataset``
    (cleaned DataFrame).

    Parameters
    ----------
    locality : str
        Locality to look up; compared for exact equality with ``dataset['Locality']``.
    cuisines : str or iterable of str
        Cuisine description. The pieces are concatenated without a separator,
        so a plain string is passed through unchanged.
    average_cost : int or float
        Value for the "Average Cost for two" feature.
    table_booking : str
        Value for the "Has Table booking" feature (the notebook uses "Yes"/"No").
    price_range : int
        Value for the "Price range" feature.

    Returns
    -------
    str
        One text entry for each of the first (up to) three restaurants of
        ``dataset`` in ``locality``, or a "no match" message when there are none.

    Notes
    -----
    The rating shown is the single prediction for the supplied inputs; the same
    value is repeated for every listed restaurant. The rows are the first matches
    in dataset order and are not ranked.
    """
    # One-row frame with the feature columns the pipeline was fitted on.
    # (Named `features` so the built-in `dict` is no longer shadowed.)
    features = pd.DataFrame(
        {
            "Locality": locality,
            "Cuisines": ''.join(cuisines),
            "Average Cost for two": average_cost,
            "Has Table booking": table_booking,
            "Price range": price_range,
        },
        index=[0],
    )
    predicted_rating = model.predict(features)[0]

    # Restaurants whose locality matches exactly (a single filter is sufficient).
    matches = dataset[dataset['Locality'] == locality]
    if matches.empty:
        return "No matching restaurants found in the specified city or locality."

    entries = []
    for _, row in matches.head(3).iterrows():
        # Some localities already end with the city name; appending it again
        # produced "..., Makati City, Makati City".
        location = str(row['Locality'])
        city = str(row['City'])
        if not location.strip().lower().endswith(city.strip().lower()):
            location = f"{location}, {city}"

        entries.append(
            f"Restaurant Name: {row['Restaurant Name']}\n"
            f"Location: {location}\n"
            f"Address: {row['Address']}\n"
            f"Predicted Rating: {predicted_rating:.2f}\n\n"
        )

    return "".join(entries)

## *Step Seven: Making Predictions with the Recommendation System Function*

In [79]:
# Example query: the restaurant profile to score and the locality to search in.
# These stay as notebook-level variables because later cells may read them.
locality = "Little Tokyo, Legaspi Village, Makati City"
cuisines = "French, Japanese, Desserts"
average_cost = 1100
price_range = 2
table_booking = "Yes"

# Run the recommendation function with explicit keyword arguments and show its text
result = make_restaurant_prediction(
    locality=locality,
    cuisines=cuisines,
    average_cost=average_cost,
    table_booking=table_booking,
    price_range=price_range,
)
print(result)

Restaurant Name: Izakaya Kikufuji
Location: Little Tokyo, Legaspi Village, Makati City, Makati City
Address: Little Tokyo, 2277 Chino Roces Avenue, Legaspi Village, Makati City
Predicted Rating: 4.00




## *Step Eight: Improve the Recommendation Function*

In [80]:
# Function to make the restaurant prediction and display results
def make_restaurant_prediction(locality, cuisines, average_cost, table_booking, price_range):
    """Predict a rating, list restaurants in the locality and plot them on a map.

    This definition replaces the earlier function of the same name in the
    notebook. It relies on the notebook-level ``model`` (fitted pipeline) and
    ``dataset`` (cleaned DataFrame).

    Parameters
    ----------
    locality : str
        Locality to look up; compared for exact equality with ``dataset['Locality']``.
    cuisines : str or iterable of str
        Cuisine description. The pieces are concatenated without a separator,
        so a plain string is passed through unchanged.
    average_cost : int or float
        Value for the "Average Cost for two" feature.
    table_booking : str
        Value for the "Has Table booking" feature (the notebook uses "Yes"/"No").
    price_range : int
        Value for the "Price range" feature.

    Returns
    -------
    str
        One text entry for each of the first (up to) five restaurants of
        ``dataset`` in ``locality``, or a "no match" message when there are none.

    Side effects
    ------------
    When at least one restaurant matches, a Plotly scatter map of the listed
    restaurants is shown via ``fig.show()``.

    Notes
    -----
    The rating shown is the single prediction for the supplied inputs; the same
    value is repeated for every listed restaurant. The rows are the first matches
    in dataset order and are not ranked.
    """
    # One-row frame with the feature columns the pipeline was fitted on.
    features = pd.DataFrame(
        {
            "Locality": locality,
            "Cuisines": ''.join(cuisines),
            "Average Cost for two": average_cost,
            "Has Table booking": table_booking,
            "Price range": price_range,
        },
        index=[0],
    )
    predicted_rating = model.predict(features)[0]

    # Restaurants whose locality matches exactly (a single filter is sufficient).
    matches = dataset[dataset['Locality'] == locality]
    if matches.empty:
        return "No matching restaurants found in the specified city or locality."

    entries = []
    restaurants_info = []
    for _, row in matches.head(5).iterrows():  # first five matches, dataset order
        # Some localities already end with the city name; appending it again
        # produced "..., Makati City, Makati City".
        location = str(row['Locality'])
        city = str(row['City'])
        if not location.strip().lower().endswith(city.strip().lower()):
            location = f"{location}, {city}"

        entries.append(
            f"Restaurant Name: {row['Restaurant Name']}\n"
            f"Location: {location}\n"
            f"Address: {row['Address']}\n"
            # Report the listed restaurant's own booking flag, not the user's query value.
            f"Table Booking: {row['Has Table booking']}\n"
            f"Predicted Rating: {predicted_rating:.2f}\n\n"
        )

        # Record for the map: one marker per listed restaurant
        restaurants_info.append({
            'Restaurant Name': row['Restaurant Name'],
            'Location': location,
            'Latitude': row['Latitude'],
            'Longitude': row['Longitude'],
            'Predicted Rating': predicted_rating,
        })

    # Build the frame once from the collected records and plot it
    map_data = pd.DataFrame(restaurants_info)
    fig = px.scatter_mapbox(map_data,
                            lat="Latitude", lon="Longitude",
                            hover_name="Restaurant Name", hover_data=["Location", "Predicted Rating"],
                            color="Predicted Rating", color_continuous_scale="Viridis", size_max = 5, zoom = 20)

    # OpenStreetMap tiles are used as the base map
    fig.update_layout(mapbox_style="open-street-map")
    fig.show()

    return "".join(entries)

# Creating ipywidgets for user input

# Locality picker: one entry per distinct locality in the cleaned dataset
locality_dropdown = widgets.Dropdown(
    description='Locality:',
    options=dataset['Locality'].unique(),
)

# Cuisine picker: one entry per distinct cuisine string in the feature matrix
cuisines_dropdown = widgets.Dropdown(
    description='Cuisines:',
    options=X_train['Cuisines'].unique(),
)

# Average-cost slider spanning the costs observed in the dataset; starts at 0
cost_column = dataset["Average Cost for two"]
average_cost_slider = widgets.IntSlider(
    description='Average Cost for two:',
    min=cost_column.min(),
    max=cost_column.max(),
    step=1,
    value=0,
)

# Price-range slider over the levels 1-4; starts at 2
price_range_slider = widgets.IntSlider(
    description='Price Range:',
    min=1,
    max=4,
    step=1,
    value=2,
)

# Table-booking picker
table_booking_dropdown = widgets.Dropdown(
    description="Table Booking:",
    options=["Yes", "No"],
)

# Button that triggers the prediction
predict_button = widgets.Button(
    description="Predict",
    button_style='info',
    tooltip="Click to predict the rating",
)

# Area in which the prediction text is rendered
output_area = widgets.Output()

# Function to handle button click and display result
def on_predict_button_click(b):
    """Handle a click on the Predict button.

    Reads the current value of every input widget, calls
    ``make_restaurant_prediction`` with them and prints the returned text
    inside ``output_area`` after clearing its previous content.

    Parameters
    ----------
    b : object
        The clicked button as passed by ``Button.on_click``; not used.
    """
    # Read every input from its widget. The average cost is taken from the
    # slider rather than from a leftover notebook variable of the same name.
    locality = locality_dropdown.value
    cuisines = cuisines_dropdown.value  # already the full cuisine string
    average_cost = average_cost_slider.value
    price_range = price_range_slider.value
    table_booking = table_booking_dropdown.value

    # Replace the previous result with the new one
    with output_area:
        output_area.clear_output()
        result = make_restaurant_prediction(locality, cuisines, average_cost, table_booking, price_range)
        print(result)

# Run the handler whenever the Predict button is pressed
predict_button.on_click(on_predict_button_click)

# Render the input controls, then the button, then the output area, in this order
input_controls = (
    locality_dropdown,
    cuisines_dropdown,
    price_range_slider,
    average_cost_slider,
    table_booking_dropdown,
)
display(*input_controls, predict_button, output_area)


Dropdown(description='Locality:', options=('Century City Mall, Poblacion, Makati City', 'Little Tokyo, Legaspi…

Dropdown(description='Cuisines:', options=('French, Japanese, Desserts', 'Japanese', 'Seafood, Asian, Filipino…

IntSlider(value=2, description='Price Range:', max=4, min=1)

IntSlider(value=0, description='Average Cost for two:', max=800000)

Dropdown(description='Table Booking:', options=('Yes', 'No'), value='Yes')

Button(button_style='info', description='Predict', style=ButtonStyle(), tooltip='Click to predict the rating')

Output()