## **COGNIFYZ TECHNOLOGIES MACHINE LEARNING INTERNSHIP TASK 3**

## *Step One: Import All Libraries*

In [127]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import *
from sklearn.pipeline import make_pipeline
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.express as px

## *Step Two: Create a Wrangle Function for the Data Cleaning Process*

In [159]:
def wrangle(filepath):
    """Read the CSV at ``filepath`` and return a cleaned DataFrame.

    A fixed set of columns is removed first; afterwards every row that
    still holds a missing value in any remaining column is discarded.
    A ``KeyError`` is raised by pandas if an expected column is absent.
    """
    # Columns to discard (reasons are given where the notebook stated one).
    unwanted_columns = [
        "Restaurant ID",         # high cardinality
        "Locality Verbose",      # similar to the `Locality` column
        "Switch to order menu",  # low cardinality
        "Rating color",
        "Rating text",
        "Currency",
        "Country Code",
        "Has Online delivery",
        "Is delivering now",
        "Votes",
        "Aggregate rating",
        "Address",
        "Average Cost for two",
        "Has Table booking",
    ]

    raw = pd.read_csv(filepath)
    trimmed = raw.drop(columns=unwanted_columns)

    # Rows are filtered only after the columns above are gone, so a gap in a
    # discarded column never removes a row.
    return trimmed.dropna()

## *Import the Dataset with the Wrangle Function*

In [171]:
# NOTE(review): the path literal has a space before ".csv" - confirm it matches
# the file name on disk.
dataset_path = "Cognifyz_Dataset .csv"
data = wrangle(dataset_path)

# Show the first rows of the cleaned frame.
data.head()

Unnamed: 0,Restaurant Name,City,Locality,Longitude,Latitude,Cuisines,Price range
0,Le Petit Souffle,Makati City,"Century City Mall, Poblacion, Makati City",121.027535,14.565443,"French, Japanese, Desserts",3
1,Izakaya Kikufuji,Makati City,"Little Tokyo, Legaspi Village, Makati City",121.014101,14.553708,Japanese,3
2,Heat - Edsa Shangri-La,Mandaluyong City,"Edsa Shangri-La, Ortigas, Mandaluyong City",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4
3,Ooma,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City",121.056475,14.585318,"Japanese, Sushi",4
4,Sambo Kojin,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City",121.057508,14.58445,"Japanese, Korean",4


## *Encode the Cuisine Column*

In [173]:
# Learn one integer label per distinct value of the 'Cuisines' column.
# Each full cuisine string (e.g. "Japanese, Sushi") is treated as its own class.
le = LabelEncoder()
le.fit(data['Cuisines'])

# Store the integer labels next to the original text column.
data['Cuisines_Encoded'] = le.transform(data['Cuisines'])

In [162]:
# Preview the frame again to confirm the 'Cuisines_Encoded' column was added.
data.head()

Unnamed: 0,Restaurant Name,City,Locality,Longitude,Latitude,Cuisines,Price range,Cuisines_Encoded
0,Le Petit Souffle,Makati City,"Century City Mall, Poblacion, Makati City",121.027535,14.565443,"French, Japanese, Desserts",3,920
1,Izakaya Kikufuji,Makati City,"Little Tokyo, Legaspi Village, Makati City",121.014101,14.553708,Japanese,3,1111
2,Heat - Edsa Shangri-La,Mandaluyong City,"Edsa Shangri-La, Ortigas, Mandaluyong City",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4,1671
3,Ooma,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City",121.056475,14.585318,"Japanese, Sushi",4,1126
4,Sambo Kojin,Mandaluyong City,"SM Megamall, Ortigas, Mandaluyong City",121.057508,14.58445,"Japanese, Korean",4,1122


## *Step Three: Create the `X_train` and `y_train`*

In [174]:
# Select the model inputs (coordinates) and the target (encoded cuisine label).
# NOTE: no train/test split happens here - every cleaned row is used for training.
features = ['Longitude', 'Latitude']
X_train = data[features]
y_train = data['Cuisines_Encoded']

In [175]:
# Preview the feature matrix; only the two coordinate columns are model inputs.
X_train.head()

Unnamed: 0,Longitude,Latitude
0,121.027535,14.565443
1,121.014101,14.553708
2,121.056831,14.581404
3,121.056475,14.585318
4,121.057508,14.58445


## *Step Four: Build and Train the Machine Learning Model*

In [180]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# NOTE(review): `RandomForestClassifier` and `train_test_split` are imported above
# but not used in this cell; no hold-out split is performed.
# Decision Tree Classifier, seeded so repeated fits give the same tree.
model = DecisionTreeClassifier(random_state = 42)
model.fit(X_train, y_train)

## *Step Five: Evaluate the Trained Model*

In [182]:
# Predict on the same rows the model was fitted on. The resulting score is
# therefore a *training* accuracy and says nothing about unseen locations.
y_pred = model.predict(X_train)

# NOTE: `model` is a DecisionTreeClassifier; the `rf_` prefix is kept only so
# that any later cell reading `rf_accuracy` keeps working.
rf_accuracy = accuracy_score(y_train, y_pred)

print("Performance of Decision Tree Classifier (training data):")
print(f"Accuracy: {rf_accuracy:.2f}")
print("-" * 40)


Performance of Random Forest Classifier:
Accuracy: 0.92
----------------------------------------


## *Step Six: Create a Function for the Cuisine Classification System*

In [183]:
import ipywidgets as widgets
from IPython.display import display

def cuisine_classification(model, label_encoder, data, longitude, latitude,
                           lat_tolerance=5.0, lon_tolerance=5.0):
    """Predict the cuisine for a coordinate and list restaurants around it.

    Parameters
    ----------
    model : object with a ``predict(DataFrame)`` method
        Called with a one-row frame whose columns are ``Longitude`` and
        ``Latitude``, in that order.
    label_encoder : object with an ``inverse_transform(list_of_int)`` method
        Used to turn the predicted integer labels back into cuisine text.
    data : pandas.DataFrame
        Must contain ``Latitude`` and ``Longitude`` columns; it is only read.
    longitude, latitude : float
        The query location, in the same units as the columns of ``data``.
    lat_tolerance, lon_tolerance : float, default 5.0
        Half-width of the search window around the query location. A row is
        "nearby" when both coordinates lie within the window (bounds
        inclusive). The defaults reproduce the previously hard-coded values.

    Returns
    -------
    tuple
        ``(decoded_cuisines, nearby_restaurants)`` where the first item is
        whatever ``label_encoder.inverse_transform`` returns and the second
        is the filtered subset of ``data``.
    """
    # One-row frame for the model, built from the query location.
    input_data = pd.DataFrame({
        'Longitude': [longitude],
        'Latitude': [latitude],
    })

    prediction = model.predict(input_data)

    # Accept either a plain list or an array-like exposing ``tolist()``.
    if not isinstance(prediction, list):
        prediction = prediction.tolist()

    # Cast to built-in ints before decoding the labels.
    decoded_cuisines = label_encoder.inverse_transform([int(pred) for pred in prediction])

    # Rectangular window around the query point; both conditions must hold.
    in_lat_window = data['Latitude'].between(latitude - lat_tolerance, latitude + lat_tolerance)
    in_lon_window = data['Longitude'].between(longitude - lon_tolerance, longitude + lon_tolerance)
    nearby_restaurants = data[in_lat_window & in_lon_window]

    return decoded_cuisines, nearby_restaurants

# Input controls: one slider per coordinate, each starting at 0.0 and moving in
# steps of 0.01.
latitude_slider = widgets.FloatSlider(
    description='Latitude:', value=0.0, min=-90.0, max=90.0, step=0.01,
)
longitude_slider = widgets.FloatSlider(
    description='Longitude:', value=0.0, min=-180.0, max=180.0, step=0.01,
)

# Text area that receives the predicted cuisines.
cuisine_output = widgets.Textarea(
    description='Cuisines:',
    value='',
    placeholder='Cuisines will be displayed here...',
    layout=widgets.Layout(width='50%', height='50px'),
)

# Text area that receives the nearby restaurants.
restaurant_output = widgets.Textarea(
    description="Nearby Restaurants:",
    value='',
    placeholder='Restaurants will be displayed here...',
    layout=widgets.Layout(width='50%', height='50px'),
)

# Button that triggers the prediction.
predict_button = widgets.Button(
    description='Predict Cuisine',
    button_style='info',
    tooltip='Click to predict available cuisines',
)

# Function to handle the button click event
def on_predict_button_click(b):
    """Run a prediction for the slider coordinates and update both text areas.

    Parameters
    ----------
    b : object
        The argument supplied by the button's click callback; it is not used.
    """
    # Read the current slider positions.
    longitude = longitude_slider.value
    latitude = latitude_slider.value

    # The fitted encoder is bound to `le` in this notebook. The previous code
    # passed an undefined name `label_encoder`, which raised NameError on click.
    cuisines, nearby_restaurants = cuisine_classification(model, le, data, longitude, latitude)

    # Check the length rather than the truth value: the decoded labels may be
    # an array, whose truth value is ambiguous for anything but one element.
    if len(cuisines) > 0:
        cuisine_output.value = "\n".join(cuisines)
    else:
        cuisine_output.value = "No cuisines found for the given inputs."

    # Show at most the first five nearby restaurants as "Name (City)" lines.
    if not nearby_restaurants.empty:
        top_matches = nearby_restaurants.head(5)
        restaurant_list = [
            f"{name} ({city})"
            for name, city in zip(top_matches['Restaurant Name'], top_matches['City'])
        ]
        restaurant_output.value = "\n".join(restaurant_list)
    else:
        restaurant_output.value = "No Nearby Restaurants found in the given area."

# Register the handler so that clicking the button runs the prediction.
predict_button.on_click(on_predict_button_click)

# Render the controls first, then the two result areas, in this order.
display(
    latitude_slider, longitude_slider, predict_button,
    cuisine_output, restaurant_output,
)

FloatSlider(value=0.0, description='Latitude:', max=90.0, min=-90.0, step=0.01)

FloatSlider(value=0.0, description='Longitude:', max=180.0, min=-180.0, step=0.01)

Button(button_style='info', description='Predict Cuisine', style=ButtonStyle(), tooltip='Click to predict avai…

Textarea(value='', description='Cuisines:', layout=Layout(height='50px', width='50%'), placeholder='Cuisines w…

Textarea(value='', description='Nearby Restaurants:', layout=Layout(height='50px', width='50%'), placeholder='…