In [39]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset into `df`.
try:
    df = pd.read_csv('/content/drive/MyDrive/Cognifyz/Dataset .csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards every variable and hides the reason for the
    # failure. A raised exception stops the cell and shows the message.
    raise FileNotFoundError(
        "Error: Dataset file not found. Please check the file path."
    ) from err

# Set display options to show all columns (no column limit, no width limit).
# These are process-wide pandas options and stay in effect for later cells.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Show the first 3 rows of the dataframe (last expression -> rich cell output)
df.head(3)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",1100,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,1200,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4000,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270


In [41]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset into `df`.
# The frame is re-read here because the preprocessing steps below overwrite
# columns of `df` in place; starting from a fresh copy keeps this cell re-runnable.
try:
    df = pd.read_csv('/content/drive/MyDrive/Cognifyz/Dataset .csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards every variable and hides the reason for the
    # failure. A raised exception stops the cell and shows the message.
    raise FileNotFoundError(
        "Error: Dataset file not found. Please check the file path."
    ) from err

# Step 1: Preprocess the dataset

# Handle missing values:
#   * object (text) columns -> filled with the column's most frequent value
#   * numeric columns       -> filled with the column's mean
for col in df.columns:
    if df[col].dtype == 'object':
        modes = df[col].mode()
        # mode() returns an empty Series when every value in the column is
        # missing; indexing it with [0] would fail, so such a column is left
        # untouched instead.
        if not modes.empty:
            df[col] = df[col].fillna(modes.iloc[0])
    elif pd.api.types.is_numeric_dtype(df[col]):
        df[col] = df[col].fillna(df[col].mean())

# Step 2: Encode categorical variables
# Every object column is replaced by integer codes. The fitted encoder for each
# column is kept in `label_encoders` (keyed by column name) so that codes can be
# produced for user input and translated back to the original values later.
label_encoders = {}
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Step 3: Recommendation Function (Content-Based Filtering)
def recommend_restaurants(user_preferences, df, top_n=5, encoders=None):
    """Rank restaurants by how closely they match a set of user preferences.

    Each preference contributes points to a per-row ``similarity_score``:

    * Column present in ``encoders`` (label-encoded): +3 when the row's code
      equals the encoded preference. The comparison is on the whole cell value,
      so 'Chinese' does not match a cell that was 'Chinese, Thai'. A preference
      value the encoder cannot encode is skipped.
    * Any other numeric column: +3 for an exact match, +2 when the absolute
      difference is at most 1, +1 when it is at most 2.
    * Preferences naming a column that is not in ``df`` are ignored.

    Parameters
    ----------
    user_preferences : dict
        Maps column names to the desired value (raw, un-encoded).
    df : pandas.DataFrame
        Preprocessed restaurant data; it is not modified.
    top_n : int, default 5
        Number of highest-scoring rows to return.
    encoders : dict, optional
        Maps column names to fitted encoders exposing ``transform`` and
        ``inverse_transform``. Defaults to the notebook-level
        ``label_encoders`` dict.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows sorted by descending ``similarity_score``, with
        the columns 'Restaurant Name', 'Cuisines', 'Price range',
        'Average Cost for two', 'Aggregate rating', 'Votes' and
        'similarity_score'. 'Restaurant Name' and 'Cuisines' are decoded back
        to their original values when an encoder for them is available.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the returned columns.
    """
    if encoders is None:
        # Fall back to the encoders built by the notebook's encoding step.
        encoders = label_encoders

    # Float scores starting at zero, so the column exists even for an empty frame.
    scores = pd.Series(0.0, index=df.index)

    for pref, value in user_preferences.items():
        if pref not in df.columns:
            continue

        if pref in encoders:
            # Encode the preference once per column rather than once per row.
            try:
                encoded_value = encoders[pref].transform([value])[0]
            except (ValueError, TypeError):
                # Value unknown to (or incompatible with) the encoder: no row
                # can match it, so this preference contributes nothing.
                continue
            scores += 3.0 * (df[pref] == encoded_value)

        elif pd.api.types.is_numeric_dtype(df[pref]):
            diff = (df[pref] - value).abs()
            # The three nested thresholds add up to 3 / 2 / 1 / 0 points.
            scores += (
                (diff == 0).astype(float)
                + (diff <= 1).astype(float)
                + (diff <= 2).astype(float)
            )

    # assign() returns a new frame, leaving the caller's df untouched.
    ranked = df.assign(similarity_score=scores)

    # copy() so the decoded values are written to an independent frame.
    top_recommendations = (
        ranked.sort_values(by='similarity_score', ascending=False)
        .head(top_n)
        .copy()
    )

    # Decode the categorical columns shown to the user back to their original values.
    for col in ('Restaurant Name', 'Cuisines'):
        if col in encoders:
            top_recommendations[col] = encoders[col].inverse_transform(
                top_recommendations[col].to_numpy()
            )

    return top_recommendations[[
        'Restaurant Name', 'Cuisines', 'Price range',
        'Average Cost for two', 'Aggregate rating', 'Votes', 'similarity_score'
    ]]

# Step 4: Sample user preference (keys are column names of the dataset)
user_preferences = dict([
    ('Cuisines', 'Chinese'),
    ('Price range', 2),
    ('Aggregate rating', 4.5),
])

# Step 5: Run the recommendation, passing both arguments by keyword
recommendations = recommend_restaurants(df=df, user_preferences=user_preferences)

# Step 6: Display the results: header, one blank line, then the table without its index
print(
    "Top 5 Restaurant Recommendations:\n",
    recommendations.to_string(index=False),
    sep="\n",
)


Top 5 Restaurant Recommendations:

Restaurant Name Cuisines  Price range  Average Cost for two  Aggregate rating  Votes  similarity_score
     China Cafe  Chinese            2                    25               4.0    112               8.0
        Haaochi  Chinese            2                   600               3.5    123               8.0
   Dimsum & Co.  Chinese            2                   650               3.9    136               8.0
  Wok On Wheels  Chinese            2                   550               3.8     80               8.0
       Hao Ming  Chinese            2                   600               3.5    280               8.0
