In [1]:
import pandas as pd

# Load the dataset. The path is relative to the notebook's working directory;
# note that the file name as written contains a space before ".csv".
df = pd.read_csv('Dataset .csv')

# Handle missing values: drop every row that has at least one missing value.
# The explicit .copy() makes df_clean an independent frame, so assigning into it
# later cannot raise SettingWithCopyWarning (which some pandas versions emit for
# frames derived via dropna()) and can never be mistaken for a view of df.
df_clean = df.dropna().copy()

# Show the number of rows before and after cleaning
print("Original shape:", df.shape)
print("After dropping missing values:", df_clean.shape)

# Display the first few rows of the cleaned data
df_clean.head()

Original shape: (9551, 21)
After dropping missing values: (9542, 21)


Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [2]:
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables using LabelEncoder.
# Rebind df_clean to an explicit copy first so the column assignments below
# operate on an independent frame (no SettingWithCopyWarning on a dropna() result).
df_clean = df_clean.copy()
categorical_cols = df_clean.select_dtypes(include='object').columns

# Keep one fitted encoder per column. Reusing a single LabelEncoder would
# overwrite its classes_ on every fit, leaving only the last column's mapping
# and making it impossible to inverse_transform the other columns.
# NOTE: re-running this cell on the already-encoded frame finds no object
# columns and therefore resets label_encoders to an empty dict.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Plain column assignment replaces the column (and its dtype) with the
    # integer codes; `df.loc[:, col] = ...` sets values in place on
    # pandas >= 2.0 and would leave the column as object dtype.
    df_clean[col] = le.fit_transform(df_clean[col].astype(str))
    label_encoders[col] = le

# Display the first few rows of the encoded data
df_clean.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,3742,162,73,8677,171,172,121.027535,14.565443,920,...,0,1,0,0,0,3,4.8,0,1,314
1,6304287,3167,162,73,6047,592,600,121.014101,14.553708,1111,...,0,1,0,0,0,3,4.5,0,1,591
2,6300002,2892,162,75,4676,308,314,121.056831,14.581404,1671,...,0,1,0,0,0,4,4.4,1,5,270
3,6318506,4700,162,75,8682,860,873,121.056475,14.585318,1126,...,0,0,0,0,0,4,4.9,0,1,365
4,6314302,5515,162,75,8681,860,873,121.057508,14.58445,1122,...,0,1,0,0,0,4,4.8,0,1,229


In [5]:
# Content-based filtering function for restaurant recommendations

def recommend_restaurants(
    df,
    cuisine=None,
    price_min=None,
    price_max=None,
    location=None,
    min_rating=None,
    min_votes=None,
    top_n=10,
):
    """Return the best-rated restaurants that satisfy the given preferences.

    Every filter is optional; a filter left as ``None`` (or, for the text
    filters, an empty string) is skipped. The input frame is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Restaurant table. Must contain 'Aggregate rating' and 'Votes' (used
        for ranking) plus the column of every filter that is supplied:
        'Cuisines', 'Average Cost for two', 'City'.
    cuisine : str, optional
        Literal, case-insensitive substring to look for in 'Cuisines'.
    price_min, price_max : number, optional
        Inclusive bounds on 'Average Cost for two'.
    location : str, optional
        Literal, case-insensitive substring to look for in 'City'.
    min_rating : number, optional
        Inclusive lower bound on 'Aggregate rating'.
    min_votes : number, optional
        Inclusive lower bound on 'Votes'.
    top_n : int or None, default 10
        Maximum number of rows to return; ``None`` returns every match.

    Returns
    -------
    pandas.DataFrame
        Matching rows sorted by 'Aggregate rating' then 'Votes', both
        descending, truncated to ``top_n`` rows. Empty if nothing matches.
    """
    # Each boolean selection below yields a new frame, so the caller's df is
    # left untouched without paying for an up-front full copy.
    filtered = df

    # Filter by cuisine preference.
    # regex=False: the preference is plain user text, not a pattern. With regex
    # matching, '.' acts as a wildcard and an unbalanced '(' raises re.error.
    if cuisine:
        cuisine_hit = filtered['Cuisines'].str.contains(
            cuisine, case=False, na=False, regex=False
        )
        filtered = filtered[cuisine_hit]

    # Filter by price range (both bounds inclusive)
    if price_min is not None:
        filtered = filtered[filtered['Average Cost for two'] >= price_min]
    if price_max is not None:
        filtered = filtered[filtered['Average Cost for two'] <= price_max]

    # Filter by location (literal substring, same reasoning as for cuisine)
    if location:
        city_hit = filtered['City'].str.contains(
            location, case=False, na=False, regex=False
        )
        filtered = filtered[city_hit]

    # Filter by minimum rating
    if min_rating is not None:
        filtered = filtered[filtered['Aggregate rating'] >= min_rating]

    # Filter by minimum votes/popularity
    if min_votes is not None:
        filtered = filtered[filtered['Votes'] >= min_votes]

    # Rank: best rating first, ties broken by the larger number of votes
    ranked = filtered.sort_values(
        ['Aggregate rating', 'Votes'], ascending=[False, False]
    )

    return ranked if top_n is None else ranked.head(top_n)


In [6]:

# Example usage: Italian food in Rio de Janeiro, at most 200 for two, rated 4.0 or better.
# The returned frame is the cell's last expression, so it is rendered as a table.
recommend_restaurants(
    df,
    cuisine='Italian',
    location='Rio de Janeiro',
    price_max=200,
    min_rating=4.0,
)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
53,7304312,D.O.C Ristorante,30,Rio de Janeiro,"Le Monde, Bloco 3, Lojas A/B, Avenida das Am��...","Le Monde, Barra da Tijuca","Le Monde, Barra da Tijuca, Rio de Janeiro",-43.348792,-22.999911,Italian,...,Brazilian Real(R$),No,No,No,No,4,4.0,Green,Very Good,5


In [7]:
# Test the recommendation system with sample user preferences

# Example 1: Italian cuisine, price max 200, in Rio de Janeiro, min rating 4.0
recommendations = recommend_restaurants(df, cuisine='Italian', location='Rio de Janeiro', price_max=200, min_rating=4.0)

# Only the columns relevant to the stated preferences are printed.
summary_columns = ['Restaurant Name', 'Cuisines', 'Average Cost for two', 'City', 'Aggregate rating', 'Votes']
print("Sample Recommendations (Italian, <=200, Rio de Janeiro, rating>=4.0):")
print(recommendations.loc[:, summary_columns])



Sample Recommendations (Italian, <=200, Rio de Janeiro, rating>=4.0):
     Restaurant Name Cuisines  Average Cost for two            City  \
53  D.O.C Ristorante  Italian                   150  Rio de Janeiro   

    Aggregate rating  Votes  
53               4.0      5  


In [8]:
# Example 2: Japanese cuisine, price range 50-300, in Brasília, min rating 3.5
recommendations2 = recommend_restaurants(df, cuisine='Japanese', location='Brasília', price_min=50, price_max=300, min_rating=3.5)

# Only the columns relevant to the stated preferences are printed.
summary_columns = ['Restaurant Name', 'Cuisines', 'Average Cost for two', 'City', 'Aggregate rating', 'Votes']
print("\nSample Recommendations (Japanese, 50-300, Brasília, rating>=3.5):")
print(recommendations2.loc[:, summary_columns])


Sample Recommendations (Japanese, 50-300, Brasília, rating>=3.5):
Empty DataFrame
Columns: [Restaurant Name, Cuisines, Average Cost for two, City, Aggregate rating, Votes]
Index: []
