In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

In [3]:
restaurants=pd.read_csv('egyptian_restaurants.csv')
ratings=pd.read_csv('Ratings.csv')

In [5]:
restaurants.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Category,City,Area,Longitude,Latitude
0,1,30 North,Cafe,Alexandria,Gleem,29.901193,31.186937
1,2,Indira,Asian Cuisine,Alexandria,Montaza,29.920624,31.206004
2,3,Seoudi,Casual Dining,Cairo,Nasr City,31.245766,30.041615
3,4,Paul,Bakery,Alexandria,Sidi Gaber,29.9064,31.189993
4,5,Wel3a,Grill,Sharm El Sheikh,Sharks Bay,34.325296,27.906238


In [7]:
ratings.head()

Unnamed: 0,User ID,Restaurant ID,Rating
0,3079,831,4
1,73,188,2
2,8583,514,3
3,6466,676,3
4,7292,603,2


In [9]:
ratings.shape

(500000, 3)

In [11]:
restaurants.shape

(1000, 7)

In [13]:
restaurants.isna().sum()

Restaurant ID      0
Restaurant Name    0
Category           0
City               0
Area               0
Longitude          0
Latitude           0
dtype: int64

In [15]:
ratings.isna().sum()

User ID          0
Restaurant ID    0
Rating           0
dtype: int64

In [17]:
ratings=ratings.drop_duplicates(subset=["User ID","Restaurant ID"],keep="last")

In [19]:
ratings.shape

(489672, 3)

In [21]:
ratings.head()

Unnamed: 0,User ID,Restaurant ID,Rating
0,3079,831,4
1,73,188,2
2,8583,514,3
3,6466,676,3
4,7292,603,2


In [23]:
ratings.isna().sum()

User ID          0
Restaurant ID    0
Rating           0
dtype: int64

In [25]:
average_ratings=ratings.groupby('Restaurant ID')['Rating'].mean().reset_index()

In [27]:
average_ratings.rename(columns={"Rating": "Avg_Rating"}, inplace=True)
average_ratings

Unnamed: 0,Restaurant ID,Avg_Rating
0,1,3.051040
1,2,3.043307
2,3,3.028747
3,4,3.015474
4,5,2.913580
...,...,...
995,996,2.975983
996,997,3.073852
997,998,2.950884
998,999,2.915547


In [29]:
restaurants=restaurants.merge(average_ratings,on='Restaurant ID',how='left')

In [31]:
restaurants.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Category,City,Area,Longitude,Latitude,Avg_Rating
0,1,30 North,Cafe,Alexandria,Gleem,29.901193,31.186937,3.05104
1,2,Indira,Asian Cuisine,Alexandria,Montaza,29.920624,31.206004,3.043307
2,3,Seoudi,Casual Dining,Cairo,Nasr City,31.245766,30.041615,3.028747
3,4,Paul,Bakery,Alexandria,Sidi Gaber,29.9064,31.189993,3.015474
4,5,Wel3a,Grill,Sharm El Sheikh,Sharks Bay,34.325296,27.906238,2.91358


In [33]:
print(restaurants['Avg_Rating'].isna().sum())

0


In [35]:
combined = restaurants[["Category", "City", "Area"]]
encoder = OneHotEncoder(sparse_output=False)
encoded_array = encoder.fit_transform(combined)
encoded_features = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(combined.columns))
scaler = MinMaxScaler()
encoded_features_scaled = scaler.fit_transform(encoded_features)
similarity_matrix = cosine_similarity(encoded_features_scaled)

# similarity_df = pd.DataFrame(similarity_matrix, index=restaurants["Restaurant Name"], columns=restaurants["Restaurant Name"])
# similarity_df

In [143]:
def content_recommender(restaurant_id, top_n=5):
    if restaurant_id not in restaurants["Restaurant ID"].values:
        print(f"Restaurant ID '{restaurant_id}' not found in dataset!")
        return None
    
    idx = restaurants.index[restaurants["Restaurant ID"] == restaurant_id][0] 
    restaurant_name = restaurants.iloc[idx]["Restaurant Name"]  
    
    sim_scores = list(enumerate(similarity_matrix[idx].tolist()))
    # print(sim_scores[:5])
    
    sim_scores = sorted(
        sim_scores, 
        key=lambda x: (x[1], restaurants.iloc[x[0]]["Avg_Rating"]), 
        reverse=True
    )

    sim_scores = sim_scores[:top_n+1]
    recommended_restaurants = restaurants.iloc[[i[0] for i in sim_scores]][["Restaurant ID", "Restaurant Name", "Avg_Rating"]]
    recommended_restaurants = recommended_restaurants[recommended_restaurants["Restaurant Name"] != restaurant_name]

    recommended_restaurants = recommended_restaurants.drop_duplicates(subset=["Restaurant Name"], keep="first")
    recommended_restaurants = recommended_restaurants.reset_index(drop=True)
    return recommended_restaurants


In [151]:
restaurant_id = 27
recommendations = content_recommender(restaurant_id)
if recommendations is not None:
    print(recommendations)
else:
    print("No recommendations found!")



   Restaurant ID Restaurant Name  Avg_Rating
0            630        30 North    2.983471
1            115         Beano's    2.949474


In [157]:
def evaluate_recommendations(restaurant_id, top_n=5):
    recommended_restaurants = content_recommender(restaurant_id, top_n)
    
    if recommended_restaurants is None:
        return None

    unique_recommendations = recommended_restaurants["Restaurant Name"].nunique()
   
    idx = restaurants.index[restaurants["Restaurant ID"] == restaurant_id][0]
    input_restaurant_name = restaurants.iloc[idx]["Restaurant Name"]

    sim_scores = list(enumerate(similarity_matrix[idx].tolist()))
    sim_scores = sorted(sim_scores, key=lambda x: (x[1], restaurants.iloc[x[0]]["Avg_Rating"]), reverse=True)
    
    unique_sim_scores = []
    seen_names = set()
    for score in sim_scores:
        r_name = restaurants.iloc[score[0]]["Restaurant Name"]
        if r_name not in seen_names and r_name != input_restaurant_name:
            unique_sim_scores.append(score)
            seen_names.add(r_name)
    
    top_recs = unique_sim_scores[:top_n]

    rec_set = set((restaurants.iloc[idx_val]["Restaurant ID"], restaurants.iloc[idx_val]["Restaurant Name"]) for idx_val, _ in top_recs)
    rec_from_df = set((row["Restaurant ID"], row["Restaurant Name"]) for _, row in recommended_restaurants.iterrows())
    
    common_recommendations = rec_set.intersection(rec_from_df)

    accuracy = (len(common_recommendations) / top_n) * 100
    diversity = (unique_recommendations / top_n) * 100

    return unique_recommendations, common_recommendations, accuracy, diversity, top_recs


In [159]:
restaurant_id = 27
result = evaluate_recommendations(restaurant_id)

if result is not None:
    unique_recommendations, common_recommendations, accuracy, diversity, top_recs = result

    print(f"\n Unique recommendations: {unique_recommendations}")

    print("\n Top recommendations based on similarity (ID, Name, Similarity):")
    for idx_val, score in top_recs:
        rid = restaurants.iloc[idx_val]["Restaurant ID"]
        name = restaurants.iloc[idx_val]["Restaurant Name"]
        print(f"    - Restaurant ID: {rid}, Name: {name}, Similarity: {score:.4f}")

    print("\n Common recommendations from most similar to least:")
    for idx_val, _ in top_recs:
        rid = restaurants.iloc[idx_val]["Restaurant ID"]
        name = restaurants.iloc[idx_val]["Restaurant Name"]
        if (rid, name) in common_recommendations:
            print(f"Restaurant ID: {rid}, Name: {name}")


    print(f"\n Accuracy of the recommendations: {accuracy:.2f}%")
    print(f"Diversity of the recommendations: {diversity:.2f}%")

else:
    print("No recommendations available for evaluation.")



 Unique recommendations: 2

 Top recommendations based on similarity (ID, Name, Similarity):
    - Restaurant ID: 630, Name: 30 North, Similarity: 1.0000
    - Restaurant ID: 115, Name: Beano's, Similarity: 1.0000
    - Restaurant ID: 661, Name: Peking, Similarity: 0.6667
    - Restaurant ID: 513, Name: Buffalo Burger, Similarity: 0.6667
    - Restaurant ID: 523, Name: Sizzler, Similarity: 0.6667

 Common recommendations from most similar to least:
Restaurant ID: 630, Name: 30 North
Restaurant ID: 115, Name: Beano's

 Accuracy of the recommendations: 40.00%
Diversity of the recommendations: 40.00%
