*PLACES RECOMMENDATION ENGINE!*

# Load data



In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the places catalogue. Later cells read the 'place_id', 'place_name'
# and 'categories' columns from this frame.
places_df = pd.read_csv('places.csv')
# Bare last expression: renders the loaded frame as the cell output.
places_df

# Preprocess data

In [None]:
# Drop rows with missing values. dropna() returns a new frame, so the result
# has to be assigned back; otherwise the incomplete rows stay in places_df and
# reach the later cells that split and iterate the 'categories' strings.
places_df = places_df.dropna()
# Bare last expression: show the cleaned frame as the cell output.
places_df

In [None]:
# Remove surrounding quotation marks, then split the ';'-separated category
# string into a list of labels, in a single pass. (Splitting first and then
# calling the .str accessor on the resulting lists does not work: the elements
# are no longer strings, so the list values are lost.)
places_df['categories'] = places_df['categories'].str.strip('"').str.split(';')

# Distinct category labels across all places, sorted so that the indicator
# columns created from them come out in the same order on every run.
categories = sorted({cat for cats in places_df['categories'] for cat in cats})
categories

In [None]:
# One-hot encode: add one 0/1 indicator column per category label, set to 1
# when the label appears in the row's category list.
for cat in categories:
    places_df[cat] = [int(cat in labels) for labels in places_df['categories']]

In [None]:
# The list-valued column is removed now that the indicator columns exist.
places_df = places_df.drop(columns=['categories'])

In [None]:
# Item-to-item cosine similarity over every column except the identifiers.
cosine_sim = cosine_similarity(
    places_df.drop(columns=['place_id', 'place_name'])
)

# Define function to recommend places based on user preferences


In [None]:
def recommend_places(user_preferences, n_recommendations=5):
    """Recommend the places whose categories best match the user's preferences.

    Reads the module-level ``places_df`` (one 0/1 indicator column per label in
    ``categories``, plus 'place_name') and ranks every place by the cosine
    similarity between its indicator vector and the user's preference vector.

    Args:
        user_preferences: Iterable of category labels the user likes. Every
            label must be one of ``categories``.
        n_recommendations: Maximum number of places to return.

    Returns:
        A list of ``[place_name, categories]`` pairs, best match first, where
        ``categories`` is the list of labels whose indicator is set for that
        place. Empty when there are no preferences, no places, or
        ``n_recommendations`` is not positive.

    Raises:
        ValueError: If a preference is not a known category label.
    """
    preferences = list(user_preferences)
    feature_cols = list(categories)

    # An unknown label would silently enlarge the profile vector and make it
    # incompatible with the item matrix, so reject it up front.
    unknown = [pref for pref in preferences if pref not in feature_cols]
    if unknown:
        raise ValueError(f"Unknown categories: {unknown}")

    if not preferences or n_recommendations <= 0 or places_df.empty:
        return []

    # Create user profile vector, indexed exactly like the item feature columns
    # so both sides of the similarity have the same dimensions and order.
    user_profile = pd.Series(0, index=feature_cols)
    user_profile.loc[preferences] = 1

    # Compute user similarity to items (category indicator columns only)
    item_features = places_df[feature_cols]
    user_sim = cosine_similarity(
        [user_profile.to_numpy()], item_features.to_numpy()
    )[0]

    # Get top n similar items. The best match is kept: the profile is not one
    # of the rows, so there is no "self" entry to skip at position 0.
    ranked = sorted(enumerate(user_sim), key=lambda pair: pair[1], reverse=True)
    top_positions = [position for position, _score in ranked[:n_recommendations]]

    # Get recommended places. The list-valued 'categories' column is dropped
    # from places_df after encoding, so the labels are rebuilt from the
    # indicator columns.
    recommendations = []
    for position in top_positions:
        row = places_df.iloc[position]
        place_categories = [label for label in feature_cols if row[label] == 1]
        recommendations.append([row['place_name'], place_categories])

    return recommendations

In [None]:
# Example usage: recommend 2 places for someone who likes tombs
recommendations = recommend_places(['Tombs'], n_recommendations=2)
print(recommendations)

# *V2*

In [None]:
import pandas as pd
import numpy as np
!pip install surprise
from surprise import Dataset, Reader, SVD, KNNBasic
from geopy.distance import geodesic
from typing import List, Tuple

# Read in the CSV file. Later cells read the 'user_id', 'location',
# 'location_name', 'rating', 'latitude' and 'longitude' columns from it.
df = pd.read_csv('egypt_tourist_locations.csv')

# Geodesic distance helper
def calculate_distance(lat1: float, long1: float, lat2: float, long2: float) -> float:
    """Return the geodesic distance in kilometres between two (lat, long) points."""
    origin = (lat1, long1)
    destination = (lat2, long2)
    return geodesic(origin, destination).km

# Define a function to get recommended locations based on previously rated locations
def recommended_locations(rated_locations: List[str], num_recommendations: int=5) -> List[Tuple[str, float]]:
    """Recommend unrated places that lie closest to the places already rated.

    Follows the distance-based exploration in the cells below: every place
    takes the mean latitude/longitude of its 'location' group in ``df``, and
    each unrated place is scored by its geodesic distance to the nearest rated
    place. Because coordinates are location-level means, places that share a
    'location' with a rated place score 0.0.

    Args:
        rated_locations: 'location_name' values the user has already rated.
            Names that do not occur in ``df`` are ignored.
        num_recommendations: Maximum number of recommendations to return.

    Returns:
        ``(location_name, distance_km)`` tuples, nearest first (ties broken by
        name). Empty when no rated name is found in ``df``, when there is
        nothing left to recommend, or when ``num_recommendations`` <= 0.
    """
    if num_recommendations <= 0:
        return []

    # Calculate the mean latitude and longitude for each location
    location_means = (
        df.groupby('location')[['latitude', 'longitude']].mean().reset_index()
    )

    # One row per place, carrying the coordinates of its location
    places = (
        df[['location', 'location_name']]
        .drop_duplicates()
        .merge(location_means, on='location', how='left')
        .dropna(subset=['latitude', 'longitude'])
    )

    # Split into previously rated places and candidates
    is_rated = places['location_name'].isin(list(rated_locations))
    rated_points = list(
        places.loc[is_rated, ['latitude', 'longitude']]
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )
    candidates = places.loc[~is_rated].drop_duplicates(subset='location_name')
    if not rated_points or candidates.empty:
        return []

    # Distance from each candidate to its nearest rated place
    scored = [
        (
            name,
            min(
                calculate_distance(lat, lon, rated_lat, rated_lon)
                for rated_lat, rated_lon in rated_points
            ),
        )
        for name, lat, lon in zip(
            candidates['location_name'],
            candidates['latitude'],
            candidates['longitude'],
        )
    ]

    # Nearest first; the name is a deterministic tie-breaker
    scored.sort(key=lambda item: (item[1], item[0]))
    return scored[:num_recommendations]


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
rated_locations = ["Alexandria Library", "Cleopatra's Palace", "Pyramids of Giza"]

# Per-location means, each as a two-column frame keyed by 'location'
mean_ratings = df.groupby('location', as_index=False)['rating'].mean()
mean_latitude = df.groupby('location', as_index=False)['latitude'].mean()
mean_longitude = df.groupby('location', as_index=False)['longitude'].mean()

# One row per previously rated place
rated_df = pd.DataFrame({'location_name': rated_locations})

In [None]:
# Attach each rated place's 'location', then that location's mean rating,
# latitude and longitude (left joins, so unknown names are kept with NaN).
location_lookup = df[['location', 'location_name']].drop_duplicates()
rated_df = rated_df.merge(location_lookup, on='location_name', how='left')

for location_means in (mean_ratings, mean_latitude, mean_longitude):
    rated_df = rated_df.merge(location_means, on='location', how='left')

In [None]:
# Preview the rated places with their merged location-level rating and coordinates.
rated_df.head()

Unnamed: 0,location_name,location,rating,latitude,longitude
0,Alexandria Library,Alexandria,4.466667,31.227117,29.933017
1,Cleopatra's Palace,Alexandria,4.466667,31.227117,29.933017
2,Pyramids of Giza,Giza,4.6,29.91165,31.165417


In [None]:
# Drop any rows with missing values (assigned back instead of mutating in
# place, so the cell does not alter an object other cells may still hold)
rated_df = rated_df.dropna()

# Create a list of the unique locations
locations = df['location_name'].unique()

# Create a list of the previously rated locations
rated_locations = rated_df['location_name'].unique()

# Create a list of unrated locations
unrated_locations = np.setdiff1d(locations, rated_locations)

# Create a DataFrame of the unrated locations
unrated_df = pd.DataFrame({'location_name': unrated_locations})

# Merge the mean ratings, latitude, and longitude with the unrated locations DataFrame
unrated_df = pd.merge(unrated_df, df[['location', 'location_name']].drop_duplicates(), on='location_name', how='left')
unrated_df = pd.merge(unrated_df, mean_ratings, on='location', how='left')
unrated_df = pd.merge(unrated_df, mean_latitude, on='location', how='left')
unrated_df = pd.merge(unrated_df, mean_longitude, on='location', how='left')

# Coordinates of each rated location (first row per name), looked up once
# instead of re-filtering rated_df for every unrated row and rated location.
rated_first = rated_df.drop_duplicates(subset='location_name')
rated_coords = list(zip(rated_first['latitude'], rated_first['longitude']))

# Calculate the distance between each unrated location and the nearest rated location
unrated_df['distance_to_rated'] = [
    min(
        calculate_distance(lat, lon, rated_lat, rated_lon)
        for rated_lat, rated_lon in rated_coords
    )
    for lat, lon in zip(unrated_df['latitude'], unrated_df['longitude'])
]

# Sort the unrated_df by the distance_to_rated column
unrated_df = unrated_df.sort_values('distance_to_rated')


In [None]:
# Show the unrated locations, ordered by distance to the nearest rated
# location (closest first).
unrated_df

Unnamed: 0,location_name,location,rating,latitude,longitude,distance_to_rated
35,Step Pyramid of Djoser,Giza,4.6,29.91165,31.165417,0.0
33,Sphinx,Giza,4.6,29.91165,31.165417,0.0
31,Saqqara,Giza,4.6,29.91165,31.165417,0.0
28,Qaitbay Citadel,Alexandria,4.466667,31.227117,29.933017,0.0
27,Pompey's Pillar,Alexandria,4.466667,31.227117,29.933017,0.0
20,Montaza Palace,Alexandria,4.466667,31.227117,29.933017,0.0
15,Giza Necropolis,Giza,4.6,29.91165,31.165417,0.0
12,Dahshur,Giza,4.6,29.91165,31.165417,0.0
10,Catacombs of Kom el Shoqafa,Alexandria,4.466667,31.227117,29.933017,0.0
0,Abdeen Palace,Cairo,4.435,30.030185,31.245205,15.230115


In [None]:
# Replace missing ratings with the mean rating over the whole data set
overall_mean_rating = df['rating'].mean()
unrated_df['rating'] = unrated_df['rating'].fillna(overall_mean_rating)

In [None]:
# Peek at the first three rows of the unrated-locations frame.
unrated_df.head(3)

Unnamed: 0,location_name,location,rating,latitude,longitude,distance_to_rated
35,Step Pyramid of Djoser,Giza,4.6,29.91165,31.165417,0.0
33,Sphinx,Giza,4.6,29.91165,31.165417,0.0
31,Saqqara,Giza,4.6,29.91165,31.165417,0.0


In [None]:
# Fill any missing values with the overall mean rating
unrated_df['rating'] = unrated_df['rating'].fillna(value=df['rating'].mean())

# Wrap the ('user_id', 'location', 'rating') columns in a Surprise dataset,
# declaring a 1-5 rating scale, and build a trainset from all of it
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'location', 'rating']], reader)
trainset_knn = data.build_full_trainset()

# KNNBasic with cosine similarity and user_based=False, fit on the full trainset
sim_options = dict(name='cosine', user_based=False)
algo_knn = KNNBasic(sim_options=sim_options)
algo_knn.fit(trainset_knn)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f1ba371c9d0>

In [None]:
# Look up the user_id on the first df row for the first rated location.
# Uses rated_locations[0] rather than the loop variable `rated_location`,
# which is only bound by a later cell and is therefore undefined when the
# notebook is run top to bottom on a fresh kernel.
df.loc[df['location_name'] == rated_locations[0]].iloc[0]['user_id']

In [None]:
# Ask the fitted KNN model for the 3 nearest neighbours of inner id 1.
# NOTE(review): per the Surprise docs, get_neighbors returns a flat list of k
# inner ids, so this three-way unpacking presumably only succeeds because k=3,
# and `distances` / `indices` would each hold a single neighbour id rather
# than distances and an index array. Confirm and rename.
distances, indices, _ = algo_knn.get_neighbors(1, k=3)

In [None]:
# Collect [location_name, latitude, longitude] for every df row whose user_id
# is found in `indices`, then de-duplicate by converting rows to tuples and
# passing them through a set.
# NOTE(review): the model above is fit with user_based=False, so `indices`
# presumably holds item inner ids, yet it is matched against df['user_id'].
# Confirm whether it should be mapped back to raw item ids first.
nearest_neighbors  = []
indices_series = pd.Series(indices)
nearest_neighbors.extend(df.loc[df['user_id'].isin(indices_series)][['location_name', 'latitude', 'longitude']].values.tolist())
nearest_neighbors = list(set([tuple(location) for location in nearest_neighbors]))
nearest_neighbors

[('Al-Azhar Mosque', 30.0459, 31.2627),
 ('El Moez Street', 30.0398, 31.2609),
 ('Aswan Botanical Garden', 24.0737, 32.8789),
 ('Cairo Tower', 30.0455, 31.2243),
 ('Philae Temple', 24.0256, 32.8853),
 ('Abdeen Palace', 30.0432, 31.2423),
 ('Museum of Islamic Art', 30.0483, 31.2299),
 ('Al-Muizz Street', 30.0404, 31.2494),
 ('Egyptian Museum', 30.0478, 31.2336)]

In [None]:
# For every rated place, find the items most similar to its 'location' and
# collect all places that belong to those neighbouring locations.
#
# The model is item-based (user_based=False) and was trained with 'location'
# as the item column, so:
#   * the query must be the *inner item id* of the rated place's location,
#     not a raw user_id;
#   * get_neighbors returns a plain list of inner item ids (no distances), so
#     it must not be unpacked into three names;
#   * those inner ids are translated back to raw 'location' values before
#     filtering df.
nearest_neighbors = []
for rated_location in rated_locations:
    rated_item = df.loc[df['location_name'] == rated_location, 'location'].iloc[0]
    rated_inner_id = trainset_knn.to_inner_iid(rated_item)
    neighbor_inner_ids = algo_knn.get_neighbors(rated_inner_id, k=3)
    neighbor_items = [trainset_knn.to_raw_iid(inner_id) for inner_id in neighbor_inner_ids]
    nearest_neighbors.extend(
        df.loc[
            df['location'].isin(neighbor_items),
            ['location_name', 'latitude', 'longitude'],
        ].values.tolist()
    )


In [None]:
# De-duplicate the neighbour rows: lists are not hashable, so each row is
# turned into a tuple before going through a set.
unique_neighbors = {tuple(location) for location in nearest_neighbors}
nearest_neighbors = list(unique_neighbors)

In [None]:
# Inspect the de-duplicated (location_name, latitude, longitude) tuples.
nearest_neighbors

[("Saint Catherine's Monastery", 28.5538, 33.9759),
 ("St. Anthony's Monastery", 28.5606, 33.9049),
 ('El Moez Street', 30.0398, 31.2609),
 ('Al-Azhar Mosque', 30.0459, 31.2627),
 ('Aswan Botanical Garden', 24.0737, 32.8789),
 ('Sharm El Sheikh', 27.9158, 34.3294),
 ('Ras Mohammed National Park', 27.7559, 34.2629),
 ('Hurghada', 27.2579, 33.8116),
 ('Mount Sinai', 28.5392, 33.9757),
 ('Philae Temple', 24.0256, 32.8853),
 ('Cairo Tower', 30.0455, 31.2243),
 ('Abdeen Palace', 30.0432, 31.2423),
 ('Museum of Islamic Art', 30.0483, 31.2299),
 ('Al-Muizz Street', 30.0404, 31.2494),
 ('Egyptian Museum', 30.0478, 31.2336)]

In [None]:
# Mean distance from every neighbour candidate to the previously rated locations
rated_points = [
    (record['latitude'], record['longitude'])
    for record in rated_df.to_dict('records')
]

# One (location_name, mean distance in km) pair per candidate
nearest_neighbor_distances = [
    (
        name,
        np.mean([
            calculate_distance(lat, lon, rated_lat, rated_lon)
            for rated_lat, rated_lon in rated_points
        ]),
    )
    for name, lat, lon in nearest_neighbors
]

In [None]:
# Re-display the candidate tuples; the distance cell above only reads them.
nearest_neighbors

[("Saint Catherine's Monastery", 28.5538, 33.9759),
 ("St. Anthony's Monastery", 28.5606, 33.9049),
 ('El Moez Street', 30.0398, 31.2609),
 ('Al-Azhar Mosque', 30.0459, 31.2627),
 ('Aswan Botanical Garden', 24.0737, 32.8789),
 ('Sharm El Sheikh', 27.9158, 34.3294),
 ('Ras Mohammed National Park', 27.7559, 34.2629),
 ('Hurghada', 27.2579, 33.8116),
 ('Mount Sinai', 28.5392, 33.9757),
 ('Philae Temple', 24.0256, 32.8853),
 ('Cairo Tower', 30.0455, 31.2243),
 ('Abdeen Palace', 30.0432, 31.2423),
 ('Museum of Islamic Art', 30.0483, 31.2299),
 ('Al-Muizz Street', 30.0404, 31.2494),
 ('Egyptian Museum', 30.0478, 31.2336)]

In [None]:
# Print the latitude and longitude of every rated row, one value per line
for rated_location in rated_df.to_dict('records'):
  print(rated_location['latitude'], rated_location['longitude'], sep='\n')

31.227116666666664
29.933016666666663
31.227116666666664
29.933016666666663
29.911649999999998
31.16541666666667


In [None]:
# Order the candidates by mean distance, closest first (in-place sort)
nearest_neighbor_distances.sort(key=lambda pair: pair[1])

# Keep the 7 closest candidates
recommendations = nearest_neighbor_distances[:7]

# Keep only the location names of the recommended candidates
recommended_locations = [name for name, _distance in recommendations]
recommended_locations

['Cairo Tower',
 'Museum of Islamic Art',
 'Egyptian Museum',
 'Abdeen Palace',
 'Al-Muizz Street',
 'Al-Azhar Mosque',
 'El Moez Street']