In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

import re

In [5]:
df = pd.read_csv('Data/df_aggregated.csv')
schools_df = pd.read_csv('Data/final_schools_data.csv')
mosques_df = pd.read_csv('Data/mousq_final_data.csv')

In [6]:
work_url = 'https://www.google.com/maps/place/LuLu+Hypermarket+-+Riyadh+Avenue+Mall+-+Murabba/@24.662691,46.6995444,15z/data=!4m6!3m5!1s0x3e2f05870f68107f:0x50c5d160b1a95c24!8m2!3d24.6637609!4d46.7036873!16s%2Fg%2F1hd_998vr?entry=ttu'
wife_url = 'https://www.google.com/maps/@24.6474296,46.7198836,18.26z?entry=ttu'

In [7]:
def extract_coordinates(url):
    matches = re.findall(r'@([\d.]+),([\d.]+)', url)
    if matches:
        return matches[0]
    else:
        return None

In [8]:
lon_min, lon_max = 46.4, 47.2
lat_min, lat_max = 24.5, 25.0

np.random.seed(42) 
df['Longitude'] = np.random.uniform(lon_min, lon_max, df.shape[0])
df['Latitude'] = np.random.uniform(lat_min, lat_max, df.shape[0])

In [9]:
def haversine(lon1, lat1, lon2, lat2):
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    r = 6371 
    return c * r

In [10]:
user_price_per_meter = 5000
wife_location = list(map(float, extract_coordinates(wife_url)))
work_location = list(map(float, extract_coordinates(work_url)))

In [11]:
X_10th = df['10th Percentile']
X_25th = df['25th Percentile']
X_mean = df['Mean']
X_75th = df['75th Percentile']
X_90th = df['90th Percentile']

In [12]:
scaler = StandardScaler()
X_10th = scaler.fit_transform(X_10th.values.reshape(-1, 1))
X_25th = scaler.fit_transform(X_25th.values.reshape(-1, 1))
X_mean = scaler.fit_transform(X_mean.values.reshape(-1, 1))
X_75th = scaler.fit_transform(X_75th.values.reshape(-1, 1))
X_90th = scaler.fit_transform(X_90th.values.reshape(-1, 1))

In [13]:
knn_10th = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(X_10th)
knn_25th = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(X_25th)
knn_mean = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(X_mean)
knn_75th = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(X_75th)
knn_90th = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(X_90th)

In [14]:
user_input = [[user_price_per_meter]]
user_input_scaled = scaler.transform(user_input)

distances_10th, indices_10th = knn_10th.kneighbors(user_input_scaled)
candidate_neighborhoods_10th = df.iloc[indices_10th[0]]

distances_25th, indices_25th = knn_25th.kneighbors(user_input_scaled)
candidate_neighborhoods_25th = df.iloc[indices_25th[0]]

distances_mean, indices_mean = knn_mean.kneighbors(user_input_scaled)
candidate_neighborhoods_mean = df.iloc[indices_mean[0]]

distances_75th, indices_75th = knn_75th.kneighbors(user_input_scaled)
candidate_neighborhoods_75th = df.iloc[indices_75th[0]]

distances_90th, indices_90th = knn_90th.kneighbors(user_input_scaled)
candidate_neighborhoods_90th = df.iloc[indices_90th[0]]

In [15]:
candidate_neighborhoods_10th['Distance_to_Work'] = candidate_neighborhoods_10th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], work_location[1], work_location[0]), axis=1)

candidate_neighborhoods_10th['Distance_to_Wife'] = candidate_neighborhoods_10th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], wife_location[1], wife_location[0]), axis=1)

candidate_neighborhoods_25th['Distance_to_Work'] = candidate_neighborhoods_25th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], work_location[1], work_location[0]), axis=1)

candidate_neighborhoods_25th['Distance_to_Wife'] = candidate_neighborhoods_25th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], wife_location[1], wife_location[0]), axis=1)

candidate_neighborhoods_mean['Distance_to_Work'] = candidate_neighborhoods_mean.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], work_location[1], work_location[0]), axis=1)

candidate_neighborhoods_mean['Distance_to_Wife'] = candidate_neighborhoods_mean.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], wife_location[1], wife_location[0]), axis=1)

candidate_neighborhoods_75th['Distance_to_Work'] = candidate_neighborhoods_75th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], work_location[1], work_location[0]), axis=1)

candidate_neighborhoods_75th['Distance_to_Wife'] = candidate_neighborhoods_75th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], wife_location[1], wife_location[0]), axis=1)

candidate_neighborhoods_90th['Distance_to_Work'] = candidate_neighborhoods_90th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], work_location[1], work_location[0]), axis=1)

candidate_neighborhoods_90th['Distance_to_Wife'] = candidate_neighborhoods_90th.apply(
    lambda row: haversine(row['Longitude'], row['Latitude'], wife_location[1], wife_location[0]), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_10th['Distance_to_Work'] = candidate_neighborhoods_10th.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_10th['Distance_to_Wife'] = candidate_neighborhoods_10th.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_25th['D

In [16]:
def count_nearby_schools(lon, lat, schools_df, max_distance=3):
    return sum(haversine(lon, lat, school_lon, school_lat) <= max_distance for school_lat, school_lon in zip(schools_df['latitude'], schools_df['longitude']))

In [17]:
def count_nearby_mosques(lon, lat, mosques_df, max_distance=3):
    return sum(haversine(lon, lat, mosques_lon, mosques_lat) <= max_distance for mosques_lat, mosques_lon in zip(mosques_df['latitude'], mosques_df['longitude']))

In [18]:
candidate_neighborhoods_10th['Nearby_Mosques'] = candidate_neighborhoods_10th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], mosques_df), axis=1)

candidate_neighborhoods_10th['Nearby_Schools'] = candidate_neighborhoods_10th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], schools_df), axis=1)

candidate_neighborhoods_25th['Nearby_Mosques'] = candidate_neighborhoods_25th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], mosques_df), axis=1)

candidate_neighborhoods_25th['Nearby_Schools'] = candidate_neighborhoods_25th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], schools_df), axis=1)

candidate_neighborhoods_mean['Nearby_Mosques'] = candidate_neighborhoods_mean.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], mosques_df), axis=1)

candidate_neighborhoods_mean['Nearby_Schools'] = candidate_neighborhoods_mean.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], schools_df), axis=1)

candidate_neighborhoods_75th['Nearby_Mosques'] = candidate_neighborhoods_75th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], mosques_df), axis=1)

candidate_neighborhoods_75th['Nearby_Schools'] = candidate_neighborhoods_75th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], schools_df), axis=1)

candidate_neighborhoods_90th['Nearby_Mosques'] = candidate_neighborhoods_90th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], mosques_df), axis=1)

candidate_neighborhoods_90th['Nearby_Schools'] = candidate_neighborhoods_90th.apply(
    lambda row: count_nearby_schools(row['Longitude'], row['Latitude'], schools_df), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_10th['Nearby_Mosques'] = candidate_neighborhoods_10th.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_10th['Nearby_Schools'] = candidate_neighborhoods_10th.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_25th['Nearb

In [19]:
candidate_neighborhoods_10th['Combined_Score'] = (candidate_neighborhoods_10th['Distance_to_Work'] + candidate_neighborhoods_10th['Distance_to_Wife']) / 2
candidate_neighborhoods_10th = candidate_neighborhoods_10th.sort_values(by='Combined_Score')

candidate_neighborhoods_25th['Combined_Score'] = (candidate_neighborhoods_25th['Distance_to_Work'] + candidate_neighborhoods_25th['Distance_to_Wife']) / 2
candidate_neighborhoods_25th = candidate_neighborhoods_25th.sort_values(by='Combined_Score')

candidate_neighborhoods_mean['Combined_Score'] = (candidate_neighborhoods_mean['Distance_to_Work'] + candidate_neighborhoods_mean['Distance_to_Wife']) / 2
candidate_neighborhoods_mean = candidate_neighborhoods_mean.sort_values(by='Combined_Score')

candidate_neighborhoods_75th['Combined_Score'] = (candidate_neighborhoods_75th['Distance_to_Work'] + candidate_neighborhoods_75th['Distance_to_Wife']) / 2
candidate_neighborhoods_75th = candidate_neighborhoods_75th.sort_values(by='Combined_Score')

candidate_neighborhoods_90th['Combined_Score'] = (candidate_neighborhoods_90th['Distance_to_Work'] + candidate_neighborhoods_90th['Distance_to_Wife']) / 2
candidate_neighborhoods_90th = candidate_neighborhoods_90th.sort_values(by='Combined_Score')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_10th['Combined_Score'] = (candidate_neighborhoods_10th['Distance_to_Work'] + candidate_neighborhoods_10th['Distance_to_Wife']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candidate_neighborhoods_25th['Combined_Score'] = (candidate_neighborhoods_25th['Distance_to_Work'] + candidate_neighborhoods_25th['Distance_to_Wife']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation

In [20]:
recommended_neighborhoods_10th = candidate_neighborhoods_10th[['الحي', 'Distance_to_Work', 'Distance_to_Wife', 'Nearby_Schools', 'Combined_Score', 'Nearby_Mosques']]
recommended_neighborhoods_25th = candidate_neighborhoods_25th[['الحي', 'Distance_to_Work', 'Distance_to_Wife', 'Nearby_Schools', 'Combined_Score', 'Nearby_Mosques']]
recommended_neighborhoods_mean = candidate_neighborhoods_mean[['الحي', 'Distance_to_Work', 'Distance_to_Wife', 'Nearby_Schools', 'Combined_Score', 'Nearby_Mosques']]
recommended_neighborhoods_75th = candidate_neighborhoods_75th[['الحي', 'Distance_to_Work', 'Distance_to_Wife', 'Nearby_Schools', 'Combined_Score', 'Nearby_Mosques']]
recommended_neighborhoods_90th = candidate_neighborhoods_90th[['الحي', 'Distance_to_Work', 'Distance_to_Wife', 'Nearby_Schools', 'Combined_Score', 'Nearby_Mosques']]

In [21]:
recommended_neighborhoods_10th.reset_index(drop=True, inplace=True)
recommended_neighborhoods_10th.index += 1
recommended_neighborhoods_10th

Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques
1,البيان,29.600323,29.805752,1,29.703037,0
2,غرناطة,37.866356,40.531639,0,39.198998,0
3,القادسية,46.638932,45.181531,0,45.910232,0
4,الدريهمية,46.975944,45.962167,0,46.469055,0
5,ظهرة العودة,54.654751,54.213043,0,54.433897,0


In [22]:
recommended_neighborhoods_25th.reset_index(drop=True, inplace=True)
recommended_neighborhoods_25th.index += 1
recommended_neighborhoods_25th

Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques
1,الصالحية,13.927045,13.121068,27,13.524057,15
2,الرمال,29.473715,30.14576,0,29.809737,1
3,النهضة,35.673307,33.060767,0,34.367037,0
4,الشعاب,41.029975,38.610831,0,39.820403,0
5,القادسية,46.638932,45.181531,0,45.910232,0


In [23]:
recommended_neighborhoods_mean.reset_index(drop=True, inplace=True)
recommended_neighborhoods_mean.index += 1
recommended_neighborhoods_mean

Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques
1,المهدية,22.96004,25.600717,0,24.280379,0
2,الرمال,29.473715,30.14576,0,29.809737,1
3,النهضة,35.673307,33.060767,0,34.367037,0
4,الزهراء,35.084238,33.96723,0,34.525734,0
5,القادسية,51.61245,51.395415,0,51.503932,0


In [24]:
recommended_neighborhoods_75th.reset_index(drop=True, inplace=True)
recommended_neighborhoods_75th.index += 1
recommended_neighborhoods_75th

Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques
1,معكال,20.505793,17.863146,0,19.184469,0
2,النموذجية,27.123256,24.524219,0,25.823737,0
3,الطندباوى,28.80383,30.710572,0,29.757201,0
4,النهضة,35.673307,33.060767,0,34.367037,0
5,لبن,34.803187,34.78695,0,34.795069,0


In [25]:
recommended_neighborhoods_90th.reset_index(drop=True, inplace=True)
recommended_neighborhoods_90th.index += 1
recommended_neighborhoods_90th

Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques
1,المهدية,15.649558,18.043504,86,16.846531,28
2,الخليج,18.702112,21.360344,0,20.031228,2
3,لبن,34.803187,34.78695,0,34.795069,0
4,أم سليم,36.99336,39.365299,0,38.17933,0
5,ام سليم,39.265811,41.92754,0,40.596676,0


In [26]:
df_10th = recommended_neighborhoods_10th.head(2)
df_25th = recommended_neighborhoods_25th.head(2)
df_mean = recommended_neighborhoods_mean.head(2)
df_75th = recommended_neighborhoods_75th.head(2)
df_90th = recommended_neighborhoods_90th.head(2)

df_10th['Color'] = 'Red'
df_10th['percentile'] = '10th'
df_25th['Color'] = 'Blue'
df_25th['percentile'] = '25th'
df_mean['Color'] = 'Green'
df_mean['percentile'] = 'Mean'
df_75th['Color'] = 'Yellow'
df_75th['percentile'] = '75th'
df_90th['Color'] = 'Orange'
df_90th['percentile'] = '90th'

combined_df = pd.concat([df_10th, df_25th, df_mean, df_75th, df_90th])

combined_df.reset_index(drop=True, inplace=True)

combined_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_10th['Color'] = 'Red'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_10th['percentile'] = '10th'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_25th['Color'] = 'Blue'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_90th['Color'] = 'Orange'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_90th['percentile'] = '90th'


Unnamed: 0,الحي,Distance_to_Work,Distance_to_Wife,Nearby_Schools,Combined_Score,Nearby_Mosques,Color,percentile
0,البيان,29.600323,29.805752,1,29.703037,0,Red,10th
1,غرناطة,37.866356,40.531639,0,39.198998,0,Red,10th
2,الصالحية,13.927045,13.121068,27,13.524057,15,Blue,25th
3,الرمال,29.473715,30.14576,0,29.809737,1,Blue,25th
4,المهدية,22.96004,25.600717,0,24.280379,0,Green,Mean
5,الرمال,29.473715,30.14576,0,29.809737,1,Green,Mean
6,معكال,20.505793,17.863146,0,19.184469,0,Yellow,75th
7,النموذجية,27.123256,24.524219,0,25.823737,0,Yellow,75th
8,المهدية,15.649558,18.043504,86,16.846531,28,Orange,90th
9,الخليج,18.702112,21.360344,0,20.031228,2,Orange,90th
