In [4]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from geopy.distance import geodesic

In [2]:
# Read the Nasr City restaurants workbook into a pandas DataFrame
excel_file_path = '/content/Nasr City Restaurants.xlsx'
nasr_city_df = pd.read_excel(io=excel_file_path)

In [3]:
# Peek at the top five rows to confirm the workbook loaded as expected
print(nasr_city_df.head(n=5))

                                               title              cuisine  \
0  كشري الشيخ - koshary El Sheikh‏ (فرع سوق السيا...   Koshari restaurant   
1                          El Abd, Eastern Nasr city   Dessert restaurant   
2                                     Shikh El Balad   Falafel restaurant   
3                                     Gad Restaurant  Egyptian restaurant   
4                           PRODO Restaurant | برودو     Pizza restaurant   

   rating  reviewsCount                                             street  \
0     3.9           188  محطه الجامع سوق السيارات، 79 Al Swissri Marhala B   
1     4.5            40                               شارع Dr Ismail Wahbi   
2     3.8            68  29WG+9CX، شارع محطة المدرسه الحي العاشر قسم مد...   
3     3.6          1514                                  88 El Merghany St   
4     3.4            18                               48 جامع الحاجة اسماء   

                                                 url  location/lat  

In [5]:
# Collapse repeated listings: rows sharing a title and coordinates are treated as the
# same restaurant. Sorting by reviews count (descending) first means keep='first'
# retains the entry with the highest reviews count.
nasr_city_df = (
    nasr_city_df
    .sort_values(by='reviewsCount', ascending=False)
    .drop_duplicates(subset=['title', 'location/lat', 'location/lng'], keep='first')
)

In [6]:
# Sanity check on the de-duplication key: count rows that still share the same title
# and coordinates (a full-row comparison would miss listings that differ only in
# other columns such as rating or url). Expected result: 0.
nasr_city_df.duplicated(subset=['title', 'location/lat', 'location/lng']).sum()

0

In [7]:
# Mark Koshary places: True where the "cuisine" text contains 'Koshari restaurant'
# (case-insensitive); rows with a missing cuisine are marked False
koshary_mask = nasr_city_df['cuisine'].str.contains('Koshari restaurant', case=False, na=False)
nasr_city_df['Is_Koshary'] = koshary_mask


In [8]:
# Show a labelled preview of the frame after de-duplication and flagging
processed_preview = nasr_city_df.head()
print("Processed DataFrame:")
print(processed_preview)

Processed DataFrame:
                        title             cuisine  rating  reviewsCount  \
557       Abou Ramy nasr city       Sandwich shop     3.9         11743   
632  Murano Restaurant & Cafe  Italian restaurant     4.4          9796   
691         Bistro Heliopolis  Italian restaurant     4.3          7583   
474              Costa Coffee         Coffee shop     4.2          7006   
650      Zack's Fried Chicken  Chicken restaurant     3.9          6201   

                                                street  \
557                                    3 El Tayaran St   
632  عمر بن الخطاب، مساكن المهندسين، مدينة نصر، محا...   
691  33 عمر بن الخطاب ميدان الاسماعيلية 33 Omar Ibn...   
474                               37 Abbas El Akkad St   
650                                           389J+22X   

                                                   url  location/lat  \
557  https://www.google.com/maps/search/?api=1&quer...     30.043799   
632  https://www.google.com/maps/se

In [9]:
# Keep only the rows whose Is_Koshary flag is True
is_koshary = nasr_city_df['Is_Koshary']
koshary_df = nasr_city_df.loc[is_koshary]

In [10]:
# Geodesic distance helper for two restaurant rows
def calculate_distance(row1, row2):
    """Return the geodesic distance, in meters, between two rows.

    Each row must support lookup by the keys 'location/lat' and
    'location/lng'; the looked-up values are handed to ``geodesic`` as
    (lat, lng) pairs, ``row1`` first and ``row2`` second.
    """
    points = [(row['location/lat'], row['location/lng']) for row in (row1, row2)]
    return geodesic(*points).meters

In [11]:
# Starting state for the closest-pair search: no candidate pair yet, and an
# infinite best distance so that any finite measured distance replaces it
closest_pair = (None, None)
min_distance = float('inf')

In [12]:
# Find the two closest Koshary restaurants.
# The running minimum is reset here so this cell gives the same answer no matter
# how often (or in which order) it is re-run.
min_distance = float('inf')
closest_pair = (None, None)

# Materialise the rows once, then compare each row only with the rows after it:
# every unordered pair is measured exactly once (the distance is symmetric), and
# rows are told apart by position rather than by index label.
koshary_rows = [row for _, row in koshary_df.iterrows()]
for position, row1 in enumerate(koshary_rows):
    for row2 in koshary_rows[position + 1:]:
        distance = calculate_distance(row1, row2)
        # Strict '<' keeps the first pair encountered when several pairs tie
        if distance < min_distance:
            min_distance = distance
            closest_pair = (row1['title'], row2['title'])

if len(koshary_rows) < 2:
    # No pair exists, so avoid reporting "None and None ... inf meters"
    print("Fewer than two Koshary restaurants were found, so there is no closest pair to report.")
else:
    print(f"The closest Koshary restaurants are: {closest_pair[0]} and {closest_pair[1]} with a distance of {min_distance} meters.")


The closest Koshary restaurants are: كشرى حلوانى ابو حنفى and كشري الأمور with a distance of 488.7726675500647 meters.


In [13]:
# Write the flagged restaurant table to an Excel workbook, leaving out the row index
output_path = '/content/Nasr_City_Restaurants_Flagged.xlsx'
nasr_city_df.to_excel(excel_writer=output_path, index=False)

# Trigger a browser download of the workbook via the google.colab helper
from google.colab import files
files.download(output_path)

# Print a labelled preview of the saved frame as a final check
final_preview = nasr_city_df.head()
print("Final DataFrame:")
print(final_preview)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Final DataFrame:
                        title             cuisine  rating  reviewsCount  \
557       Abou Ramy nasr city       Sandwich shop     3.9         11743   
632  Murano Restaurant & Cafe  Italian restaurant     4.4          9796   
691         Bistro Heliopolis  Italian restaurant     4.3          7583   
474              Costa Coffee         Coffee shop     4.2          7006   
650      Zack's Fried Chicken  Chicken restaurant     3.9          6201   

                                                street  \
557                                    3 El Tayaran St   
632  عمر بن الخطاب، مساكن المهندسين، مدينة نصر، محا...   
691  33 عمر بن الخطاب ميدان الاسماعيلية 33 Omar Ibn...   
474                               37 Abbas El Akkad St   
650                                           389J+22X   

                                                   url  location/lat  \
557  https://www.google.com/maps/search/?api=1&quer...     30.043799   
632  https://www.google.com/maps/search