In [1]:
# Import libraries
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import folium
from tqdm import tqdm
from PIL import Image

In [2]:
# Read the excel files for each type of location.
# Each workbook in 'uncleaned_data/' becomes one DataFrame, named after the
# location category it holds.
# The functions defined further down read the 'Address' column (geocoding) and the
# 'Name', 'Latitude' and 'Longitude' columns (plotting).
# NOTE(review): airport, city_center, park and river are plotted without a
# geocoding step, so those workbooks presumably already contain
# 'Latitude'/'Longitude' columns - confirm against the files.
airport = pd.read_excel('uncleaned_data/airport.xlsx')
chapels = pd.read_excel('uncleaned_data/chapels.xlsx')
cinema = pd.read_excel('uncleaned_data/cinemas.xlsx')
city_center = pd.read_excel('uncleaned_data/city_center.xlsx')
custody_penitentiary = pd.read_excel('uncleaned_data/custodies_penitentiaries.xlsx')
district_court = pd.read_excel('uncleaned_data/district_court.xlsx')
emergency_room = pd.read_excel('uncleaned_data/emergency_rooms.xlsx')
football_stadium = pd.read_excel('uncleaned_data/football_stadiums.xlsx')
grocery_store = pd.read_excel('uncleaned_data/grocery_stores.xlsx')
gym = pd.read_excel('uncleaned_data/gyms.xlsx')
high_school = pd.read_excel('uncleaned_data/high_schools.xlsx')
horseback_riding_center = pd.read_excel('uncleaned_data/horseback_riding_center.xlsx')
hospital = pd.read_excel('uncleaned_data/hospitals.xlsx')
indoor_swimming_pool = pd.read_excel('uncleaned_data/indoor_swimming_pool.xlsx')
municipal_police_department = pd.read_excel('uncleaned_data/municipal_police_departments.xlsx')
museum = pd.read_excel('uncleaned_data/museums.xlsx')
natural_swimming_pool = pd.read_excel('uncleaned_data/natural_swimming_pools.xlsx')
orlik_soccer_field = pd.read_excel('uncleaned_data/orlik_soccer_fields.xlsx')
park = pd.read_excel('uncleaned_data/parks.xlsx')
pharmacy = pd.read_excel('uncleaned_data/pharmacies.xlsx')
police_station = pd.read_excel('uncleaned_data/police_stations.xlsx')
primary_care_units = pd.read_excel('uncleaned_data/primary_care_units.xlsx')
primary_school = pd.read_excel('uncleaned_data/primary_schools.xlsx')
river = pd.read_excel('uncleaned_data/river.xlsx')
roman_catholic_church = pd.read_excel('uncleaned_data/roman_catholic_church.xlsx')
rope_park = pd.read_excel('uncleaned_data/rope_parks.xlsx')
rossmann = pd.read_excel('uncleaned_data/rossmanns.xlsx')
shopping_center = pd.read_excel('uncleaned_data/shopping_centers.xlsx')
street_workout = pd.read_excel('uncleaned_data/street_workouts.xlsx')
tax_administration_chamber = pd.read_excel('uncleaned_data/tax_administration_chambers.xlsx')
tax_office = pd.read_excel('uncleaned_data/tax_offices.xlsx')
technical_college = pd.read_excel('uncleaned_data/technical_colleges.xlsx')
vocational_school = pd.read_excel('uncleaned_data/vocational_schools.xlsx')
zabka = pd.read_excel('uncleaned_data/zabka_shops.xlsx')

In [3]:
def get_lat_long(dataframe, user_agent="http"):
    """Geocode each row's 'Address' and store the result in 'Latitude'/'Longitude'.

    The DataFrame is modified in place; nothing is returned.

    Rows that cannot be geocoded (missing address, no match, or a geocoding
    service error) get NaN in both coordinate columns, so the columns stay
    numeric and can be averaged or plotted without further cleaning.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an 'Address' column. 'Latitude' and 'Longitude' are
        created if absent and overwritten row by row otherwise.
    user_agent : str, optional
        User-Agent string passed to Nominatim. Defaults to the value this
        notebook has always used.
        NOTE(review): the public Nominatim service asks for an
        application-specific user agent - consider passing a real one.
    """
    # Local import: geopy is already a dependency of this notebook.
    from geopy.exc import GeopyError

    # Initialize the geolocator using the Nominatim class from the geopy library
    geolocator = Nominatim(user_agent=user_agent)

    # Create the target columns up front so they exist even for an empty frame
    for column in ('Latitude', 'Longitude'):
        if column not in dataframe.columns:
            dataframe[column] = np.nan

    # NOTE(review): the public Nominatim endpoint has a request-rate policy;
    # geopy.extra.rate_limiter.RateLimiter can throttle the calls if needed.

    # The context manager closes the progress bar even if an error propagates
    with tqdm(total=len(dataframe)) as pbar:
        for index, row in dataframe.iterrows():
            address = row['Address']

            location = None
            if pd.notna(address):
                try:
                    location = geolocator.geocode(address)
                except GeopyError as exc:
                    # A timeout/service failure on one row must not abort the run
                    print(f"{address} could not be geocoded: {exc}")

            if location is None:
                # No usable result: keep the columns numeric with NaN
                dataframe.loc[index, 'Latitude'] = np.nan
                dataframe.loc[index, 'Longitude'] = np.nan
                print(f"{address} could not be reached for the latitude")
                print(f"{address} could not be reached for the longitude")
            else:
                dataframe.loc[index, 'Latitude'] = location.latitude
                dataframe.loc[index, 'Longitude'] = location.longitude

            # Update the progress bar
            pbar.update(1)

In [4]:
def plot_map(df):
    """Build a folium map with one marker per row that has usable coordinates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Name', 'Latitude' and 'Longitude' columns.

    Returns
    -------
    folium.Map
        Map centred on the mean coordinates of the plotted rows, with a popup
        marker showing each row's 'Name'.

    Raises
    ------
    ValueError
        If no row has both a valid latitude and a valid longitude.
    """
    # Coerce to numbers so placeholders such as "N/A" become NaN instead of
    # breaking the mean or the marker creation. The input frame is not modified.
    latitudes = pd.to_numeric(df['Latitude'], errors='coerce')
    longitudes = pd.to_numeric(df['Longitude'], errors='coerce')
    has_coords = latitudes.notna() & longitudes.notna()

    if not has_coords.any():
        raise ValueError("No valid 'Latitude'/'Longitude' values to plot")

    # Create a map centered at the mean coordinates of the rows being plotted
    lat_mean = latitudes[has_coords].mean()
    lon_mean = longitudes[has_coords].mean()
    m = folium.Map(location=[lat_mean, lon_mean], zoom_start=12)

    # Add a marker for every row with usable coordinates
    names = df['Name'][has_coords]
    for info, lat, lon in zip(names, latitudes[has_coords], longitudes[has_coords]):
        # icons: https://fontawesome.com/search?q=location&o=r
        marker = folium.Marker(
            location=(float(lat), float(lon)),
            icon=folium.Icon(icon='info-sign'),
            popup=info,
        )
        marker.add_to(m)

    # Returning the map lets the notebook render it as the cell output
    return m

In [5]:
# Map `city_center` directly: no geocoding call precedes it, so plot_map uses the
# 'Latitude'/'Longitude' values as loaded from the workbook.
plot_map(city_center)

In [6]:
# Map `airport` directly from the coordinates as loaded (no geocoding step).
plot_map(airport)

In [7]:
# Map `park` directly from the coordinates as loaded (no geocoding step).
plot_map(park)

In [8]:
# Map `river` directly from the coordinates as loaded (no geocoding step).
plot_map(river)

In [9]:
# Geocode `chapels` in place (writes 'Latitude'/'Longitude'), then show its map
# as the cell output.
get_lat_long(chapels)
plot_map(chapels)

100%|██████████| 28/28 [00:13<00:00,  2.01it/s]


In [10]:
# Geocode `cinema` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(cinema)
plot_map(cinema)

100%|██████████| 11/11 [00:05<00:00,  2.03it/s]


In [11]:
# Geocode `custody_penitentiary` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(custody_penitentiary)
plot_map(custody_penitentiary)

100%|██████████| 2/2 [00:00<00:00,  2.12it/s]


In [12]:
# Geocode `district_court` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(district_court)
plot_map(district_court)

100%|██████████| 1/1 [00:00<00:00,  2.16it/s]


In [13]:
# Geocode `emergency_room` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(emergency_room)
plot_map(emergency_room)

100%|██████████| 6/6 [00:02<00:00,  2.00it/s]


In [14]:
# Geocode `football_stadium` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(football_stadium)
plot_map(football_stadium)

100%|██████████| 24/24 [00:11<00:00,  2.01it/s]


In [15]:
# Geocode `grocery_store` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(grocery_store)
plot_map(grocery_store)

100%|██████████| 125/125 [01:02<00:00,  2.00it/s]


In [16]:
# Geocode `gym` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(gym)
plot_map(gym)

100%|██████████| 79/79 [00:39<00:00,  2.01it/s]


In [17]:
# Geocode `high_school` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(high_school)
plot_map(high_school)

100%|██████████| 78/78 [00:38<00:00,  2.01it/s]


In [18]:
# Geocode `horseback_riding_center` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(horseback_riding_center)
plot_map(horseback_riding_center)

100%|██████████| 13/13 [00:06<00:00,  2.04it/s]


In [19]:
# Geocode `hospital` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(hospital)
plot_map(hospital)

100%|██████████| 17/17 [00:08<00:00,  2.01it/s]


In [20]:
# Geocode `indoor_swimming_pool` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(indoor_swimming_pool)
plot_map(indoor_swimming_pool)

100%|██████████| 42/42 [00:20<00:00,  2.01it/s]


In [21]:
# Geocode `municipal_police_department` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(municipal_police_department)
plot_map(municipal_police_department)

100%|██████████| 4/4 [00:01<00:00,  2.04it/s]


In [22]:
# Geocode `museum` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(museum)
plot_map(museum)

100%|██████████| 53/53 [00:26<00:00,  2.01it/s]


In [23]:
# Geocode `natural_swimming_pool` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(natural_swimming_pool)
plot_map(natural_swimming_pool)

100%|██████████| 6/6 [00:03<00:00,  1.53it/s]


In [24]:
# Geocode `orlik_soccer_field` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(orlik_soccer_field)
plot_map(orlik_soccer_field)

100%|██████████| 20/20 [00:09<00:00,  2.01it/s]


In [25]:
# Geocode `pharmacy` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(pharmacy)
plot_map(pharmacy)

100%|██████████| 281/281 [02:22<00:00,  1.97it/s]


In [26]:
# Geocode `police_station` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(police_station)
plot_map(police_station)

100%|██████████| 11/11 [00:05<00:00,  2.08it/s]


In [27]:
# Geocode `primary_care_units` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(primary_care_units)
plot_map(primary_care_units)

100%|██████████| 191/191 [01:35<00:00,  2.00it/s]


In [28]:
# Geocode `primary_school` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(primary_school)
plot_map(primary_school)

100%|██████████| 203/203 [01:41<00:00,  2.00it/s]


In [29]:
# Geocode `roman_catholic_church` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(roman_catholic_church)
plot_map(roman_catholic_church)

100%|██████████| 137/137 [01:08<00:00,  2.00it/s]


In [30]:
# Geocode `rope_park` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(rope_park)
plot_map(rope_park)

100%|██████████| 2/2 [00:00<00:00,  2.58it/s]


In [31]:
# Geocode `rossmann` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(rossmann)
plot_map(rossmann)

100%|██████████| 49/49 [00:24<00:00,  2.00it/s]


In [32]:
# Geocode `shopping_center` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(shopping_center)
plot_map(shopping_center)

100%|██████████| 13/13 [00:06<00:00,  1.99it/s]


In [33]:
# Geocode `street_workout` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(street_workout)
plot_map(street_workout)

100%|██████████| 6/6 [00:02<00:00,  2.06it/s]


In [34]:
# Geocode `tax_administration_chamber` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(tax_administration_chamber)
plot_map(tax_administration_chamber)

100%|██████████| 4/4 [00:01<00:00,  2.03it/s]


In [35]:
# Geocode `tax_office` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(tax_office)
plot_map(tax_office)

100%|██████████| 6/6 [00:02<00:00,  2.04it/s]


In [36]:
# Geocode `technical_college` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(technical_college)
plot_map(technical_college)

100%|██████████| 29/29 [00:14<00:00,  2.01it/s]


In [37]:
# Geocode `vocational_school` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(vocational_school)
plot_map(vocational_school)

100%|██████████| 30/30 [00:14<00:00,  2.01it/s]


In [38]:
# Geocode `zabka` in place (writes 'Latitude'/'Longitude'), then show its map.
get_lat_long(zabka)
plot_map(zabka)

100%|██████████| 374/374 [03:06<00:00,  2.00it/s]


In [39]:
# Saving the dataframes with the new latitude and longitude columns to excel files.
# Every DataFrame loaded at the top of the notebook is written to
# 'cleaned_data/' with a 'geo_' prefix, including the four that were only
# plotted (airport, city_center, park, river).
# NOTE(review): to_excel is called with its defaults, so the DataFrame index is
# written as an extra first column; pass index=False if downstream readers
# should not see it - confirm before changing.
# NOTE(review): assumes the 'cleaned_data' directory already exists - verify.
airport.to_excel('cleaned_data/geo_airport.xlsx')
chapels.to_excel('cleaned_data/geo_chapels.xlsx')
city_center.to_excel('cleaned_data/geo_city_center.xlsx')
emergency_room.to_excel('cleaned_data/geo_emergency_room.xlsx')
grocery_store.to_excel('cleaned_data/geo_grocery_store.xlsx')
gym.to_excel('cleaned_data/geo_gym.xlsx')
high_school.to_excel('cleaned_data/geo_high_school.xlsx')
hospital.to_excel('cleaned_data/geo_hospital.xlsx')
park.to_excel('cleaned_data/geo_park.xlsx')
pharmacy.to_excel('cleaned_data/geo_pharmacy.xlsx')
primary_care_units.to_excel('cleaned_data/geo_primary_care_units.xlsx')
primary_school.to_excel('cleaned_data/geo_primary_school.xlsx')
river.to_excel('cleaned_data/geo_river.xlsx')
roman_catholic_church.to_excel('cleaned_data/geo_roman_catholic_church.xlsx')
shopping_center.to_excel('cleaned_data/geo_shopping_center.xlsx')
technical_college.to_excel('cleaned_data/geo_technical_college.xlsx')
vocational_school.to_excel('cleaned_data/geo_vocational_school.xlsx')
zabka.to_excel('cleaned_data/geo_zabka.xlsx')
police_station.to_excel('cleaned_data/geo_police_station.xlsx')
custody_penitentiary.to_excel('cleaned_data/geo_custody_penitentiary.xlsx')
rossmann.to_excel('cleaned_data/geo_rossmann.xlsx')
tax_office.to_excel('cleaned_data/geo_tax_offices.xlsx')
tax_administration_chamber.to_excel('cleaned_data/geo_tax_administration_chambers.xlsx')
street_workout.to_excel('cleaned_data/geo_street_workouts.xlsx')
rope_park.to_excel('cleaned_data/geo_rope_parks.xlsx')
orlik_soccer_field.to_excel('cleaned_data/geo_orlik_soccer_fields.xlsx')
natural_swimming_pool.to_excel('cleaned_data/geo_natural_swimming_pools.xlsx')
museum.to_excel('cleaned_data/geo_museums.xlsx')
municipal_police_department.to_excel('cleaned_data/geo_municipal_police_departments.xlsx')
indoor_swimming_pool.to_excel('cleaned_data/geo_indoor_swimming_pool.xlsx')
horseback_riding_center.to_excel('cleaned_data/geo_horseback_riding_center.xlsx')
football_stadium.to_excel('cleaned_data/geo_football_stadiums.xlsx')
district_court.to_excel('cleaned_data/geo_district_court.xlsx')
cinema.to_excel('cleaned_data/geo_cinemas.xlsx')