<h1>Google Geolocation API Notebook</h1>

<p>This notebook has been created to call the Google Geolocation API to convert the addresses that haven been retrieved during the web scraping into long and lat coordinates. These coordinates can the later be used for further analysis.<p> 

In [4]:
import pandas as pd

filepath = 'TablesDB/Location.csv'

df = pd.read_csv(filepath, delimiter=";")
df

Unnamed: 0,LocationId,Street,ZIP,longitude,latitude
0,78753131-6d78-4d1e-a0e5-fc8b5f430570,,6598.0,,
1,f0c2bc0e-55ab-4eb1-98fa-edcb4a0ee01e,via albonago 43,6962.0,,
2,fbdcde66-0fd7-4304-a83e-70ecbb0f6ae7,Via San Gottardo 41,6500.0,,
3,2d7451a3-d7e1-4dc7-babf-ab237f2c02e4,Via F. Zorzi 17,6500.0,,
4,2bde1434-f480-4d46-9b05-99e0c0659671,Via San Gottardo,6900.0,,
...,...,...,...,...,...
11347,2dba2402-d23f-4ab8-9bb6-702bbe28cb4e,Chemin de la Petite-Californie,1222.0,,
11348,daed4779-995c-4c34-85e7-4b269d8bf5cc,Chemin William-Barbey,1292.0,,
11349,faddd38c-e9fc-4f4d-bf09-a41818cc620f,Rue du Temple 5,1236.0,,
11350,84d332ee-e1a8-4b5f-a432-82819fffe330,Chemin de la Pierre-à-Bochet,1226.0,,


<h2>Check the data for missing values</h2>

In [5]:
import pandas as pd
#counting missing addresses

missing_count = df['Street'].isnull().sum()
missing_zip = df['ZIP'].isnull().sum()
datapoints = df['LocationId'].count()

print("Total # of data points:", datapoints)
print("Missing addresses:", missing_count)
print("Missing ZIP codes:", missing_zip)

# Count the number of addresses without a street number
count_no_street_number = sum(~(df['Street'].str.match(r'^\d+.*\d+$') | df['Street'].str.match(r'^.*\d+$')))

print("Number of addresses without a street number:", count_no_street_number)




Total # of data points: 11352
Missing addresses: 708
Missing ZIP codes: 57
Number of addresses without a street number: 2377


<h2>Convert the addresses into lat/long coordinates using the Google Geocoding API</h2>

In [6]:


import json
import requests
import pandas as pd

api_key = 'AIzaSyDIq5EgNO2nm4hCj475zL2cyStM9l6PaV8'


# Request geolocation API


def get_geolocation(address, api_key):
    params = {
        'key': api_key,
        'address': address,
    }
    base_url = 'https://maps.googleapis.com/maps/api/geocode/json?'

    # Server response
    response = requests.get(base_url, params=params)
    data = response.json()
    if data['status'] == 'OK':
        location = data['results'][0]['geometry']['location']
        return location['lat'], location['lng']
    else:
        return None, None

data = df



# Iterate through each address
for index, row in data.iterrows():
    # Concatenate street and ZIP columns to form the address
    address = f"{row['Street']}, {row['ZIP']}, {'Switzerland'}"

    # Get geolocation data
    latitude, longitude = get_geolocation(address, api_key)

    # Update Longitude and Latitude columns in the DataFrame
    data.at[index, 'latitude'] = latitude
    data.at[index, 'longitude'] = longitude

filepath_csv = 'TablesDB/Location_v3.csv'
data.to_csv(filepath_csv, index=False)
    






<h2>Check the final result</h2>

In [12]:
import pandas as pd

filepath = 'TablesDB/Location_v3.csv'

df2 = pd.read_csv(filepath, delimiter=",")
df2

Unnamed: 0,LocationId,Street,ZIP,longitude,latitude
0,78753131-6d78-4d1e-a0e5-fc8b5f430570,,6598.0,8.849665,46.175962
1,f0c2bc0e-55ab-4eb1-98fa-edcb4a0ee01e,via albonago 43,6962.0,8.974113,46.013360
2,fbdcde66-0fd7-4304-a83e-70ecbb0f6ae7,Via San Gottardo 41,6500.0,8.808332,46.174824
3,2d7451a3-d7e1-4dc7-babf-ab237f2c02e4,Via F. Zorzi 17,6500.0,9.015443,46.187900
4,2bde1434-f480-4d46-9b05-99e0c0659671,Via San Gottardo,6900.0,8.840817,46.178948
...,...,...,...,...,...
11347,2dba2402-d23f-4ab8-9bb6-702bbe28cb4e,Chemin de la Petite-Californie,1222.0,6.207831,46.242178
11348,daed4779-995c-4c34-85e7-4b269d8bf5cc,Chemin William-Barbey,1292.0,6.149006,46.241745
11349,faddd38c-e9fc-4f4d-bf09-a41818cc620f,Rue du Temple 5,1236.0,6.018499,46.174626
11350,84d332ee-e1a8-4b5f-a432-82819fffe330,Chemin de la Pierre-à-Bochet,1226.0,6.214858,46.199271


In [13]:
import pandas as pd
#counting missing addresses

missing_long = df2['longitude'].isnull().sum()
missing_lat = df2['latitude'].isnull().sum()
datapoints = df2['LocationId'].count()

print("Total # of data points;", datapoints)
print("Missing long:", missing_long)
print("Missing lat:", missing_lat)

# Count the number of addresses without a street number
count_no_street_number = sum(~(df2['Street'].str.match(r'^\d+.*\d+$') | df2['Street'].str.match(r'^.*\d+$')))

print("Number of addresses without a street number:", count_no_street_number)

Total # of data points; 11352
Missing long: 2
Missing lat: 2
Number of addresses without a street number: 2377


In [18]:

# boundaries for latitude and longitude for Switzerland
min_lat = 45.8179 #(southernmost point of Switzerland)
max_lat = 47.8084 #(northernmost point of Switzerland)
min_lon = 5.9559 #(westernmost point of Switzerland)
max_lon = 10.4923 #(easternmost point of Switzerland)

# Check if each latitude and longitude value is within the specified boundaries
within_boundary = (df2['latitude'].between(min_lat, max_lat) & df2['longitude'].between(min_lon, max_lon))

# Create a new DataFrame containing data points outside the boundaries
outside_boundary_df = df[~within_boundary]

if outside_boundary_df.empty:
    print("All latitude and longitude values are within the specified boundary.")
else:
    print(f"{len(outside_boundary_df)} data points are outside the specified boundary.")

    # Print the information of data points outside the boundaries
    print("Data points outside the boundary:")
    print(outside_boundary_df)


28 data points are outside the specified boundary.
Data points outside the boundary:
                                 LocationId                    Street     ZIP  \
180    1c57e661-c4b0-422c-9ef8-cc66ad1af5eb   Robert-Walserstrasse 11  9100.0   
239    ef87259f-845f-46e0-8944-32bed7faa9d0  Robert-Walserstrasse 11a  9100.0   
913    2d6f5a62-5457-475e-8f5c-ce9acf790fad                Vorstadt 8  8200.0   
914    726d346b-d315-400d-803b-3212cffe97fc                Vorstadt 8  8200.0   
915    4e20fe81-f2cb-4e9a-a3b1-7a3d017ebaa7                Vorstadt 8  8200.0   
1773   66d28478-dbfc-4599-a77b-90383b0ab572       Rue des Tilleuls 31  2900.0   
1774   6dd4f402-af07-46d7-8c08-8779e3fec22a       Rue des Tilleuls 31  2900.0   
1775   f41a92fc-98da-4752-b641-7add8464a31e       Rue des Tilleuls 31  2900.0   
1942   28a4c39c-0be4-41cf-adf6-99fd93eb9aa7      Rue des Merisiers 10  2800.0   
1977   0c1c6011-fafb-4c69-bc69-74047c7c6df4      Rue des Merisiers 10  2800.0   
1983   f7238d97-2c56-422

In [19]:
outside_boundary_df

Unnamed: 0,LocationId,Street,ZIP,longitude,latitude
180,1c57e661-c4b0-422c-9ef8-cc66ad1af5eb,Robert-Walserstrasse 11,9100.0,,
239,ef87259f-845f-46e0-8944-32bed7faa9d0,Robert-Walserstrasse 11a,9100.0,,
913,2d6f5a62-5457-475e-8f5c-ce9acf790fad,Vorstadt 8,8200.0,9.213787,48.087721
914,726d346b-d315-400d-803b-3212cffe97fc,Vorstadt 8,8200.0,9.213787,48.087721
915,4e20fe81-f2cb-4e9a-a3b1-7a3d017ebaa7,Vorstadt 8,8200.0,9.213787,48.087721
1773,66d28478-dbfc-4599-a77b-90383b0ab572,Rue des Tilleuls 31,2900.0,1.135302,43.401046
1774,6dd4f402-af07-46d7-8c08-8779e3fec22a,Rue des Tilleuls 31,2900.0,1.135302,43.401046
1775,f41a92fc-98da-4752-b641-7add8464a31e,Rue des Tilleuls 31,2900.0,1.135302,43.401046
1942,28a4c39c-0be4-41cf-adf6-99fd93eb9aa7,Rue des Merisiers 10,2800.0,7.260488,48.085301
1977,0c1c6011-fafb-4c69-bc69-74047c7c6df4,Rue des Merisiers 10,2800.0,7.260488,48.085301
