# General Information
>
>Author: Lucas Lobianco De Matheo
>
>Title: **Geolocation**  
>
>
>
>Extension: .csv
>
>Source: previously generated | the same approach can be applied to any dataset that has an address column  
>
>Date: 02-27-2022
>
> **Main Skills of this project:** 
> - **Data Preparation**
> - **Data Engineering**  
> - **APIs**
> - **Data Visualization**

# Importing Essential Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

# Importing File

In [2]:
# Input CSV, given as a relative path; it is semicolon-delimited and read
# with the ISO-8859-1 (Latin-1) codec.
file = r'Address_Names_generator_1.csv'
df = pd.read_csv(filepath_or_buffer=file, sep=';', encoding='iso-8859-1')

# Preview the first ten records.
df.head(n=10)

Unnamed: 0,ID,Gender,Name,Surname,Address,Profession,Income
0,1,Man,Bernardo,Jocome Ormo,"Rua Teixeira de Melo, 23",Singer,1756.769605
1,2,Woman,Amanda,Barbosa,"Rua Joana Angelica, 158",Singer,1797.975926
2,3,Woman,Maria Cecília,Brunelli Pasti Zardo,"Rua Canning, 68",Biomedic,2191.951183
3,4,Man,Afonso,Abreu,"Rua Paul Redfern, 56",Nurse,2160.014892
4,5,Man,Ravi,Oliveira Alves,"Rua Anibal de Mendonça, 51",Photographer,3208.608267
5,6,Woman,Camila,Raposo Valle,"Rua Prudente de Morais, 1620",Florist,2310.662726
6,7,Man,Theo,Torres Sodre,"Av. Epitácio Pessoa, 730",Computer engineer,6510.105724
7,8,Woman,Sandra,Jocome Silva,"Av. Henrique Dumont, 122",Designer,9326.478776
8,9,Woman,Camila,Toledo Zoppi Nucci,"Rua Maria Quiteria, 95",Architect,8611.781841
9,10,Woman,Renata,Herzog Kruger,"Rua Almirante Saddock de Sá, 105",Computer engineer,17544.70498


# Enriching the search and locating the places

In [3]:
# Collect the bare street addresses exactly as they appear in df. These are
# the keys used later to attach coordinates back to the records, so they
# must not be altered.
sitesLocatedSingle = [] # important for final attribution
for index, row in df.iterrows():
    sitesLocatedSingle.append(row.Address)

# De-duplicate (np.unique also sorts) so each distinct address is queried once.
sitesLocatedSingle = np.unique(sitesLocatedSingle) # not to change the original records

# Build the enriched search strings FROM the unique bare addresses, so that
# sitesLocated[k] always corresponds to sitesLocatedSingle[k]. Calling
# np.unique on both lists independently is not safe: appending the suffix can
# change the sort order (e.g. 'Rua A, 10' vs 'Rua A, 10 B', because ' ' sorts
# before ','), which would pair addresses with the wrong coordinates when the
# two arrays are zipped together.
sitesLocated = np.array([
    address + ', Ipanema' + ', ' + 'Rio de Janeiro' + ', ' + 'Brasil'
    for address in sitesLocatedSingle
])
sitesLocated

array(['Av. Epitácio Pessoa, 1038, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 1084, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 1234, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 698, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 730, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 780, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 800, Ipanema, Rio de Janeiro, Brasil',
       'Av. Epitácio Pessoa, 944, Ipanema, Rio de Janeiro, Brasil',
       'Av. Henrique Dumont, 115, Ipanema, Rio de Janeiro, Brasil',
       'Av. Henrique Dumont, 118, Ipanema, Rio de Janeiro, Brasil',
       'Av. Henrique Dumont, 122, Ipanema, Rio de Janeiro, Brasil',
       'Av. Henrique Dumont, 125, Ipanema, Rio de Janeiro, Brasil',
       'Av. Henrique Dumont, 174, Ipanema, Rio de Janeiro, Brasil',
       'Av. Vieira Souto, 216, Ipanema, Rio de Janeiro, Brasil',
       'Av. Vieira Souto, 220, Ipanema, Rio de J

# Looking for the places

In [4]:
# instantiate a new Nominatim client
# NOTE(review): the Nominatim usage policy asks for a user_agent that
# identifies the application; "tutorial" is generic - consider replacing it.
app = Nominatim(user_agent="tutorial")

# Throttle to at most one request per second, as required by the public
# Nominatim service. RateLimiter also retries geocoder service errors and,
# with its default settings, returns None if they persist instead of raising.
geocode = RateLimiter(app.geocode, min_delay_seconds=1)

latitude = []
longitude = []

# Query every enriched address once. latitude[k] / longitude[k] belong to
# sitesLocated[k]; an address that cannot be resolved gets the string 'null'
# in both lists so that the positions stay aligned.
for val in sitesLocated:
    location = geocode(val)
    if location is None:
        # No match (or the service kept failing): keep the placeholder.
        latitude.append('null')
        longitude.append('null')
    else:
        # The raw Nominatim payload carries the coordinates as strings.
        latitude.append(location.raw["lat"])
        longitude.append(location.raw["lon"])

In [5]:
# Turn both coordinate lists into NumPy arrays and show them, one per line.
latitude, longitude = np.array(latitude), np.array(longitude)
print(latitude, longitude, sep='\n')

['-22.9806246' '-22.9806246' '-22.9806246' '-22.9806246' '-22.9806246'
 '-22.9806246' '-22.9806246' '-22.9806246' '-22.9808905' '-22.9808905'
 '-22.9808905' '-22.9808905' '-22.9808905' '-22.986284949999998'
 '-22.98628025' '-22.98626' '-22.9860671' '-22.9859383' '-22.9869591'
 '-22.9869591' '-22.9811559' '-22.9803664' '-22.9811559' '-22.9811559'
 '-22.9811559' '-22.9811559' '-22.9811559' '-22.9811559' '-22.9800821'
 '-22.9800821' '-22.9800821' '-22.9800821' '-22.9800821' '-22.9828334'
 '-22.9828334' '-22.9828334' '-22.9828334' '-22.9828334' '-22.9828334'
 '-22.9828334' '-22.9828334' '-22.9831351' '-22.9831351' '-22.9831351'
 '-22.9831351' '-22.9831351' '-22.9831351' '-22.9831351' '-22.9814293'
 '-22.9814293' '-22.9814293' '-22.9814293' '-22.9814293' '-22.9814293'
 '-22.9814293' '-22.9848833' '-22.9848833' '-22.9848833' '-22.9848833'
 '-22.9848833' '-22.9848833' '-22.9848833' '-22.9848833' '-22.9862279'
 '-22.9862279' '-22.9862279' '-22.9862279' '-22.9862279' '-22.9862279'
 '-22.9862279

In [6]:
sitesLocatedSingle # Checking that the bare (un-suffixed) addresses were collected successfully

array(['Av. Epitácio Pessoa, 1038', 'Av. Epitácio Pessoa, 1084',
       'Av. Epitácio Pessoa, 1234', 'Av. Epitácio Pessoa, 698',
       'Av. Epitácio Pessoa, 730', 'Av. Epitácio Pessoa, 780',
       'Av. Epitácio Pessoa, 800', 'Av. Epitácio Pessoa, 944',
       'Av. Henrique Dumont, 115', 'Av. Henrique Dumont, 118',
       'Av. Henrique Dumont, 122', 'Av. Henrique Dumont, 125',
       'Av. Henrique Dumont, 174', 'Av. Vieira Souto, 216',
       'Av. Vieira Souto, 220', 'Av. Vieira Souto, 288',
       'Av. Vieira Souto, 390', 'Av. Vieira Souto, 438',
       'Av. Vieira Souto, 494', 'Av. Vieira Souto, 572',
       'Rua Alberto de Campos, 100', 'Rua Alberto de Campos, 12',
       'Rua Alberto de Campos, 191', 'Rua Alberto de Campos, 195',
       'Rua Alberto de Campos, 65', 'Rua Alberto de Campos, 71',
       'Rua Alberto de Campos, 81', 'Rua Alberto de Campos, 88',
       'Rua Almirante Saddock de Sá, 101',
       'Rua Almirante Saddock de Sá, 105',
       'Rua Almirante Saddock de Sá, 2'

# Creating a dictionary with addresses and coordinates
> An optimized way to assign coordinates to addresses: each unique address is geocoded once, so the API does not have to be called for every record

In [7]:
# Pair each bare address with the coordinate stored at the same position.
# The bare addresses are used as keys because they match df['Address'].
dictLat = {address: coord for address, coord in zip(sitesLocatedSingle, latitude)}
dictLon = {address: coord for address, coord in zip(sitesLocatedSingle, longitude)}
print(dictLat, dictLon, sep='\n')

{'Av. Epitácio Pessoa, 1038': '-22.9806246', 'Av. Epitácio Pessoa, 1084': '-22.9806246', 'Av. Epitácio Pessoa, 1234': '-22.9806246', 'Av. Epitácio Pessoa, 698': '-22.9806246', 'Av. Epitácio Pessoa, 730': '-22.9806246', 'Av. Epitácio Pessoa, 780': '-22.9806246', 'Av. Epitácio Pessoa, 800': '-22.9806246', 'Av. Epitácio Pessoa, 944': '-22.9806246', 'Av. Henrique Dumont, 115': '-22.9808905', 'Av. Henrique Dumont, 118': '-22.9808905', 'Av. Henrique Dumont, 122': '-22.9808905', 'Av. Henrique Dumont, 125': '-22.9808905', 'Av. Henrique Dumont, 174': '-22.9808905', 'Av. Vieira Souto, 216': '-22.986284949999998', 'Av. Vieira Souto, 220': '-22.98628025', 'Av. Vieira Souto, 288': '-22.98626', 'Av. Vieira Souto, 390': '-22.9860671', 'Av. Vieira Souto, 438': '-22.9859383', 'Av. Vieira Souto, 494': '-22.9869591', 'Av. Vieira Souto, 572': '-22.9869591', 'Rua Alberto de Campos, 100': '-22.9811559', 'Rua Alberto de Campos, 12': '-22.9803664', 'Rua Alberto de Campos, 191': '-22.9811559', 'Rua Alberto de 

# Assigning the coordinates to the respective addresses

In [8]:
# Look up every record's address in the address -> latitude dictionary.
# Series.map performs one dictionary lookup per row (instead of scanning the
# whole dictionary for each row) and always returns one value per record:
# an address missing from dictLat yields NaN rather than silently shortening
# the list and failing on assignment.
lat = df['Address'].map(dictLat).tolist()
df['Latitude'] = lat

In [9]:
# Look up every record's address in the address -> longitude dictionary.
# Series.map performs one dictionary lookup per row (instead of scanning the
# whole dictionary for each row) and always returns one value per record:
# an address missing from dictLon yields NaN rather than silently shortening
# the list and failing on assignment.
lon = df['Address'].map(dictLon).tolist()
df['Longitude'] = lon

In [10]:
df  # show the records together with the new Latitude / Longitude columns

Unnamed: 0,ID,Gender,Name,Surname,Address,Profession,Income,Latitude,Longitude
0,1,Man,Bernardo,Jocome Ormo,"Rua Teixeira de Melo, 23",Singer,1756.769605,-22.9866645,-43.1987363
1,2,Woman,Amanda,Barbosa,"Rua Joana Angelica, 158",Singer,1797.975926,-22.9832734,-43.2049026
2,3,Woman,Maria Cecília,Brunelli Pasti Zardo,"Rua Canning, 68",Biomedic,2191.951183,-22.9848833,-43.1951764
3,4,Man,Afonso,Abreu,"Rua Paul Redfern, 56",Nurse,2160.014892,-22.9848306,-43.2142818
4,5,Man,Ravi,Oliveira Alves,"Rua Anibal de Mendonça, 51",Photographer,3208.608267,-22.9828334,-43.2112609
5,6,Woman,Camila,Raposo Valle,"Rua Prudente de Morais, 1620",Florist,2310.662726,-22.9845432,-43.21215795142644
6,7,Man,Theo,Torres Sodre,"Av. Epitácio Pessoa, 730",Computer engineer,6510.105724,-22.9806246,-43.2111012
7,8,Woman,Sandra,Jocome Silva,"Av. Henrique Dumont, 122",Designer,9326.478776,-22.9808905,-43.2132616
8,9,Woman,Camila,Toledo Zoppi Nucci,"Rua Maria Quiteria, 95",Architect,8611.781841,-22.9863847,-43.2072464
9,10,Woman,Renata,Herzog Kruger,"Rua Almirante Saddock de Sá, 105",Computer engineer,17544.70498,-22.9800821,-43.2012892


# Exporting the data

In [11]:
# Write the enriched records to a new CSV file, leaving out the row index.
df.to_csv(path_or_buf=r'Address_Names_generator_1_locations.csv', index=False)