# Geocode adresses without coordinates

#### Manually check address
Add full address to list **locations**

In [3]:
from geopy.geocoders import Nominatim
import time

locations = [
    ('Location1' ,'29 Skyline Rd, Fraser Rise, VIC, 3336')
]
geolocator = Nominatim(user_agent="test_app")
for city, address in locations:
    location = geolocator.geocode(address)
    print(city, location.latitude, location.longitude)
    time.sleep(2) 

Location1 -37.7154833 144.7159521


#### Import CSV file and read

In [13]:
import os
data_pkg_path = 'data'
filename = 'test_csv.csv'
path = os.path.join(data_pkg_path, filename)


In [14]:
df = pd.read_csv(path)

In [15]:
df

Unnamed: 0,ID,Address,Suburb,State,Postcode,Lat,Long,Address_full
0,H1,22 Evergreen Drive,Davoren Park,SA,5113,,,"22 Evergreen Drive, Davoren Park, SA, 5113"
1,H2,24 Evergreen Drive,Davoren Park,SA,5113,,,"24 Evergreen Drive, Davoren Park, SA, 5113"
2,H3,12 Fahey Court,Brookdale,WA,6112,,,"12 Fahey Court, Brookdale, WA, 6112"
3,H4,8 Raam Circuit,Armstrong Creek,VIC,3217,,,"8 Raam Circuit, Armstrong Creek, VIC, 3217"
4,H5,58 Sumac Street,Brookfield,VIC,3338,,,"58 Sumac Street, Brookfield, VIC, 3338"
...,...,...,...,...,...,...,...,...
1240,R320_2,1/36 Bert Street,GOSNELLS,WA,6110,,,"1/36 Bert Street, GOSNELLS, WA, 6110"
1241,R401_2,28 Campbell Street,Chinchilla,QLD,4413,,,"28 Campbell Street, Chinchilla, QLD, 4413"
1242,R517_2,2/85 Frederick Street,Wanneroo,WA,6065,,,"2/85 Frederick Street, Wanneroo, WA, 6065"
1243,R569_2,2/46 Calton Terrace,Gympie,QLD,4570,,,"2/46 Calton Terrace, Gympie, QLD, 4570"


## Finding coordinates
Import CSV file with column Address_full. Use free API geopy (OpenStreetMap - Nominatim) and fill Lat and Long columns and save in separate CSV output.csv

In [5]:
from geopy.geocoders import Nominatim
from tqdm import tqdm
import time
import pandas as pd
import os

# Load your CSV file
data_pkg_path = 'data'
filename = 'test_csv2.csv'
path = os.path.join(data_pkg_path, filename)
df = pd.read_csv(path)  # Ensure this file exists

# Initialize the geocoder
geolocator = Nominatim(user_agent="test_app")

# Function to get latitude and longitude
def get_lat_lon(address):
    try:
        location = geolocator.geocode(address, timeout=10)
        if location:
            return pd.Series([location.latitude, location.longitude])
        else:
            return pd.Series([None, None])  # Return None if no match found
    except Exception as e:
        print(f"Error with address {address}: {e}")
        return pd.Series([None, None])

# Apply the function to the 'Address_full' column
tqdm.pandas()
df[['Lat', 'Long']] = df['Address_full'].progress_apply(get_lat_lon)

# Save the updated DataFrame back to CSV
output_file = "updated_file3.csv"
df.to_csv(output_file, index=False)

# Display the first few rows
print(df.head())

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:09<00:00,  1.07it/s]

   ID             Address           Suburb State  Postcode        Lat  \
0  H1  22 Evergreen Drive     Davoren Park    SA      5113 -34.687853   
1  H2  24 Evergreen Drive     Davoren Park    SA      5113 -34.687853   
2  H3      12 Fahey Court        Brookdale    WA      6112 -32.168372   
3  H4      8 Raam Circuit  Armstrong Creek   VIC      3217 -38.245119   
4  H5     58 Sumac Street       Brookfield   VIC      3338 -37.689834   

         Long                                Address_full  
0  138.663741  22 Evergreen Drive, Davoren Park, SA, 5113  
1  138.663741  24 Evergreen Drive, Davoren Park, SA, 5113  
2  116.006220         12 Fahey Court, Brookdale, WA, 6112  
3  144.347297  8 Raam Circuit, Armstrong Creek, VIC, 3217  
4  144.545329      58 Sumac Street, Brookfield, VIC, 3338  





### Check it by using OpenRouteService API

Not really a big difference, but it applied coordinates for all addresses. It applied just coordinate of the middle of the street.

In [7]:
from tqdm import tqdm
import openrouteservice
import time
import pandas as pd
import os

# Load your CSV file
data_pkg_path = 'data'
filename = 'test_csv2.csv'
path = os.path.join(data_pkg_path, filename)
df = pd.read_csv(path)  # Ensure this file exists

# Initialize the geocoder
client = openrouteservice.Client(key='5b3ce3597851110001cf624828ffde81d8634733b0f7b548d69e98af')

# Function to get latitude and longitude
def get_lat_lon(address):
    try:
        # Geocode the address using OpenRouteService
        geocode_result = client.pelias_search(address, size=1)
        
        # Check if the geocoding result is available
        if geocode_result['features']:
            # Extract latitude and longitude
            lat = geocode_result['features'][0]['geometry']['coordinates'][1]
            lon = geocode_result['features'][0]['geometry']['coordinates'][0]
            return pd.Series([lat, lon])
        else:
            return pd.Series([None, None])  # Return None if no match found
    except Exception as e:
        print(f"Error with address {address}: {e}")
        return pd.Series([None, None])

# Apply the function to the 'Address_full' column
tqdm.pandas()
df[['Lat', 'Long']] = df['Address_full'].progress_apply(get_lat_lon)

# Save the updated DataFrame back to CSV
output_file = "updated_file3.csv"
df.to_csv(output_file, index=False)

# Display the first few rows
print(df.head())

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:26<00:00,  2.64s/it]

   ID             Address           Suburb State  Postcode        Lat  \
0  H1  22 Evergreen Drive     Davoren Park    SA      5113 -34.690598   
1  H2  24 Evergreen Drive     Davoren Park    SA      5113 -34.690598   
2  H3      12 Fahey Court        Brookdale    WA      6112 -32.168374   
3  H4      8 Raam Circuit  Armstrong Creek   VIC      3217 -38.246193   
4  H5     58 Sumac Street       Brookfield   VIC      3338 -37.699115   

         Long                                Address_full  
0  138.668807  22 Evergreen Drive, Davoren Park, SA, 5113  
1  138.668807  24 Evergreen Drive, Davoren Park, SA, 5113  
2  116.006223         12 Fahey Court, Brookdale, WA, 6112  
3  144.348048  8 Raam Circuit, Armstrong Creek, VIC, 3217  
4  144.555560      58 Sumac Street, Brookfield, VIC, 3338  





## Geocoding by coordinates

In [11]:
import os
data_pkg_path = 'data'
filename = 'updated_file3.csv'
path = os.path.join(data_pkg_path, filename)

In [12]:
df = pd.read_csv(path)

In [22]:
import geopandas as gpd
#write file, create geometry geadataframe, save as SHP
output_dir = 'output'
output_filename = 'geocoded_addresses.shp'
output_path = os.path.join(output_dir, output_filename)

coordinates = df[['Long', 'Lat']]

geometry = gpd.points_from_xy(coordinates['Long'], coordinates['Lat'])
gdf = gpd.GeoDataFrame(coordinates, crs='EPSG:4326', geometry=geometry)
gdf.to_file(filename=output_path, encoding='utf-8')
print('Successfully written output file at {}'.format(output_path))

Successfully written output file at output\geocoded_addresses.shp
