In [None]:
# --------WITHOUT GOOGLE MAP API-------------------

import pandas as pd
import geocoder

# Path to your CSV file containing addresses
input_csv_path = 'toronto_shelter_addresses.csv'
output_csv_path = 'toronto_shelter_addresses_with_coordinates.csv'

df = pd.read_csv(input_csv_path)

# Create empty lists to store the longitude and latitude
longitudes = []
latitudes = []

# Iterate over each shelter_address and apply geocoding
for shelter_address in df['shelter_address']:
    g = geocoder.geolytica(shelter_address)  # Use geolytica for geocoding
    if g.latlng:  # If latlng is found
        latitudes.append(g.latlng[0])  # Latitude
        longitudes.append(g.latlng[1])  # Longitude
    else:
        latitudes.append(None)  # If not found, append None
        longitudes.append(None)

# Add the longitude and latitude to the DataFrame
df['latitude'] = latitudes
df['longitude'] = longitudes

# Save the DataFrame to a new CSV file
df.to_csv(output_csv_path, index=False)

print(f"Geocoding completed. Results saved to {output_csv_path}")

In [6]:
# ----TEST IF THIS WORKS------


import requests
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Define the Google API key (make sure your .env file has google_map_api_key)
google_api_key = os.getenv('google_map_api_key')

# Google Maps Geocoding API URL
geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Define the function to get latitude and longitude from Google Maps API
def get_lat_lng(address, api_key):
    params = {
        'address': address,  # Use 'address' for Google API
        'key': api_key
    }
    response = requests.get(geocode_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        if len(data['results']) > 0:
            location = data['results'][0]['geometry']['location']
            return location['lat'], location['lng']
        else:
            print(f"No results found for address: {address}")
    else:
        print(f"Error fetching data for address: {address}, Status code: {response.status_code}")
    
    return None, None

# Test the function with a single address
lat, lng = get_lat_lng('189B Booth Ave M4M 2M5 Toronto', google_api_key)
print(lng, lat)

-79.3429848 43.6600165


In [1]:
#----------------WITH GOOGLE MAP API--------------------
import pandas as pd
import time
import requests
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

google_api_key = os.getenv('google_map_api_key')

# Google Maps Geocoding API URL
geocode_url = "https://maps.googleapis.com/maps/api/geocode/json"

input_csv_path = 'toronto_shelter_addresses.csv' #Be sure to have  this in your wkdir
output_csv_path = 'toronto_shelter_addresses_with_coordinate.csv'

df = pd.read_csv(input_csv_path)

# Ensure there are 'longitude' and 'latitude' columns; create them if they don't exist
if 'longitude' not in df.columns:
    df['longitude'] = None
if 'latitude' not in df.columns:
    df['latitude'] = None

# Function to get latitude and longitude from Google Maps API
def get_lat_lng(address, api_key):
    params = {
        'address': address, 
        'key': api_key
    }
    response = requests.get(geocode_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        if len(data['results']) > 0:
            location = data['results'][0]['geometry']['location']
            return location['lat'], location['lng']
        else:
            print(f"No results found for address: {address}")
    else:
        print(f"Error fetching data for address: {address}, Status code: {response.status_code}")
    
    return None, None

# Iterate over each row in the DataFrame where longitude and latitude are missing
for i, row in df.iterrows():
    if pd.isna(row['longitude']) or pd.isna(row['latitude']):
        shelter_address = row['shelter_address']  # Replace with the actual shelter_address column name in your CSV
        
        # Get latitude and longitude from Google API
        latitude, longitude = get_lat_lng(shelter_address, google_api_key)
        
        # Update the DataFrame with the fetched latitude and longitude
        if latitude and longitude:
            df.at[i, 'latitude'] = latitude
            df.at[i, 'longitude'] = longitude
        else:
            print(f"Failed to geocode: {shelter_address}")
        
        # Sleep for 1 second to avoid exceeding API rate limits
        time.sleep(1)

df.to_csv(output_csv_path, index=False)

print(f"Geocoding completed. Updated data saved to {output_csv_path}")

Geocoding completed. Updated data saved to toronto_shelter_addresses_with_coordinate.csv


In [22]:
# -------LOAD TO DATABASE ---------
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, VARCHAR
from sqlalchemy.exc import SQLAlchemyError
from urllib.parse import quote_plus
import os

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Path to your CSV file
file_path = 'toronto_shelter_addresses_with_coordinate.csv'

# Read the CSV data into a pandas DataFrame
df = pd.read_csv(file_path)

# Simplify and consolidate column name formatting
df.columns = df.columns.str.replace(' ', '_', regex=True).str.replace('[().?]', '', regex=True).str.lower()

# Database connection details
database_username = os.getenv('database_username')
database_password = os.getenv('database_password')
database_name = 'analytics_db'
database_host = 'localhost'
database_port = '5432'
database_schema = 'toronto'

database_connection_string = f'postgresql://{database_username}:{quote_plus(database_password)}@{database_host}:{database_port}/{database_name}'
# Create a SQLAlchemy engine
engine = create_engine(database_connection_string)

# Initialize MetaData object
metadata = MetaData()

# Define columns for the table without conditions
columns = [Column(column_name, VARCHAR) for column_name in df.columns]

# Name of the table in PostgreSQL
table_name = 'toronto_shelther_address'

# Create a table with the defined columns within the specified schema
table = Table(table_name, metadata, *columns, schema=database_schema, extend_existing=True)

try:
    # Create table if it doesn't exist
    metadata.create_all(engine)
    print(f"The table '{table_name}' has been created or already exists.")

    # Insert data into the table or append if shelter_address does not exist
    with engine.connect() as connection:
        existing_addresses_query = f"SELECT shelter_address FROM {database_schema}.{table_name}"
        existing_addresses = pd.read_sql(existing_addresses_query, connection)['shelter_address'].tolist()
        
        new_data = df[~df['shelter_address'].isin(existing_addresses)]
        
        if not new_data.empty:
            new_data.to_sql(table_name, engine, if_exists='append', index=False, schema=database_schema)
            print(f"New data appended to '{table_name}'.")
        else:
            print(f"No new data to append.")
        # Remove the temporary file after the data is inserted
        if os.path.exists(temp_csv_path):
            os.remove(temp_csv_path)  # Delete the temporary CSV file
            print(f"Temporary file '{temp_csv_path}' has been deleted.")

    # Get and print the row counts after loading data
    with engine.connect() as connection:
        current_count = connection.execute(f"SELECT COUNT(*) FROM {database_schema}.{table_name}").scalar()
        print(f"The table '{table_name}' now has {current_count} rows after the update.")
except SQLAlchemyError as e:
    print(f"An error occurred: {e}")

The table 'toronto_shelther_address' has been created or already exists.
No new data to append.
Temporary file 'temp.csv' has been deleted.
An error occurred: Not an executable object: 'SELECT COUNT(*) FROM toronto.toronto_shelther_address'
