In [1]:
!pip install pgeocode
import pandas as pd
import numpy as np
import pgeocode
import io

Collecting pgeocode
  Downloading pgeocode-0.5.0-py3-none-any.whl.metadata (7.9 kB)
Downloading pgeocode-0.5.0-py3-none-any.whl (9.8 kB)
Installing collected packages: pgeocode
Successfully installed pgeocode-0.5.0


In [8]:
# Creating the Tesla Service Center data manually
tesla_data = """Location Name,Type,City,Postal Code,Latitude,Longitude
Tesla Service Kitchener,Service Center,Kitchener,N2H 5G3,43.4612,-80.4722
Tesla Service Toronto-Lawrence,Service Center,Toronto,M3A 1C6,43.7431,-79.3175
Tesla Service Toronto-Sherway,Service Center,Toronto,M9C 1B8,43.6127,-79.5576
Tesla Service Etobicoke,Service Center,Etobicoke,M9B 1B7,43.6366,-79.5370
Tesla Service Vaughan,Service Center,Vaughan,L4L 8V1,43.7845,-79.5524
Tesla Service Mississauga,Service Center,Mississauga,L5T 2J3,43.6473,-79.6880
Tesla Service Brampton,Service Center,Brampton,L6R 3Y2,43.7559,-79.7934
Tesla Service Oakville,Service Center,Oakville,L6K 3S3,43.4350,-79.6890
Tesla Service Hamilton,Service Center,Hamilton,L9A 4X5,43.2185,-79.8620
Tesla Service London,Service Center,London,N6L 1A8,42.9460,-81.2330
Tesla Service Barrie,Service Center,Innisfil,L9S 0A3,44.3410,-79.6640
Tesla Service Ottawa,Service Center,Ottawa,K1S 2E7,45.3970,-75.7070
Tesla Service Nepean (Barrhaven),Service Center,Nepean,K2R 0A5,45.2728,-75.8015
Tesla Service North York,Service Center,North York,M3A 1A3,43.7380,-79.3240
Tesla Service Oshawa,Service Center,Oshawa,L1J 2K5,43.8790,-78.8920"""

df_tesla = pd.read_csv(io.StringIO(tesla_data))
print("Tesla Data Loaded! Found", len(df_tesla), "locations.")

Tesla Data Loaded! Found 15 locations.


In [11]:
filename = "/content/98-401-X2021030_English_CSV_data"

print("Reading Census Data... (This might take 30 seconds)")
df_census = pd.read_csv(filename, encoding="ISO-8859-1", low_memory=False)

# 1. Filter for Ontario FSAs (Postal codes starting with K, L, M, N, P)
# We look for rows where GEO_LEVEL is 'Forward sortation area' (or check format)
df_ontario = df_census[df_census['GEO_NAME'].str.match(r'^[KLMNP]\d[A-Z]', na=False)].copy()

print("Filtered to Ontario. Finding Population and Income columns...")

# 2. Extract Population and Median Income
# StatCan formatting is tricky, so we search for the text strings
pop_rows = df_ontario[df_ontario['CHARACTERISTIC_NAME'].str.contains("Population, 2021", case=False, na=False)]
income_rows = df_ontario[df_ontario['CHARACTERISTIC_NAME'].str.contains("Median total income", case=False, na=False)]

# Merge them together
df_pop = pop_rows[['GEO_NAME', 'C1_COUNT_TOTAL']].rename(columns={'C1_COUNT_TOTAL': 'Population'})
df_inc = income_rows[['GEO_NAME', 'C1_COUNT_TOTAL']].rename(columns={'C1_COUNT_TOTAL': 'Median_Income'})

df_final = pd.merge(df_pop, df_inc, on='GEO_NAME', how='inner')

# Convert columns to numbers (remove commas if any)
df_final['Population'] = pd.to_numeric(df_final['Population'], errors='coerce')
df_final['Median_Income'] = pd.to_numeric(df_final['Median_Income'], errors='coerce')

print(f"Data ready for {len(df_final)} postal codes.")

# 3. Calculate Distance to Nearest Tesla Center
nomi = pgeocode.Nominatim('ca')

# Get Lat/Lon for every FSA
print("Geocoding postal codes...")
locations = nomi.query_postal_code(df_final['GEO_NAME'].tolist())
df_final['Latitude'] = locations.latitude
df_final['Longitude'] = locations.longitude

# Drop rows where we couldn't find coordinates
df_final = df_final.dropna(subset=['Latitude', 'Longitude'])

# Function: Find nearest Tesla center
def calculate_distance(row):
    # Approximate distance (Euclidean is fine for this scale, or Haversine)
    # 1 degree lat ~= 111km.
    dists = []
    for _, tesla in df_tesla.iterrows():
        d = np.sqrt((row['Latitude'] - tesla['Latitude'])**2 +
                    (row['Longitude'] - tesla['Longitude'])**2) * 111
        dists.append(d)
    return min(dists)

df_final['Distance_to_Tesla_km'] = df_final.apply(calculate_distance, axis=1)

# 4. The "Opportunity Score"
# High Pop + High Income + High Distance = High Opportunity
# We normalize it so numbers aren't huge
df_final['Expansion_Score'] = (df_final['Population'] * df_final['Median_Income'] * df_final['Distance_to_Tesla_km']) / 1000000

print("Analysis Complete! Top 5 Expansion Candidates:")
print(df_final.sort_values(by='Expansion_Score', ascending=False).head(5))

# 5. Save file
df_final.to_csv('tesla_expansion_analysis.csv', index=False)

Reading Census Data... (This might take 30 seconds)
Filtered to Ontario. Finding Population and Income columns...
Data ready for 5210 postal codes.
Geocoding postal codes...
Analysis Complete! Top 5 Expansion Candidates:
     GEO_NAME  Population  Median_Income  Latitude  Longitude  \
4777      P0T     32000.0       132000.0   49.2924   -88.7560   
5097      P7A     28587.0       136000.0   48.4601   -89.2035   
5107      P7B     22675.0       142000.0   48.9475   -89.4063   
4727      P0M     49727.0       145000.0   47.6526   -82.4753   
4787      P0V     22944.0       101000.0   52.9648   -90.1505   

      Distance_to_Tesla_km  Expansion_Score  
4777           1092.503491     4.614735e+06  
5097           1075.808021     4.182561e+06  
5107           1125.546760     3.624092e+06  
4727            482.181014     3.476725e+06  
4787           1488.799928     3.450062e+06  
