In [1]:
# uncomment these lines for one time install of required packages

# !conda install -c conda-forge geopy --yes 
# !pip install geocoder
# !pip install --upgrade pandas pgeocode
# !pip install pgeocode
# !conda install -c conda-forge folium=0.5.0 --yes

import pandas as pd                     # For DataFrames, plotting, etc.
import numpy as np                      # For mathematical calculations
import zipfile                          # For unzipping the downloaded archives
import os                               # For file-system operations (e.g. removing files)
import requests                         # For getting files and JSON over HTTP
import json                             # For reading and wrangling JSON files
from bs4 import BeautifulSoup           # For scraping HTML data from websites
from pandas import json_normalize       # For flattening JSON into tabular form
import folium                           # For plotting maps
# NOTE(review): this binds the top-level `matplotlib` package to the name `plt`,
# not `matplotlib.pyplot` (the conventional target of the `plt` alias) — confirm
# this is intended before relying on pyplot-style calls through `plt`.
import matplotlib as plt                # For plotting charts
import matplotlib.cm as cm              # For functions to handle colormaps
import matplotlib.colors as colors      # For colour maps
import pgeocode                         # For getting lat and lon for postal codes
from geopy.geocoders import Nominatim   # For converting addresses into latitude and longitude values on maps
from sklearn.cluster import KMeans      # k-means, used in the clustering stage
import warnings                         # For ignoring all warnings
# Silences every warning for the rest of the session, including deprecation
# notices from the libraries imported above.
warnings.filterwarnings('ignore')
# `None` removes pandas' display limits, so whole frames are rendered when shown.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Does Toronto have 103 postal codes? Let's cross-check the count against a
# second source: the GeoNames postal-code dump for Canada.
# URL of the file to be downloaded
url = 'https://download.geonames.org/export/zip/CA_full.csv.zip'

# Send a GET request to the URL. requests applies no timeout by default, so an
# explicit one keeps a stalled connection from hanging this cell indefinitely.
response = requests.get(url, timeout=60)

# Check if the request was successful
if response.status_code == 200:
    # Save the content to a file
    zip_file_path = 'CA_full.csv.zip'
    with open(zip_file_path, 'wb') as file:
        file.write(response.content)
    print('File downloaded successfully!')

    # Unzip the file
    extract_path = 'extracted_files'
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
        print('File unzipped successfully!')
    finally:
        # Remove the zip file whether or not extraction succeeded, so a corrupt
        # or partial download is never left behind in the working directory.
        os.remove(zip_file_path)
else:
    # Nothing is written on failure; any previously extracted copy is left as is.
    print('Failed to download the file.')

File downloaded successfully!
File unzipped successfully!


In [3]:
# Read the file. GeoNames publishes its postal-code dumps as UTF-8, so decode
# explicitly instead of relying on the platform default encoding (which is not
# UTF-8 on every system and can garble or reject accented place names).
file_path = 'extracted_files/CA_full.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Filter lines where the text after "CA" and the tab starts with "M".
# Lines without a tab (e.g. a blank trailing line) are skipped instead of
# raising IndexError on the missing second field.
filtered_lines = [
    line for line in lines
    if '\t' in line and line.split('\t')[1].startswith('M')
]

# Create a DataFrame from the filtered lines.
# Only the line terminator is removed: a bare strip() would also strip trailing
# tabs and silently drop empty trailing fields from the tab-separated row.
data = [line.rstrip('\r\n').split('\t') for line in filtered_lines]
# NOTE(review): the GeoNames readme lists 12 fields with latitude and longitude
# as the 10th and 11th; the labels below place 'Latitude'/'Longitude' on the 9th
# and 10th. Labels are kept unchanged here because later cells may reference
# them — verify against the data before using these two columns.
df = pd.DataFrame(
    data,
    columns=[
        'Country', 'Postal Code', 'Place Name', 'Province',
        'Province Abbreviation', 'Column6', 'Column7', 'Column8',
        'Latitude', 'Longitude', 'Column11', 'Column12',
    ],
)

# Split the postal code into two parts: the first three characters and the rest
df['Postal Code Part 1'] = df['Postal Code'].str[:3]
df['Postal Code Part 2'] = df['Postal Code'].str[3:]

# Count distinct values in "Postal Code Part 1"
distinct_count = df['Postal Code Part 1'].nunique()

# Display the count
print(f"Count of distinct values of Toronto Postal codes is: {distinct_count}")

Count of distinct values of Toronto Postal codes is: 103
