# Populated Places of BC

Goal: Create a .csv list of places in BC that have a population of at least 1. This includes cities, towns, villages, reserves and other populated regions.

Combines data from:
- Geographical Place Names in BC
- 2021 Population Census of BC communities

*Note that the original CSVs have been modified by hand to remove metadata information (headers & footnotes) and aggregation rows*

In [None]:
import pandas as pd

# Load the place-names table and immediately discard the listed columns.
features = pd.read_csv(
  "./geographical_names_bc.csv.gz", compression="gzip"
).drop(
  columns=[
    "ISO Language Code",
    "Concise Code",
    "Toponymic Feature ID",
    "Province - Territory",
    "Source",
    "Relevance at Scale",
    "Syllabic Form",
    "Decision Date",
    "CGNDB ID",
    "Language",
  ]
)

# Load the census table, discard the listed columns, and rename its place-name
# column to the header used by the place-names table so both share a join key.
populations = (
  pd.read_csv("./population_census_bc.csv.gz", compression="gzip")
  .drop(
    columns=[
      "Population, 2016",
      "Population percentage change, 2016 to 2021",
      "Total private dwellings, 2021",
      "Total private dwellings, 2016",
      "Total private dwellings percentage change, 2016 to 2021",
      "Private dwellings occupied by usual residents, 2021",
      "Private dwellings occupied by usual residents, 2016",
      "Private dwellings occupied by usual residents percentage change, 2016 to 2021",
      "National population rank, 2021",
      "Province/territory population rank, 2021",
      "Geographic area type abbreviation",
    ]
  )
  .rename(columns={"Geographic name": "Geographical Name"})
)

# Attach the census figures to every named feature. Features with no matching
# census row get NaN in the census columns and are removed just below.
joined = features.join(
  populations.set_index("Geographical Name"), on="Geographical Name", how="left"
)

# Only keep rows in features where all three census figures were found.
# The explicit copy makes `joined` an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
joined = joined.dropna(
  subset=[
    "Population, 2021",
    "Land area in square kilometres, 2021",
    "Population density per square kilometre, 2021",
  ]
).copy()

# Replace numeric columns in "123,456" string format with numbers.
# regex=False makes the comma a literal regardless of the pandas version's
# default for `regex`.
joined["Population, 2021"] = (
  joined["Population, 2021"].str.replace(",", "", regex=False).astype(int)
)
joined["Land area in square kilometres, 2021"] = (
  joined["Land area in square kilometres, 2021"]
  .str.replace(",", "", regex=False)
  .astype(float)
)
joined["Population density per square kilometre, 2021"] = (
  joined["Population density per square kilometre, 2021"]
  .str.replace(",", "", regex=False)
  .astype(float)
)


# Only keep places with a population of at least 1, then remove duplicate
# features (gets rid of, for example, "Bowen Island" as an island and
# "Bowen Island" as a municipality). Sorting by population in descending order
# first means the row kept for each name is the one with the larger number.
joined = (
  joined.loc[joined["Population, 2021"] > 0]
  .sort_values(by="Population, 2021", ascending=False)
  .drop_duplicates(subset="Geographical Name", keep="first")
)

# Drop entries whose name starts with "Unnamed" (the weird outlier
# "Unnamed 10", which is not in BC).
is_unnamed = joined["Geographical Name"].str.startswith("Unnamed")
joined = joined.loc[~is_unnamed]

# Add an "id" column for each populated place, taken from the row label plus
# one, and then lowercase all headers.
joined = joined.assign(id=joined.index + 1).rename(columns=str.lower)

# Mark as Urban or not, following Statistics Canada's population-centre rule:
# a population of at least 1,000 and a density of 400 or more persons per
# square kilometre. Both thresholds are inclusive, hence >= rather than >.
# https://www23.statcan.gc.ca/imdb/p3VD.pl?Function=getVD&TVD=113331&CVD=113332&CLV=0&MLV=2&D=1&adm=0&dis=0
joined["isUrban"] = (joined["population, 2021"] >= 1000) & (
  joined["population density per square kilometre, 2021"] >= 400
)

# Write the final table one directory up, without the pandas row index.
joined.to_csv("../BCPopulatedPlaces.csv", index=False)