In [32]:
import pandas as pd
import numpy as np
from datetime import datetime

In [33]:
data = pd.read_csv("data_sources/2020_CA_Region_Mobility_Report.csv");

In [34]:
data.tail()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,place_id,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
89576,CA,Canada,Yukon,,,CA-YT,,ChIJiYtStJiBF1ER6pbMYdWcFC4,2021-02-12,-30.0,,,,-11.0,
89577,CA,Canada,Yukon,,,CA-YT,,ChIJiYtStJiBF1ER6pbMYdWcFC4,2021-02-13,-33.0,,,,3.0,
89578,CA,Canada,Yukon,,,CA-YT,,ChIJiYtStJiBF1ER6pbMYdWcFC4,2021-02-14,-37.0,,,,,
89579,CA,Canada,Yukon,,,CA-YT,,ChIJiYtStJiBF1ER6pbMYdWcFC4,2021-02-15,-42.0,,,,-14.0,
89580,CA,Canada,Yukon,,,CA-YT,,ChIJiYtStJiBF1ER6pbMYdWcFC4,2021-02-16,-37.0,,,,-13.0,


In [35]:
data["sub_region_1"].unique()

array([nan, 'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
       'Newfoundland and Labrador', 'Northwest Territories',
       'Nova Scotia', 'Nunavut', 'Ontario', 'Prince Edward Island',
       'Quebec', 'Saskatchewan', 'Yukon'], dtype=object)

In [36]:
data["sub_region_2"].unique()

array([nan, 'Division No. 1 - Medicine Hat',
       'Division No. 10 - Lloydminster', 'Division No. 11 - Edmonton',
       'Division No. 12 - Cold Lake', 'Division No. 13 - Whitecourt',
       'Division No. 14 - Hinton', 'Division No. 15 - Canmore',
       'Division No. 16 - Fort McMurray', 'Division No. 17 - Slave Lake',
       'Division No. 18 - Grande Cache',
       'Division No. 19 - Grande Prairie', 'Division No. 2 - Lethbridge',
       'Division No. 3 - Claresholm', 'Division No. 5 - Strathmore',
       'Division No. 6 - Calgary', 'Division No. 7 - Wainwright',
       'Division No. 8 - Red Deer',
       'Division No. 9 - Rocky Mountain House', 'Alberni-Clayoquot',
       'Bulkley-Nechako', 'Capital', 'Cariboo', 'Central Kootenay',
       'Central Okanagan', 'Columbia-Shuswap', 'Comox Valley',
       'Cowichan Valley', 'East Kootenay', 'Fraser Valley',
       'Fraser-Fort George', 'Kitimat-Stikine', 'Kootenay Boundary',
       'Metro Vancouver', 'Mount Waddington', 'Nanaimo', 'Nor

In [37]:
# metro_area field doesn't seem to be used for Canada
data["metro_area"].unique()

array([nan])

In [38]:
columns = ["sub_region_1", "sub_region_2", "date",
           "retail_and_recreation_percent_change_from_baseline", 
           "grocery_and_pharmacy_percent_change_from_baseline", 
           "parks_percent_change_from_baseline",
          "transit_stations_percent_change_from_baseline", 
           "workplaces_percent_change_from_baseline",
          "residential_percent_change_from_baseline"]

mobility_data = data[columns]



In [39]:
mobility_data.tail()

Unnamed: 0,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
89576,Yukon,,2021-02-12,-30.0,,,,-11.0,
89577,Yukon,,2021-02-13,-33.0,,,,3.0,
89578,Yukon,,2021-02-14,-37.0,,,,,
89579,Yukon,,2021-02-15,-42.0,,,,-14.0,
89580,Yukon,,2021-02-16,-37.0,,,,-13.0,


In [40]:
mobility_data[mobility_data["sub_region_2"] == "Ottawa Division"].tail()

Unnamed: 0,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
45514,Ontario,Ottawa Division,2021-02-12,-52.0,-16.0,-39.0,-75.0,-55.0,25.0
45515,Ontario,Ottawa Division,2021-02-13,-48.0,-4.0,-4.0,-70.0,-22.0,12.0
45516,Ontario,Ottawa Division,2021-02-14,-37.0,6.0,77.0,-68.0,-13.0,8.0
45517,Ontario,Ottawa Division,2021-02-15,-73.0,-61.0,12.0,-85.0,-83.0,29.0
45518,Ontario,Ottawa Division,2021-02-16,-47.0,-21.0,-52.0,-72.0,-59.0,23.0


In [41]:
city_mask = (mobility_data["sub_region_2"] == "Ottawa Division") \
          | (mobility_data["sub_region_2"] == "Toronto Division") \
          | (mobility_data["sub_region_2"] == "Regional Municipality of Durham") \
          | (mobility_data["sub_region_2"] == "Regional Municipality of Halton") \
          | (mobility_data["sub_region_2"] == "Regional Municipality of Peel") \
          | (mobility_data["sub_region_2"] == "Regional Municipality of York")

mobility_data["date"] = pd.to_datetime(mobility_data["date"], format="%Y-%m-%d" )

start_date = datetime.strptime("2020-10-06", "%Y-%m-%d")
end_date = datetime.strptime("2021-02-06", "%Y-%m-%d")

four_month_mask = (start_date <= mobility_data["date"]) & (mobility_data["date"] <= end_date)

mobility_dimension = mobility_data.loc[city_mask & four_month_mask]

mobility_dimension.columns = ["province", "subregion", "date", 
              "retail_and_recreation", "grocery_and_pharmacy", "parks", "transit_stations", "workplaces", "residential"]

mobility_dimension.insert(0, "mobility_key", np.arange(len(mobility_dimension.index)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mobility_data["date"] = pd.to_datetime(mobility_data["date"], format="%Y-%m-%d" )


In [42]:
mobility_dimension.head()

Unnamed: 0,mobility_key,province,subregion,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
45385,0,Ontario,Ottawa Division,2020-10-06,-28.0,-6.0,-4.0,-64.0,-53.0,18.0
45386,1,Ontario,Ottawa Division,2020-10-07,-32.0,-9.0,-26.0,-68.0,-53.0,19.0
45387,2,Ontario,Ottawa Division,2020-10-08,-24.0,3.0,5.0,-61.0,-52.0,18.0
45388,3,Ontario,Ottawa Division,2020-10-09,-29.0,0.0,19.0,-57.0,-50.0,17.0
45389,4,Ontario,Ottawa Division,2020-10-10,-29.0,1.0,89.0,-49.0,-4.0,6.0


In [43]:
# Looking for missing values

print(mobility_dimension["retail_and_recreation"].isna().sum())

print(mobility_dimension["grocery_and_pharmacy"].isna().sum())

print(mobility_dimension["parks"].isna().sum())

print(mobility_dimension["transit_stations"].isna().sum())

print(mobility_dimension["workplaces"].isna().sum())

print(mobility_dimension["residential"].isna().sum())


0
0
0
0
0
0


In [44]:
mobility_dimension.to_csv("dimensions/mobility_data_dimension_dated.csv", index=False)

In [45]:
mobility_dimension_nodate = mobility_dimension.drop(columns='date', inplace=False)
mobility_dimension_nodate.to_csv("dimensions/mobility_data_dimension.csv", index=False)

In [46]:
mobility_dimension_nodate.dtypes

mobility_key               int32
province                  object
subregion                 object
retail_and_recreation    float64
grocery_and_pharmacy     float64
parks                    float64
transit_stations         float64
workplaces               float64
residential              float64
dtype: object