In [1]:
import sys
from pathlib import Path

# Sanity check: show which virtual environment the kernel is running in.
# For an interpreter located at <env>/bin/python the third path component
# from the end is the environment directory. Path.parts is used instead of
# splitting on '/' so the check does not depend on the OS path separator.
Path(sys.executable).parts[-3]

'mobility_venv'

In [2]:
import datetime
import json
import os
import warnings

import numpy as np
import pandas as pd
from tqdm import tqdm

# Notebook-wide configuration.
# NOTE(review): this silences every warning category for the whole notebook,
# including pandas' SettingWithCopyWarning - consider narrowing the filter.
warnings.simplefilter('ignore')
# Enable tqdm's pandas integration (progress_apply and friends).
tqdm.pandas()
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## Load Google mobility reports data

In [3]:
# Root directory of the downloaded Google mobility reports and the snapshot
# sub-directory to read from (presumably named after the download date).
google_data = '../../data/external/google_mobility_reports'
reports_dir = '11-05-2021'
report_2020_file = '2020_PL_Region_Mobility_Report.csv'
report_2021_file = '2021_PL_Region_Mobility_Report.csv'

# Read the 2020 and 2021 reports for Poland from the selected snapshot.
snapshot_dir = os.path.join(google_data, reports_dir)
data_2020, data_2021 = (
    pd.read_csv(os.path.join(snapshot_dir, report_file))
    for report_file in (report_2020_file, report_2021_file)
)

## Clean data

In [4]:
# unnecessary columns that are removed from both reports
drop_cols = ['country_region_code', 'country_region', 'metro_area', 'census_fips_code']


def clean_report(report, unnecessary_cols=drop_cols):
    """Return a cleaned copy of a mobility report DataFrame.

    The input frame is not modified, so the cell can be re-run safely:
    columns that are already gone are skipped (``errors='ignore'``), already
    shortened column names are left as they are, and an already converted
    'date' column stays a datetime column.

    Parameters
    ----------
    report : pandas.DataFrame
        Report as read from the CSV file; must contain a 'date' column.
    unnecessary_cols : list of str
        Columns to remove if present.

    Returns
    -------
    pandas.DataFrame
        New frame without `unnecessary_cols`, with the
        '_percent_change_from_baseline' suffix stripped from the column
        names and 'date' converted from string to datetime.
    """
    return (
        report
        .drop(columns=unnecessary_cols, errors='ignore')
        # map long column names to short ones
        .rename(columns=lambda col: col.replace('_percent_change_from_baseline', ''))
        # convert 'date' column from string to datetime format
        .assign(date=lambda df: pd.to_datetime(df['date']))
    )


data_2020 = clean_report(data_2020)
data_2021 = clean_report(data_2021)

# Region columns (the counts include the NaN value):
#   iso_3166_2_code - 16 unique values + NaN = 17; codes of the principal
#                     subdivisions (voivodeship codes), e.g. 'PL-DS'
#   sub_region_1    - 16 unique values + NaN = 17; provinces (voivodeships),
#                     e.g. 'Lower Silesian Voivodeship'
#   sub_region_2    - 380 unique values + NaN = 381; e.g. 'Aleksandrów County',
#                     'Augustów County', 'Bartoszyce County', ...
#                     relevant here: 'Wrocław County', 'wrocławski'

In [5]:
# Preview the first five rows of the 2020 report.
data_2020.head(n=5)

Unnamed: 0,sub_region_1,sub_region_2,iso_3166_2_code,place_id,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
0,,,,ChIJuwtkpGSZAEcR6lXMScpzdQk,2020-02-15,7.0,-1.0,26.0,4.0,0.0,-1.0
1,,,,ChIJuwtkpGSZAEcR6lXMScpzdQk,2020-02-16,12.0,-13.0,18.0,6.0,-2.0,0.0
2,,,,ChIJuwtkpGSZAEcR6lXMScpzdQk,2020-02-17,6.0,1.0,20.0,1.0,1.0,0.0
3,,,,ChIJuwtkpGSZAEcR6lXMScpzdQk,2020-02-18,3.0,-1.0,13.0,-1.0,1.0,1.0
4,,,,ChIJuwtkpGSZAEcR6lXMScpzdQk,2020-02-19,5.0,0.0,13.0,-1.0,1.0,0.0


In [6]:
# All distinct sub_region_2 names in the 2020 report, sorted alphabetically.
# Missing values are removed with dropna() instead of comparing str(value)
# with 'nan', which would also discard a region literally named 'nan'.
unique_sub_region_2 = sorted(data_2020['sub_region_2'].dropna().unique().tolist())

# Names containing 'wrocław' (case-insensitive match).
[sub_region for sub_region in unique_sub_region_2 if 'wrocław' in sub_region.lower()]

['Inowrocław County', 'Wrocław County', 'wrocławski']

In [7]:
# Number of rows whose sub_region_1 is the Lower Silesian Voivodeship.
# (To inspect the matching codes instead, select ['iso_3166_2_code'].unique()
# on the filtered frame.)
len(data_2020.loc[data_2020['sub_region_1'].eq('Lower Silesian Voivodeship')])

9630

In [8]:
# Number of rows whose sub_region_2 is 'Wrocław County'. Rows labelled
# 'wrocławski' are not included (that alternative was left disabled).
len(data_2020.loc[data_2020['sub_region_2'].eq('Wrocław County')])

321

In [9]:
# Number of rows carrying the ISO 3166-2 code 'PL-DS'.
# (To inspect the matching names instead, select ['sub_region_1'].unique()
# on the filtered frame.)
len(data_2020.loc[data_2020['iso_3166_2_code'].eq('PL-DS')])

321

### Focus on sub_region_2 == 'Wrocław County'

In [10]:
# Analysis window; both bounds are inclusive.
date_start = pd.to_datetime('2020-06-15')
date_end = pd.to_datetime('2020-07-15')

# Rows for 'Wrocław County' inside the window. The explicit copy makes `data`
# an independent frame, so later cells can modify it without triggering
# pandas' SettingWithCopyWarning and without any risk of touching data_2020.
data = data_2020[
    (data_2020['sub_region_2'] == 'Wrocław County')
    & data_2020['date'].between(date_start, date_end)
].copy()

In [11]:
# Drop weekends. dt.dayofweek is 0 for Monday, ..., 6 for Sunday, so values
# below 5 are Monday-Friday. Filtering with the boolean mask directly avoids
# adding (and then deleting) a helper column on a frame that was sliced from
# data_2020, which raises pandas' SettingWithCopyWarning.
data = data[data['date'].dt.dayofweek < 5]

In [12]:
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() - that only appends a stray 'None' line to the output.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23 entries, 41848 to 41878
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   sub_region_1           23 non-null     object        
 1   sub_region_2           23 non-null     object        
 2   iso_3166_2_code        0 non-null      object        
 3   place_id               23 non-null     object        
 4   date                   23 non-null     datetime64[ns]
 5   retail_and_recreation  23 non-null     float64       
 6   grocery_and_pharmacy   23 non-null     float64       
 7   parks                  23 non-null     float64       
 8   transit_stations       23 non-null     float64       
 9   workplaces             23 non-null     float64       
 10  residential            23 non-null     float64       
dtypes: datetime64[ns](1), float64(6), object(4)
memory usage: 2.2+ KB
None


Unnamed: 0,sub_region_1,sub_region_2,iso_3166_2_code,place_id,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
41848,Lower Silesian Voivodeship,Wrocław County,,ChIJv4q11MLpD0cRwFAALURV1YE,2020-06-15,-16.0,-14.0,65.0,-42.0,-34.0,7.0
41849,Lower Silesian Voivodeship,Wrocław County,,ChIJv4q11MLpD0cRwFAALURV1YE,2020-06-16,-15.0,-17.0,98.0,-41.0,-34.0,9.0
41850,Lower Silesian Voivodeship,Wrocław County,,ChIJv4q11MLpD0cRwFAALURV1YE,2020-06-17,-16.0,-17.0,105.0,-39.0,-33.0,7.0
41851,Lower Silesian Voivodeship,Wrocław County,,ChIJv4q11MLpD0cRwFAALURV1YE,2020-06-18,-20.0,-16.0,35.0,-42.0,-35.0,9.0
41852,Lower Silesian Voivodeship,Wrocław County,,ChIJv4q11MLpD0cRwFAALURV1YE,2020-06-19,-26.0,-19.0,-20.0,-44.0,-35.0,10.0


In [13]:
# Summary statistics of the filtered frame.
summary_stats = data.describe()
summary_stats

Unnamed: 0,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
count,23.0,23.0,23.0,23.0,23.0,23.0
mean,-14.869565,-17.782609,66.869565,-38.0,-34.73913,6.869565
std,4.836456,2.859697,40.895993,3.316625,1.214211,1.841549
min,-27.0,-26.0,-26.0,-45.0,-37.0,4.0
25%,-16.0,-19.0,47.5,-40.0,-35.5,5.5
50%,-14.0,-17.0,73.0,-38.0,-35.0,7.0
75%,-12.0,-16.0,92.0,-36.0,-34.0,7.5
max,-7.0,-14.0,130.0,-33.0,-33.0,11.0


In [14]:
def save_object(obj, name, out_dir='out'):
    """Serialize `obj` as JSON into `out_dir`.

    Parameters
    ----------
    obj : JSON-serializable object
        Anything accepted by ``json.dump``.
    name : str
        Target file name; '.json' is appended unless the name already ends
        with it.
    out_dir : str, default 'out'
        Directory the file is written to. It is created (including missing
        parents) when it does not exist yet.

    Raises
    ------
    TypeError
        If `obj` is not JSON-serializable (raised by ``json.dump``).
    OSError
        If the directory or file cannot be created.
    """
    file_name = name if name.endswith('.json') else name + '.json'
    # An empty out_dir means "current directory"; os.makedirs('') would raise.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    file_path = os.path.join(out_dir, file_name)
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(obj, f)

In [15]:
# Mean of every numeric column of the filtered frame, as a plain dict of
# Python floats. Taking the mean directly (numeric columns only) instead of
# describe().loc['mean'] keeps the datetime 'date' column out of the result:
# from pandas 2.0 on, describe() also summarises datetime columns, which would
# put a Timestamp into the dict and break JSON serialization.
means_dict = data.mean(numeric_only=True).to_dict()
means_dict

{'retail_and_recreation': -14.869565217391305,
 'grocery_and_pharmacy': -17.782608695652176,
 'parks': 66.8695652173913,
 'transit_stations': -38.0,
 'workplaces': -34.73913043478261,
 'residential': 6.869565217391305}

In [16]:
# Directory the processed mobility statistics are written to.
out_dir = '../../data/processed/mc/pandemic_mobility'

# exist_ok=True creates the directory tree only when it is missing, without
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(out_dir, exist_ok=True)

In [17]:
# Write the mean values to 'mobility_change.json' inside out_dir.
save_object(means_dict, 'mobility_change', out_dir=out_dir)