In [5]:
import numpy as np
import pandas as pd
import plotly.express as px

#path = "../Input_Data/Region_Mobility_Report_CSVs/2020_IT_Region_Mobility_Report.csv"
#subsetting a dataframe according to its unique feature values
def subset_df_feature(df, 
                      feature):
    """
    df: input dataframe
    feature: feature of df for which unique values are computed
    
    return DICT_df_feature 

    > DICT_df_feature is a dictionary which assigns each unique feature value to the portion of the dataframe having that feature value
    """
    
    feature_values = df[feature].unique()
    
    DICT_df_feature = {f: df[df[feature] == f] for f in feature_values}
    
    return DICT_df_feature

def compare_sets(a,b):
    
    ab_common = np.intersect1d(a,b)
    a_diff_b  = np.setdiff1d(a,b)
    b_diff_a  = np.setdiff1d(b,a)
    
    DICT_comp = {'ab_common' : ab_common,
                 'a_diff_b' : a_diff_b, 
                 'b_diff_a' : b_diff_a}
    return DICT_comp

__IMPORT GLOBAL CSV__

In [3]:
path_glob = "../Input_Data/Global_Mobility_Report.csv.gz"
df_glob = pd.read_csv(path_glob, index_col=0)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
df_glob.head()

Unnamed: 0_level_0,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,place_id,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
country_region_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-15,0.0,4.0,5.0,0.0,2.0,1.0
AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-16,1.0,4.0,4.0,1.0,2.0,1.0
AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-17,-1.0,1.0,5.0,1.0,2.0,1.0
AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-18,-2.0,1.0,5.0,0.0,2.0,1.0
AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-19,-2.0,0.0,4.0,-1.0,2.0,1.0


__CREATING RESIDENTIAL TIME-SERIES__

In [4]:
Countries = ['Austria', 'France', 'Spain']

cols = ['sub_region_2', 
        'date',
        'residential_percent_change_from_baseline']

Study_period = ['2020-09-01', '2021-07-20']

DICT_c = {}

for c in Countries: 
    
    #records for c country
    df_c = df_glob[ df_glob.country_region == c]
    #records at 2nd level administrative resolution
    df_c = df_c[~df_c.sub_region_2.isna()][cols]
    df_c = df_c[ (df_c.date >= Study_period[0]) & (df_c.date <= Study_period[1]) ]
    df_c.columns = ['prov', 'date', 'residential']
    
    Date_range = pd.date_range(Study_period[0], Study_period[1], freq = "D").astype(str)
    Provs = df_c.prov.unique()
    
    df_c.set_index('date', inplace = True)
    
    #extending date indexes 
    DICT_p = subset_df_feature(df_c, 'prov')
    DICT_p = {p: DICT_p[p].reindex(Date_range) for p in Provs}
    
    #time-series framing of data 
    ts_c = pd.concat([DICT_p[p].drop('prov', axis=1).rename(columns = {'residential':p}) for p in Provs], axis=1)
    DICT_c[c] = ts_c

#RENAME PROVINCES OF AUSTRIA (remove District and add Stadt string) 
c = "Austria"
Provs_c = DICT_c[c].columns
Provs_c_rename = []
for p in Provs_c:
    if 'District' in p:
        Provs_c_rename.append(p[:-9])
    else:
        Provs_c_rename.append(p + " (Stadt)") 
DICT_c["Austria"].columns = Provs_c_rename      

__RENAMING PROVINCE NAMES OF RESIDENTIAL TIME-SERIES__

In [6]:
DICT_rename_provs = {}
DICT_rename_provs['Spain'] = {'Biscay': 'Bizkaia', 
                              'Girona': 'Gerona', 
                              #'Las Palmas': '', 
                              'Lleida': 'Lérida', 
                              'Province of Ourense': 'Orense',
                              #'Santa Cruz de Tenerife': '', 
                              'Seville':'Sevilla', 
                              'Álava':'Araba/Álava'} 

DICT_rename_provs['France'] = {'Ariege': 'Ariège', 
                               'Bouches-du-Rhone': 'Bouches-du-Rhône', 
                               'Correze': 'Corrèze', 
                               #'Corse-du-Sud': '',
                               'Finistere': 'Finistère', 
                               #'Haute-Corse': '', 
                               'Isere': 'Isère', 
                               'Puy-de-Dome': 'Puy-de-Dôme'}

DICT_rename_provs['Austria'] = {'East Tyrol (Stadt)':'', 
                                'Innsbruck (Stadt)':'Innsbruck-Stadt', 
                                'Klagenfurt (Stadt)':'Klagenfurt am Wörthersee (Stadt)',
                                'Klagenfurt-Land':'Klagenfurt Land', 
                                'Krems-Land':'Krems (Land)', 
                                'Leibnitz (Stadt)':'Leibnitz',
                                'St. Johann im Pongau':'Sankt Johann im Pongau', 
                                'Südoststeiermark (Stadt)':'Südoststeiermark',
                                'Villach-Land (Stadt)':'Villach Land', 
                                'Wiener Neustadt-Land':'Wiener Neustadt (Land)'}

for c in Countries:

    #Residential time-series
    ts_c = DICT_c[c]
    
    #column Provinces of the time-series 
    Provs = ts_c.columns
    
    #renaming the Provinces
    DICT_rename = DICT_rename_provs[c]
    
    New_Prov_names = []
    for p in Provs:
        if p not in DICT_rename.keys():
            New_Prov_names.append(p)
        else:
            New_Prov_names.append(DICT_rename[p])
    
    ts_c.columns = New_Prov_names
    DICT_c[c] = ts_c

__IMPORT OLD META TIME-SERIES__

In [7]:
df_old = pd.read_csv('../Input_Data/ts_mob_covid_daily.csv', index_col = 0)
Countries = ['Austria', 'France', 'Spain']
DICT_old_c = {c : df_old[df_old.Country == c] for c in Countries}

__COMPARE PROVINCE NAMES OF GOOGLE AND META DATA__

In [12]:
c = "France"
ts_c_google = DICT_c[c]
ts_c_meta   = DICT_old_c[c] 

provs_google = ts_c_google.columns 
provs_meta = ts_c_meta.prov.unique()

DICT_compare = compare_sets(provs_google, provs_meta)

print(c + ' Selected provinces: \n')
print(DICT_compare['ab_common'])

print('\n'+ c + ' neglected provinces from epidemiologic data: \n')
print(DICT_compare['b_diff_a'])

France Selected provinces: 

['Ain' 'Aisne' 'Allier' 'Alpes-Maritimes' 'Alpes-de-Haute-Provence'
 'Ardennes' 'Ardèche' 'Ariège' 'Aube' 'Aude' 'Aveyron' 'Bas-Rhin'
 'Bouches-du-Rhône' 'Calvados' 'Cantal' 'Charente' 'Charente-Maritime'
 'Cher' 'Corrèze' 'Creuse' "Côte-d'Or" "Côtes-d'Armor" 'Deux-Sèvres'
 'Dordogne' 'Doubs' 'Drôme' 'Essonne' 'Eure' 'Eure-et-Loir' 'Finistère'
 'Gard' 'Gers' 'Gironde' 'Haut-Rhin' 'Haute-Garonne' 'Haute-Loire'
 'Haute-Marne' 'Haute-Savoie' 'Haute-Saône' 'Haute-Vienne' 'Hautes-Alpes'
 'Hautes-Pyrénées' 'Hauts-de-Seine' 'Hérault' 'Ille-et-Vilaine' 'Indre'
 'Indre-et-Loire' 'Isère' 'Jura' 'Landes' 'Loir-et-Cher' 'Loire'
 'Loire-Atlantique' 'Loiret' 'Lot' 'Lot-et-Garonne' 'Lozère'
 'Maine-et-Loire' 'Manche' 'Marne' 'Mayenne' 'Meurthe-et-Moselle' 'Meuse'
 'Morbihan' 'Moselle' 'Nièvre' 'Nord' 'Oise' 'Orne' 'Paris'
 'Pas-de-Calais' 'Puy-de-Dôme' 'Pyrénées-Atlantiques'
 'Pyrénées-Orientales' 'Rhône' 'Sarthe' 'Savoie' 'Saône-et-Loire'
 'Seine-Maritime' 'Seine-Saint-D

__JOINING THE TIME-SERIES__

In [24]:
df_meta_epid_resid = [] 

Countries = ['Austria', 'France', 'Spain']

for c in Countries:

    ts_c_google = DICT_c[c]
    ts_c_meta   = DICT_old_c[c] 
    
    provs_google = ts_c_google.columns 
    provs_meta = ts_c_meta.prov.unique()
    
    DICT_compare = compare_sets(provs_google, provs_meta)
    
    Common_provs = DICT_compare['ab_common']
    
    ts_residential = ts_c_google[Common_provs]
    ts_epid_meta = ts_c_meta[ ts_c_meta.prov.isin(Common_provs) ]
    
    DICT_epid_meta_prov = subset_df_feature(ts_epid_meta, 'prov')
    
    df_new = []
    for p in Common_provs:
        ts_p = DICT_epid_meta_prov[p]
        ts_p["residential"] = ts_residential[p].values
        df_new.append(ts_p)
    
    df_new = pd.concat(df_new, axis = 0)
    
    df_meta_epid_resid.append(df_new)
    
df_meta_epid_resid = pd.concat(df_meta_epid_resid, axis = 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ts_p["residential"] = ts_residential[p].values


In [26]:
#df_meta_epid_resid.to_csv('../Input_Data/ts_mob_covid_daily_residential.csv')