In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

#path = "../Input_Data/Region_Mobility_Report_CSVs/2020_IT_Region_Mobility_Report.csv"
#subsetting a dataframe according to its unique feature values
def subset_df_feature(df, 
                      feature):
    """
    Split a dataframe into one sub-dataframe per unique value of a feature.

    df: input dataframe
    feature: feature (column) of df for which unique values are computed

    return DICT_df_feature

    > DICT_df_feature is a dictionary which assigns each unique feature value
      to the portion of the dataframe having that feature value. Keys follow
      the order of first appearance in df[feature]; each portion keeps the
      original index and row order.
    > Rows whose feature value is missing (NaN / None / NaT) are grouped under
      the missing-value key returned by `unique()`. A plain `==` comparison
      never matches a missing value, so those rows would otherwise be dropped
      and the key would map to an empty dataframe.
    """

    column = df[feature]
    is_missing = column.isna()

    DICT_df_feature = {}
    for f in column.unique():
        # is_scalar guards pd.isna against list-like cell values, for which it
        # would return an array instead of a single boolean.
        if pd.api.types.is_scalar(f) and pd.isna(f):
            mask = is_missing
        else:
            mask = column == f
        DICT_df_feature[f] = df[mask]

    return DICT_df_feature

def compare_sets(a,b):
    """
    a, b: array-like collections of values to compare

    return a dictionary with three numpy arrays:

    > 'ab_common' : values found in both a and b (np.intersect1d)
    > 'a_diff_b'  : values found in a but not in b (np.setdiff1d)
    > 'b_diff_a'  : values found in b but not in a (np.setdiff1d)
    """

    only_in_a = np.setdiff1d(a, b)
    only_in_b = np.setdiff1d(b, a)
    shared = np.intersect1d(a, b)

    return dict(ab_common=shared, a_diff_b=only_in_a, b_diff_a=only_in_b)

In [8]:
# Load the merged daily time-series table; the first CSV column is used as the index.
df_mer = pd.read_csv('../Input_Data/ts_mob_covid_daily_residential.csv', index_col = 0)

# Drop the Austrian rows.
# NOTE(review): the intent is to keep only Spain and France; this filter only
# removes 'Austria', so confirm the file contains no other countries.
df_mer = df_mer[ df_mer.Country != 'Austria' ]

#Remove Lozere province 
prov = 'Lozère'
df_mer = df_mer[ df_mer['prov'] != prov]

#subset dataset by province time-series
DICT_prov = subset_df_feature(df_mer, 'prov')
Provs = list(DICT_prov.keys())

# Only numeric columns are interpolated. Calling DataFrame.interpolate() on the
# whole frame also touches the text columns (e.g. Country, prov), which newer
# pandas releases deprecate for object dtype.
numeric_cols = df_mer.select_dtypes(include='number').columns

df_mer_new = []

for p in Provs: 
    
    #select time-series of single province (copy so the assignment below does
    #not write into a slice of df_mer)
    ts_p = DICT_prov[p].copy()
    #perform linear interpolation within the province only, so gaps are never
    #filled with values from a neighbouring province.
    #NOTE(review): the default 'linear' method treats rows as equally spaced;
    #this assumes rows are already in chronological order - confirm.
    if len(numeric_cols) > 0:
        ts_p[numeric_cols] = ts_p[numeric_cols].interpolate()
    
    df_mer_new.append(ts_p)
    
# Stack the per-province frames back together, in the order of Provs.
df_mer_new = pd.concat(df_mer_new, axis=0)

path_new = '../Input_Data/ts_mob_covid_daily_residential_interpolated.csv'
df_mer_new.to_csv(path_new)