In [1]:
%run __init__.ipynb

Successfully connected to MongoDB


In [2]:
%%time

#--- Check if the data are already updated
def init_script():
    stop = False
    file_path = os.path.join(OUTPUT_FOLDER,'WORLD_DB_TREND.csv')
    if path.exists(file_path):
        df_check = pd.read_csv(file_path,
                               sep=';',
                               low_memory=False)
        
        date_check = df_check[df_check['SCENARIO'] == YESTERDAY.strftime('%d/%m/%Y')]
        if len(date_check) == 0:
            %run "covid-19_world.ipynb"
        else:
            raise Exception("Script already updated !")
    else:
        %run "covid-19_world.ipynb"
    
init_script()

Exception: Script already updated !

In [3]:
%%time

#--- Load data source
# get_datasource is a project helper defined outside this notebook
# (presumably brought in by `%run __init__.ipynb` — TODO confirm).
# db_conso is consumed by get_entities (entity selector).
db_conso = get_datasource('WORLD_DB_CONSO')
# db_all feeds get_scenarios, the dashboard_* builders, domain_103 and domain_106.
db_all = get_datasource('WORLD_DB_ALL')
# db_trend feeds domain_101 (trend charts).
db_trend = get_datasource('WORLD_DB_TREND')

CPU times: user 5.89 s, sys: 1.18 s, total: 7.06 s
Wall time: 7.38 s


In [4]:
def df_to_csv_mongo(df,df_name):
    """Persist ``df`` under ``df_name``, or just report that it is empty.

    Args:
        df: DataFrame to persist.
        df_name: Name handed to ``df_save`` and used as the MongoDB target name.

    Returns:
        None. An empty frame is never saved; a message is printed instead.
    """
    # Guard clause: nothing is written for an empty frame.
    if df.empty:
        print(f'DataFrame {df_name} is empty !')
        return

    # CSV export through the project helper df_save.
    df_save(df, df_name,'csv')

    # MongoDB export is optional and driven by the USE_MONGO flag.
    # NOTE(review): meaning of the trailing `True` argument is not visible here.
    if USE_MONGO:
        naas_drivers.mongo.send(df, df_name, DB_APP, True)

In [5]:
%%time

#Dashboard value dynamic template
def dashboard_value(df, kpi, domain_num):
    """Build, save and index the "value" dashboard dataset for one KPI.

    Args:
        df: Source frame; must expose the columns listed in ``column_sources``.
        kpi: KPI name to keep (matched against the ``KPI`` column).
        domain_num: Name used for the CSV/MongoDB export and for the MongoDB
            collection that receives the index.

    Returns:
        The saved DataFrame, sorted by ENTITY then SCENARIO (both descending)
        and passed through the project helper ``optimize``.
    """
    entity_groups = ['WORLDWIDE','Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    selection = df[df['ENTITY_GROUPS'].isin(entity_groups) & df['KPI'].isin([kpi])]

    # Output column -> source column. Order matters: it is the column order
    # of the exported dataset.
    column_sources = {
        'ENTITY': 'ENTITY',
        'ENTITY_GROUPS': 'ENTITY_GROUPS',
        'SCENARIO': 'SCENARIO',
        'DATE_ORDER': 'DATE_ORDER',
        'LABEL': 'KPI',
        'VALUE': 'VALUE',
        'VALUE_D-1': 'VALUE_D-1',
        'VAR': 'VARV',
        'VARP': 'VARP',
        'UNIT_VALUE': 'UNIT_VALUE',
        'UNIT_VAR': 'UNIT_VAR',
        'UNIT_VARP': 'UNIT_VARP',
        'PRECISION_VALUE': 'PRECISION_VALUE',
        'PRECISION_VAR': 'PRECISION_VAR',
        'PRECISION_VARP': 'PRECISION_VARP',
    }
    payload = {target: selection[source] for target, source in column_sources.items()}
    # Same timestamp string on every row.
    payload['LAST_UPDATE'] = datetime.now().strftime('%d/%m/%Y %H:%M:%S')

    domain = pd.DataFrame(payload).sort_values(by=['ENTITY','SCENARIO'],ascending=False)
    domain = optimize(domain, ['LAST_UPDATE'])

    df_to_csv_mongo(domain,domain_num)
    print(DB_APP, domain_num)

    # NOTE(review): the index is created even when USE_MONGO is falsy, whereas
    # df_to_csv_mongo only sends data when it is truthy — confirm intended.
    MONGO_DB[DB_APP][domain_num].create_index([("ENTITY", DESCENDING),("SCENARIO", DESCENDING)])
    return domain

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.44 µs


In [6]:
#Duplicate to rank countries WORLDWIDE
def get_ww(df):
    """Append a WORLDWIDE copy of every row whose ENTITY is a continent group.

    Rows whose ``ENTITY`` is one of the continent-level groups are duplicated
    with ``ENTITY`` replaced by ``"WORLDWIDE"`` and appended after the
    original rows, so the same rows can also be ranked worldwide.

    Args:
        df: Frame with an ``ENTITY`` column. It is not modified.

    Returns:
        A new DataFrame: the original rows followed by the WORLDWIDE copies.
        Original index labels are kept, so they repeat for duplicated rows.
    """
    continents = ['Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    # Filter first, then copy: only the matching rows are duplicated in memory.
    df_ww = df[df['ENTITY'].isin(continents)].copy()
    df_ww['ENTITY'] = "WORLDWIDE"
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    return pd.concat([df, df_ww])

In [7]:
#Dashboard leaderboard template
def dashboard_leaderboard(df,kpi,domain_num):
    """Build, save and index the leaderboard dataset for one KPI.

    In the output, ``ENTITY`` holds the source ``ENTITY_GROUPS`` and ``LABEL``
    holds the source ``ENTITY``. For domains '010', '011' and '012' the rows
    are additionally duplicated under ENTITY "WORLDWIDE" through ``get_ww``.

    Args:
        df: Source frame; must expose the columns listed in ``column_sources``.
        kpi: KPI name to keep (matched against the ``KPI`` column).
        domain_num: Name used for the CSV/MongoDB export and for the MongoDB
            collection that receives the index.

    Returns:
        The saved DataFrame.
    """
    selection = df[df['KPI'].isin([kpi])]

    # Output column -> source column; insertion order is the export order.
    column_sources = {
        'ENTITY': 'ENTITY_GROUPS',
        'SCENARIO': 'SCENARIO',
        'DATE_ORDER': 'DATE_ORDER',
        'LABEL': 'ENTITY',
        'VALUE': 'VALUE',
        'VALUE_D-1': 'VALUE_D-1',
        'VAR': 'VARV',
        'VARP': 'VARP',
        'UNIT_VALUE': 'UNIT_VALUE',
        'UNIT_VAR': 'UNIT_VAR',
        'UNIT_VARP': 'UNIT_VARP',
        'PRECISION_VALUE': 'PRECISION_VALUE',
        'PRECISION_VAR': 'PRECISION_VAR',
        'PRECISION_VARP': 'PRECISION_VARP',
    }
    payload = {target: selection[source] for target, source in column_sources.items()}
    # Same timestamp string on every row.
    payload['LAST_UPDATE'] = datetime.now().strftime('%d/%m/%Y %H:%M:%S')

    domain = pd.DataFrame(payload).sort_values(by=['ENTITY','SCENARIO'],ascending=False)

    # Only these three domains also get the worldwide duplicate rows.
    if domain_num in ('010', '011', '012'):
        domain = get_ww(domain)

    df_to_csv_mongo(domain,domain_num)
    MONGO_DB[DB_APP][domain_num].create_index([("ENTITY", DESCENDING),("SCENARIO", DESCENDING)])
    return domain

In [8]:
%%time

#-- Date selector : ENTITIES
def get_entities(df):
    """Build the entity selector table (ENTITIES, GROUPS, ordering, display label).

    Keeps the distinct (ENTITY, ENTITY_GROUPS) pairs whose group is WORLDWIDE
    or one of the continent-level groups, renames them to ENTITIES / GROUPS and
    adds:

    * ``ORDER``: fixed rank 0..8 for the nine top-level names, otherwise the
      alphabetical position of the entity + 8.
    * ``ORDER_GROUPS``: rank of the entity itself when it is a top-level name,
      overwritten by the rank of its group when the group is a continent.
    * ``ENTITY_DISPLAY``: the entity name, prefixed with ``'--- '`` when its
      group is not WORLDWIDE.

    Args:
        df: Frame with ``ENTITY`` and ``ENTITY_GROUPS`` columns.

    Returns:
        The selector table sorted by ORDER_GROUPS then ORDER (ascending).
    """
    # Position in this list is the rank given to each top-level name.
    ranked_names = ['WORLDWIDE','Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    column_aliases = {"ENTITY": "ENTITIES", "ENTITY_GROUPS": "GROUPS"}

    selected = df[df['ENTITY_GROUPS'].isin(ranked_names)]
    renamed = selected.rename(index=str, columns=column_aliases)
    pairs = renamed[['ENTITIES','GROUPS']].drop_duplicates()
    df = pairs.sort_values(by="ENTITIES", ascending=True).reset_index(drop=True)

    # Default rank: alphabetical position shifted past the reserved ranks.
    df['ORDER'] = df.index + 8

    # Rank by the entity's own name (all nine top-level names).
    for rank, name in enumerate(ranked_names):
        is_name = df['ENTITIES'] == name
        df.loc[is_name, 'ORDER'] = rank
        df.loc[is_name, 'ORDER_GROUPS'] = rank

    # Then rank by the parent group; WORLDWIDE (rank 0) is skipped on purpose
    # so that continents keep the rank they received from their own name.
    for rank, name in enumerate(ranked_names[1:], start=1):
        df.loc[df['GROUPS'] == name, 'ORDER_GROUPS'] = rank

    is_top_level = df['GROUPS'] == 'WORLDWIDE'
    df.loc[is_top_level, 'ENTITY_DISPLAY'] = df['ENTITIES']
    df.loc[~is_top_level, 'ENTITY_DISPLAY'] = '--- ' + df['ENTITIES']
    return df.sort_values(by=['ORDER_GROUPS','ORDER'], ascending=True)

entities = get_entities(db_conso)
df_to_csv_mongo(entities,'ENTITIES')
# entities

ENTITIES successfully saved in ENTITIES
Dataframe ENTITIES successfully save in database app-wsr in MongoDB. Time: --- 0.04246807098388672 secnds ---
CPU times: user 67.6 ms, sys: 0 ns, total: 67.6 ms
Wall time: 143 ms


In [9]:
%%time

#-- Date selector : SCENARIOS
def get_scenarios(df):
    """Build the scenario (date) selector table.

    Keeps the distinct (SCENARIO, DATE_ORDER) pairs and adds three string
    columns derived from SCENARIO (expected as ``dd/mm/YYYY``):

    * ``SCENARIO_DIPLAY``: the scenario date as ``dd Mon YYYY``.
    * ``SCENARIO_TODAY``: the day after the scenario, as ``dd/mm/YYYY``.
    * ``SCENARIO_TODAY_DIPLAY``: the day after the scenario, as ``dd Mon YYYY``.

    The column names (including the ``DIPLAY`` spelling) are kept as-is since
    they are part of the exported dataset.

    Args:
        df: Frame with ``SCENARIO`` and ``DATE_ORDER`` columns.

    Returns:
        The selector table sorted by DATE_ORDER, most recent first.
    """
    storage_fmt = '%d/%m/%Y'
    display_fmt = '%d %b %Y'

    df = df[['SCENARIO','DATE_ORDER']].drop_duplicates()

    # Parse once and reuse for every derived column.
    scenario_day = pd.to_datetime(df['SCENARIO'], format=storage_fmt)
    following_day = scenario_day + timedelta(days = 1)

    df['SCENARIO_DIPLAY'] = scenario_day.dt.strftime(display_fmt)
    df['SCENARIO_TODAY'] = following_day.dt.strftime(storage_fmt)
    df['SCENARIO_TODAY_DIPLAY'] = following_day.dt.strftime(display_fmt)
    return df.sort_values(by='DATE_ORDER', ascending=False)

scenarios = get_scenarios(db_all)
df_to_csv_mongo(scenarios,'SCENARIOS')
# scenarios

SCENARIOS successfully saved in SCENARIOS
Dataframe SCENARIOS successfully save in database app-wsr in MongoDB. Time: --- 0.06773948669433594 secnds ---
CPU times: user 110 ms, sys: 18.8 ms, total: 129 ms
Wall time: 233 ms


In [10]:
%%time

#--- DASHBOARD ---
#-- VALUE DYNAMIC KPIS
# One dataset per KPI: dashboard_value filters db_all on the KPI name, then
# saves and indexes the result under the three-digit domain name given as the
# last argument. Each call prints its own progress lines.
domain001 = dashboard_value(db_all,'Confirmed','001')
domain002 = dashboard_value(db_all,'Deaths','002')
domain003 = dashboard_value(db_all,'Recovered','003')
domain004 = dashboard_value(db_all,'Active cases','004')
domain005 = dashboard_value(db_all,"Fatality Rate",'005')
domain006 = dashboard_value(db_all,"Recovery Rate",'006')

001 successfully saved in 001
Dataframe 001 successfully save in database app-wsr in MongoDB. Time: --- 2.7387585639953613 secnds ---
app-wsr 001
002 successfully saved in 002
Dataframe 002 successfully save in database app-wsr in MongoDB. Time: --- 2.751988172531128 secnds ---
app-wsr 002
003 successfully saved in 003
Dataframe 003 successfully save in database app-wsr in MongoDB. Time: --- 2.7404401302337646 secnds ---
app-wsr 003
004 successfully saved in 004
Dataframe 004 successfully save in database app-wsr in MongoDB. Time: --- 2.633456230163574 secnds ---
app-wsr 004
005 successfully saved in 005
Dataframe 005 successfully save in database app-wsr in MongoDB. Time: --- 2.846132516860962 secnds ---
app-wsr 005
006 successfully saved in 006
Dataframe 006 successfully save in database app-wsr in MongoDB. Time: --- 2.6973443031311035 secnds ---
app-wsr 006
CPU times: user 14.4 s, sys: 268 ms, total: 14.7 s
Wall time: 22.9 s


In [11]:
%%time

#-- LEADERBOARD BY COUNTRIES
# Drop the rows whose ENTITY_GROUPS is WORLDWIDE before ranking; for domains
# '010'-'012' dashboard_leaderboard re-adds a WORLDWIDE view through get_ww.
db_c = db_all[(db_all['ENTITY_GROUPS'] != 'WORLDWIDE')]
# One leaderboard dataset per KPI, saved under the given domain name.
domain010 = dashboard_leaderboard(db_c,'Confirmed','010')
domain011 = dashboard_leaderboard(db_c,'Deaths','011')
domain012 = dashboard_leaderboard(db_c,'Recovered','012')

010 successfully saved in 010
Dataframe 010 successfully save in database app-wsr in MongoDB. Time: --- 43.1256742477417 secnds ---
011 successfully saved in 011
Dataframe 011 successfully save in database app-wsr in MongoDB. Time: --- 42.9902560710907 secnds ---
012 successfully saved in 012
Dataframe 012 successfully save in database app-wsr in MongoDB. Time: --- 43.507179260253906 secnds ---
CPU times: user 42.5 s, sys: 2.16 s, total: 44.6 s
Wall time: 2min 18s


In [12]:
%%time

#-- Covid-19 - TRENDS
def domain_101(df,domain_num):
    """Build, save and index the trend dataset for the case-count KPIs.

    Steps:
      1. Keep the rows whose KPI is one of the four case-count KPIs and
         reshape them to the export columns.
      2. Add a constant 'Deconfinement' series for France (copied from the
         France / Confirmed / VARV rows, VALUE forced to a fixed level).
      3. Replace the raw metric codes in UPPER_FILTER_M with display labels.
      4. Save (CSV + MongoDB), create the MongoDB index, then run ``optimize``.

    Args:
        df: Trend source frame with ENTITY, ENTITY_GROUPS, SCENARIO,
            DATE_ORDER, KPI, DATE_SCENARIO, METRIC, VALUE, UNIT and PRECISION.
        domain_num: Name used for the export and the MongoDB collection.

    Returns:
        The saved DataFrame after ``optimize``.
    """
    # NOTE: the ratio KPIs ('Deaths ratio', 'Recovered ratio') are not part of
    # this domain; if they are re-added they need their own UPPER_FILTER_M
    # label for VARV ('DAILY VARIATION (PTS)').
    filters = ['Confirmed','Deaths','Recovered','Active cases']
    domain = df[df['KPI'].isin(filters)]
    domain = pd.DataFrame({
        'ENTITY': domain['ENTITY'],
        'ENTITY_GROUPS': domain['ENTITY_GROUPS'],
        'SCENARIO': domain['SCENARIO'],
        'DATE_ORDER': domain['DATE_ORDER'],
        'GROUPS': domain['KPI'],
        'BOTTOM_FILTER': domain['DATE_SCENARIO'],
        'UPPER_FILTER_M': domain['METRIC'],
        'VALUE': domain['VALUE'],
        'UNIT_VALUE': domain['UNIT'],
        'PRECISION': domain['PRECISION'],
        'LAST_UPDATE': datetime.now().strftime('%d/%m/%Y %H:%M:%S'),
    })

    #Deconfinement France
    groups_name = 'Deconfinement'
    # Constant level drawn on the France daily-variation chart.
    # NOTE(review): presumably the daily-cases target for lifting lockdown — confirm.
    deconfinement_level = 3000
    france_daily_confirmed = (
        (domain['ENTITY'] == 'France')
        & (domain['GROUPS'] == 'Confirmed')
        & (domain['UPPER_FILTER_M'] == 'VARV')
    )
    # Filter first, then copy: avoids deep-copying the whole frame to keep a
    # few rows, and gives an independent frame that is safe to write into.
    df_france = domain[france_daily_confirmed].copy()
    df_france.loc[:,'GROUPS'] = groups_name
    df_france.loc[:,'VALUE'] = deconfinement_level
    domain = pd.concat([domain,df_france],axis=0)

    #Rename upper filter
    case_groups = filters + [groups_name]
    domain.loc[domain['UPPER_FILTER_M'] == 'VALUE','UPPER_FILTER_M'] = 'EVOLUTION'
    domain.loc[(domain['UPPER_FILTER_M'] == 'VARV') & (domain['GROUPS'].isin(case_groups)),'UPPER_FILTER_M'] = 'DAILY VARIATION (CASES)'
    domain.loc[(domain['UPPER_FILTER_M'] == 'VARP') & (domain['GROUPS'].isin(case_groups)),'UPPER_FILTER_M'] = 'DAILY VARIATION (%)'

    df_to_csv_mongo(domain,domain_num)
    df_collection = MONGO_DB[DB_APP][domain_num]
    df_collection.create_index([("ENTITY", DESCENDING), ("UPPER_FILTER_M", DESCENDING), ("BOTTOM_FILTER", DESCENDING)])
    # optimize runs after the export here, so the saved data is the
    # un-optimized frame and only the returned one is optimized.
    domain = optimize(domain, ['LAST_UPDATE'])
    return domain

domain101 = domain_101(db_trend,'101')
# domain101

101 successfully saved in 101
Dataframe 101 successfully save in database app-wsr in MongoDB. Time: --- 123.76988315582275 secnds ---
CPU times: user 1min 8s, sys: 2.87 s, total: 1min 11s
Wall time: 2min 32s


In [16]:
db_trend.info()
domain101.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3145824 entries, 0 to 3145823
Data columns (total 12 columns):
 #   Column         Dtype  
---  ------         -----  
 0   ENTITY_GROUPS  object 
 1   ENTITY         object 
 2   SCENARIO       object 
 3   DATE           object 
 4   DATE_ORDER     int64  
 5   KPI            object 
 6   METRIC         object 
 7   VALUE          float64
 8   DATE_SCENARIO  object 
 9   KPI_ORDER      float64
 10  UNIT           object 
 11  PRECISION      object 
dtypes: float64(2), int64(1), object(9)
memory usage: 288.0+ MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2097878 entries, 0 to 3139751
Data columns (total 11 columns):
 #   Column          Dtype         
---  ------          -----         
 0   ENTITY          category      
 1   ENTITY_GROUPS   category      
 2   SCENARIO        category      
 3   DATE_ORDER      int32         
 4   GROUPS          category      
 5   BOTTOM_FILTER   category      
 6   UPPER_FILTER_M  category 

In [17]:
domain101

Unnamed: 0,ENTITY,ENTITY_GROUPS,SCENARIO,DATE_ORDER,GROUPS,BOTTOM_FILTER,UPPER_FILTER_M,VALUE,UNIT_VALUE,PRECISION,LAST_UPDATE
0,0,0,22/01/2020,20200122,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16
1,0,0,23/01/2020,20200123,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16
2,0,0,24/01/2020,20200124,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16
3,0,0,25/01/2020,20200125,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16
4,0,0,26/01/2020,20200126,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16
...,...,...,...,...,...,...,...,...,...,...,...
3120743,France,France,07/10/2020,20201007,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16
3130209,France,Europe,06/10/2020,20201006,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16
3130247,France,France,06/10/2020,20201006,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16
3139713,France,Europe,05/10/2020,20201005,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16


In [19]:
%%time

#-- Covid-19 - TRENDS BY COUNTRIES
def domain_102(df,domain_num):
    """Build, save and index the trend dataset restricted to the top 10 countries.

    The top 10 is computed on the most recent DATE_ORDER, for GROUPS
    'Confirmed' and BOTTOM_FILTER 'Since beginning', among rows whose
    ENTITY_GROUPS is a continent-level group, ranked by VALUE (descending).
    Each kept row receives ``ENTITY_ORDER`` = rank of its country (0 = highest).

    Args:
        df: Frame shaped like the output of ``domain_101``. It is not modified.
        domain_num: Name used for the export and the MongoDB collection.

    Returns:
        The saved DataFrame after ``optimize``.
    """
    max_date = df['DATE_ORDER'].max()

    filters =  ['Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    latest_confirmed = df[
        (df['ENTITY_GROUPS'].isin(filters))
        & (df['DATE_ORDER'] == max_date)
        & (df['GROUPS'] == 'Confirmed')
        & (df['BOTTOM_FILTER'] == 'Since beginning')
    ]
    # The selection can hold several rows per country (one per
    # UPPER_FILTER_M metric). Keep only each country's highest-VALUE row
    # before taking the top 10; otherwise a country could fill two slots and
    # the merge below would duplicate its rows.
    countries_df = (
        latest_confirmed
        .sort_values(by='VALUE', ascending=False)
        .drop_duplicates(subset='ENTITY')
        .head(10)
        .reset_index(drop=True)
    )
    countries_df['ENTITY_ORDER'] = countries_df.index
    countries_df = countries_df[['ENTITY','ENTITY_ORDER']]

    countries = countries_df['ENTITY'].tolist()

    domain = df[(df['ENTITY_GROUPS'] != "WORLDWIDE") & (df['ENTITY'].isin(countries))]
    domain = pd.merge(domain,countries_df, on=['ENTITY'], how='left')

    df_to_csv_mongo(domain,domain_num)
    df_collection = MONGO_DB[DB_APP][domain_num]
    df_collection.create_index([("GROUPS", ASCENDING), ("UPPER_FILTER_M", DESCENDING), ("BOTTOM_FILTER", DESCENDING)])
    domain = optimize(domain, ['LAST_UPDATE'])
    return domain
    
domain_102(domain101,'102')
# domain102 = domain_102(domain101,'102')
# domain102

102 successfully saved in 102
Dataframe 102 successfully save in database app-wsr in MongoDB. Time: --- 3.488053798675537 secnds ---
CPU times: user 2.42 s, sys: 32.2 ms, total: 2.46 s
Wall time: 5.89 s


Unnamed: 0,ENTITY,ENTITY_GROUPS,SCENARIO,DATE_ORDER,GROUPS,BOTTOM_FILTER,UPPER_FILTER_M,VALUE,UNIT_VALUE,PRECISION,LAST_UPDATE,ENTITY_ORDER
0,Argentina,Argentina,22/01/2020,20200122,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16,6
1,Argentina,Argentina,23/01/2020,20200123,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16,6
2,Argentina,Argentina,24/01/2020,20200124,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16,6
3,Argentina,Argentina,25/01/2020,20200125,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16,6
4,Argentina,Argentina,26/01/2020,20200126,Confirmed,Since beginning,EVOLUTION,0.0,cases,",.0f",2020-04-11 11:56:16,6
...,...,...,...,...,...,...,...,...,...,...,...,...
80097,France,France,07/10/2020,20201007,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16,4
80098,France,Europe,06/10/2020,20201006,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16,4
80099,France,France,06/10/2020,20201006,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16,4
80100,France,Europe,05/10/2020,20201005,Deconfinement,Last 30 days,DAILY VARIATION (CASES),3000.0,cases,"+,.0f",2020-04-11 11:56:16,4


In [20]:
%%time

#-- Covid-19 - RANKING BY COUNTRIES
#-> Referentials
ref_continent = pd.read_excel(REF_WSR_PATH, sheet_name='REF_CONTINENT')
def domain_103(df,ref,domain_num):
    """Build, save and index the country ranking dataset.

    The source ``ENTITY`` becomes ``LABEL`` and ``ENTITY_GROUPS`` becomes
    ``ENTITY``; rows are duplicated under ENTITY "WORLDWIDE" through
    ``get_ww`` and then enriched with the referential columns of ``ref``
    (joined on LABEL = COUNTRY_REGION, left join).

    Args:
        df: Source frame (not modified).
        ref: Referential frame with at least COUNTRY_REGION and CONTINENT;
            both are dropped after the join, the other columns are kept.
        domain_num: Name used for the export and the MongoDB collection.

    Returns:
        The saved DataFrame after ``optimize``.
    """
    cols_to_rename = {"LABEL": "KPI","ENTITY": "LABEL", "ENTITY_GROUPS":"ENTITY", "VARV":"VAR"}
    unused_cols = ['DATE','UNIT_VAR','UNIT_VARP','PRECISION_VALUE','PRECISION_VAR','PRECISION_VARP']

    # Step 1: trim and rename the source columns.
    trimmed = df.copy().drop(unused_cols, axis=1)
    renamed = trimmed.rename(index=str, columns=cols_to_rename).reset_index(drop=True)

    # Step 2: add the WORLDWIDE duplicates.
    with_worldwide = get_ww(renamed)

    # Step 3: attach the referential, then drop the join helper columns.
    joined = pd.merge(with_worldwide, ref, left_on=['LABEL'], right_on=['COUNTRY_REGION'], how='left')
    domain = joined.drop(['CONTINENT','COUNTRY_REGION'],axis=1).reset_index(drop=True)

    df_to_csv_mongo(domain,domain_num)
    MONGO_DB[DB_APP][domain_num].create_index([("ENTITY", DESCENDING), ("SCENARIO", DESCENDING), ("LABEL", DESCENDING), ("KPI", ASCENDING), ("VALUE", DESCENDING)])
    domain = optimize(domain, ['LAST_UPDATE'])
    return domain

domain_103(db_all,ref_continent,'103')
# domain103 = domain_103(db_all,ref_continent,'103')
# domain103

103 successfully saved in 103
Dataframe 103 successfully save in database app-wsr in MongoDB. Time: --- 105.39439129829407 secnds ---
CPU times: user 50.9 s, sys: 1.84 s, total: 52.7 s
Wall time: 2min 8s


Unnamed: 0,ENTITY,LABEL,KPI,VALUE,VALUE_D-1,VAR,VARP,SCENARIO,DATE_ORDER,UNIT_VALUE,WORLDMAP,WORLDMAP_CS,CONTINENT_CS
0,0,0,Confirmed,0.000000,0.000000,0.000000,,22/01/2020,20200122,cases,,,
1,0,0,Confirmed,0.000000,0.000000,0.000000,,23/01/2020,20200123,cases,,,
2,0,0,Confirmed,0.000000,0.000000,0.000000,,24/01/2020,20200124,cases,,,
3,0,0,Confirmed,0.000000,0.000000,0.000000,,25/01/2020,20200125,cases,,,
4,0,0,Confirmed,0.000000,0.000000,0.000000,,26/01/2020,20200126,cases,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238113,WORLDWIDE,Solomon Islands,Recovery Rate,50.000000,50.000000,0.000000,,30/10/2020,20201030,%,,,
1238114,WORLDWIDE,Solomon Islands,Recovery Rate,50.000000,50.000000,0.000000,,31/10/2020,20201031,%,,,
1238115,WORLDWIDE,Solomon Islands,Recovery Rate,50.000000,50.000000,0.000000,,01/11/2020,20201101,%,,,
1238116,WORLDWIDE,Solomon Islands,Recovery Rate,30.769231,50.000000,-19.230769,,02/11/2020,20201102,%,,,


In [21]:
%%time

#-- Covid-19 - DAILY TREND
def domain_106(df,domain_num):
    """Build, save and index the daily-trend dataset for the last 15 dates.

    The result stacks three parts:
      * ``domain_val``: for WORLDWIDE and continent-level groups, the value of
        the day and of the previous day, one row each (LABEL = date).
      * ``domain_w``: for every non-WORLDWIDE group, date and KPI, the 10
        largest non-zero daily variations by entity, the remainder being
        summed into an 'Others' row (LABEL_ORDER 11).
      * ``domain_w2``: the continent rows of ``domain_w`` re-labelled under
        ENTITY 'WORLDWIDE'.

    Args:
        df: Source frame with ENTITY_GROUPS, ENTITY, SCENARIO, DATE_ORDER, KPI,
            VALUE, VALUE_D-1, VARV and VARP. Rate KPIs are excluded.
        domain_num: Name used for the export and the MongoDB collection.

    Returns:
        The saved DataFrame after ``optimize``.
    """
    #-- Exclude Rates & Date
    date_list = df['DATE_ORDER'].drop_duplicates().sort_values(ascending=False).values.tolist()[:15]
    df = df[(~df['KPI'].isin(['Fatality Rate',"Recovery Rate"])) &
            (df['DATE_ORDER'].isin(date_list))]

    #-- Calculate Value
    filters = ['WORLDWIDE','Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    cols_to_keep = ['ENTITY_GROUPS','ENTITY','SCENARIO','KPI']
    # Select rows and columns in one step; melt() returns a new frame, so no
    # defensive copy of the whole source is needed.
    domain_val = df.loc[df['ENTITY_GROUPS'].isin(filters), cols_to_keep + ['VALUE','VALUE_D-1']]
    # melt() refuses a value_name that matches an existing column (ValueError
    # since pandas 2.0), so melt into a temporary name and rename afterwards.
    domain_val = (
        domain_val
        .melt(id_vars=cols_to_keep, value_vars=['VALUE','VALUE_D-1'],
              value_name='_MELTED_VALUE', var_name='METRIC')
        .rename(columns={'_MELTED_VALUE': 'VALUE'})
    )
    domain_val['ENTITY_GROUPS'] = domain_val['ENTITY']
    # Current-day rows sort last, previous-day rows sort first.
    domain_val.loc[domain_val['METRIC'] == 'VALUE', 'LABEL_ORDER'] = 99999999
    domain_val.loc[domain_val['METRIC'] == 'VALUE_D-1', 'LABEL_ORDER'] = 0
    # Replace the metric code by the date it refers to.
    domain_val.loc[domain_val['METRIC'] == 'VALUE', 'METRIC'] = domain_val['SCENARIO']
    domain_val.loc[domain_val['METRIC'] == 'VALUE_D-1', 'METRIC'] = (pd.to_datetime(domain_val['SCENARIO'], format='%d/%m/%Y') + timedelta(days=-1)).dt.strftime('%d/%m/%Y')

    cols_to_rename = {'METRIC':'LABEL', 'ENTITY_GROUPS': 'ENTITY','KPI':'UPPER_FILTER_R'}
    domain_val = domain_val.drop('ENTITY',axis=1).rename(index=str, columns=cols_to_rename)
    domain_val['PRECISION'] = ',.0f'

    #--Variation
    domain_var = df.loc[df['ENTITY_GROUPS'] != 'WORLDWIDE', ['ENTITY_GROUPS','ENTITY','SCENARIO','KPI','VARV','VARP']]
    cols_to_rename = {'VARV':'VALUE', 'VARP': 'VAR'}
    domain_var = domain_var.rename(index=str, columns=cols_to_rename).reset_index(drop=True)
    domain_var = domain_var[domain_var['VALUE'] !=0 ]

    #-- Get the first 10
    filters = domain_var[['ENTITY_GROUPS','SCENARIO','KPI']].drop_duplicates().values.tolist()
    # Collect the per-group frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame on every pass.
    ranked_parts = []
    for f in filters:
        tmp_df = domain_var[(domain_var['ENTITY_GROUPS'] == f[0]) & (domain_var['SCENARIO'] == f[1]) & (domain_var['KPI'] == f[2])].sort_values(by=["VALUE","ENTITY"],ascending=False).reset_index(drop=True)
        tmp_df['LABEL_ORDER'] = tmp_df.index.astype(int) + 1
        if len(tmp_df) > 9:
            # Rows ranked 11th and below collapse into a single 'Others' row;
            # the aggregation keeps VALUE only, so VAR is absent for these groups.
            tmp_df.loc[tmp_df.index > 9, 'ENTITY'] = 'Others'
            tmp_df.loc[tmp_df.index > 9, 'LABEL_ORDER'] = 11
            cols_to_group = ['ENTITY_GROUPS','ENTITY','SCENARIO','KPI','LABEL_ORDER']
            tmp_df = tmp_df.groupby(cols_to_group, as_index=False).agg({'VALUE':'sum'}).reset_index(drop=True)
        tmp_df['PRECISION'] = '+,.0f'
        ranked_parts.append(tmp_df)
    # pd.concat rejects an empty list; fall back to an empty frame in that case.
    domain_w = pd.concat(ranked_parts) if ranked_parts else pd.DataFrame()

    cols_to_rename = {'ENTITY':'LABEL', 'ENTITY_GROUPS': 'ENTITY','KPI':'UPPER_FILTER_R'}
    domain_w = domain_w.rename(index=str, columns=cols_to_rename)
    domain_w['GROUPS'] = domain_w['LABEL']

    # Worldwide view: continent rows re-labelled so that LABEL is the continent
    # and GROUPS is the country.
    filters = ['Europe','Asia','Africa','North America','South America','Oceania','Other','To be affected']
    cols_to_rename = {'LABEL':'GROUPS', 'ENTITY':'LABEL'}
    domain_w2 = domain_w[domain_w['ENTITY'].isin(filters)].drop('GROUPS',axis=1).rename(index=str, columns=cols_to_rename)
    domain_w2['ENTITY'] = 'WORLDWIDE'

    domain = pd.concat([domain_val,domain_w,domain_w2],axis=0)

    #-- Save df
    df_to_csv_mongo(domain,domain_num)
    df_collection = MONGO_DB[DB_APP][domain_num]
    df_collection.create_index([("ENTITY", DESCENDING), ("SCENARIO", DESCENDING), ("UPPER_FILTER_R", ASCENDING)])
    domain = optimize(domain, ['LAST_UPDATE'])
    return domain

domain_106(db_all, '106')
# domain106 = domain_106(db_all, '106')
# domain106



106 successfully saved in 106
Dataframe 106 successfully save in database app-wsr in MongoDB. Time: --- 70.77073812484741 secnds ---
CPU times: user 1min 22s, sys: 1.27 s, total: 1min 23s
Wall time: 2min 24s


Unnamed: 0,ENTITY,SCENARIO,UPPER_FILTER_R,LABEL,VALUE,LABEL_ORDER,PRECISION,VAR,GROUPS
0,Algeria,20/10/2020,Confirmed,20/10/2020,54829.0,99999999.0,",.0f",,
1,Algeria,21/10/2020,Confirmed,21/10/2020,55081.0,99999999.0,",.0f",,
2,Algeria,22/10/2020,Confirmed,22/10/2020,55357.0,99999999.0,",.0f",,
3,Algeria,23/10/2020,Confirmed,23/10/2020,55630.0,99999999.0,",.0f",,
4,Algeria,24/10/2020,Confirmed,24/10/2020,55880.0,99999999.0,",.0f",,
...,...,...,...,...,...,...,...,...,...
0,WORLDWIDE,27/10/2020,Active cases,To be affected,4.0,1.0,"+,.0f",,Solomon Islands
1,WORLDWIDE,27/10/2020,Active cases,To be affected,4.0,2.0,"+,.0f",0.432432,Lesotho
0,WORLDWIDE,02/11/2020,Active cases,To be affected,5.0,1.0,"+,.0f",125.000000,Solomon Islands
1,WORLDWIDE,02/11/2020,Active cases,To be affected,-5.0,2.0,"+,.0f",-0.535332,Lesotho


In [22]:
%run "map-chart_us.ipynb"

Successfully connected to MongoDB
Dataframe 108 successfully save in database app-wsr in MongoDB. Time: --- 0.10523819923400879 secnds ---
US_MAP successfully saved in US_MAP
CPU times: user 1.07 s, sys: 11.3 ms, total: 1.08 s
Wall time: 1.33 s
Confirmed : Min: 96, Max: 950302,Average: 170590.0, AvrMax: 0.18,Med: 114709.0, MedMax: 0.12
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Confirmed Rate : Min: 174.0, Max: 6192.0,Average: 2757.0, AvrMax: 0.45,Med: 2734.0, MedMax: 0.44
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Deaths : Min: 2, Max: 33543,Average: 4229.0, AvrMax: 0.13,Med: 2003.0, MedMax: 0.06
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Deaths Rate : Min: 4.0, Max: 184.0,Average: 58.0, AvrMax: 0.32,Med: 49.0, MedMax: 0.27
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Fatality Rate : Min: 0.48, Max: 6.74,Average: 2.0, AvrMax: 0.3,Med: 2.0, MedMax: 0.3
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Dataframe 109 successfully save in database app-wsr in MongoDB. Time: --- 0.0246121883392334 secnds ---


In [23]:
%run "map-chart_world.ipynb"

Successfully connected to MongoDB
2020-11-03
            ENTITY            LABEL            KPI      VALUE  VALUE_D-1  \
0                0                0      Confirmed   0.000000   0.000000   
1                0                0      Confirmed   0.000000   0.000000   
2                0                0      Confirmed   0.000000   0.000000   
3                0                0      Confirmed   0.000000   0.000000   
4                0                0      Confirmed   0.000000   0.000000   
...            ...              ...            ...        ...        ...   
1238113  WORLDWIDE  Solomon Islands  Recovery Rate  50.000000  50.000000   
1238114  WORLDWIDE  Solomon Islands  Recovery Rate  50.000000  50.000000   
1238115  WORLDWIDE  Solomon Islands  Recovery Rate  50.000000  50.000000   
1238116  WORLDWIDE  Solomon Islands  Recovery Rate  30.769231  50.000000   
1238117  WORLDWIDE  Solomon Islands  Recovery Rate  30.769231  30.769231   

               VAR  VARP    SCENARIO  DATE

Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
VALUE : Min: 0.0, Max: 5444867.0,Average: 76348.0, AvrMax: 0.01,Med: 2709.0, MedMax: 0.0
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
VALUE : Min: 0.0, Max: 232620.0,Average: 6355.0, AvrMax: 0.03,Med: 315.0, MedMax: 0.0
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
VALUE : Min: 0.0, Max: 7656478.0,Average: 165493.0, AvrMax: 0.02,Med: 10962.0, MedMax: 0.0
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
VALUE : Min: 0.0, Max: 29.13,Average: 2.0, AvrMax: 0.07,Med: 2.0, MedMax: 0.07
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
VALUE : Min: 0.0, Max: 100.0,Average: 72.0, AvrMax: 0.72,Med: 82.0, MedMax: 0.82
👌 Well done! Your Assets has been sent to production.



Button(description='Copy URL', style=ButtonStyle())

Output()

PS: to remove the "Assets" feature, just replace .add by .delete
Dataframe 111 successfully save in database app-wsr in MongoDB. Time: --- 0.0236055850982666 secnds ---
