In [81]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 80)

In [71]:
def get_dataframe(name):
    
    file_name = f'Output/{name}.csv'
    return pd.read_csv(file_name)

dataset = get_dataframe('dataset_filtered')
dataset_ind = get_dataframe('individual record')
dataset_dis = get_dataframe('district record')

In [72]:
def get_template():
    '''Returns an empty dataframe with required columns'''
    
    columns = ['Day Id', 'Total Number of cases', 'Total Number of fatalities',
               'Total Number of cases(Females)', 'Total Number of cases(Males)',
               'Total Number of fatalities(Females)', 'Total Number of fatalities(Males)',
               
               'Age (1-10)', 'Age (11-20)', 'Age (21-30)', 'Age (31-40)', 'Age (41-50)', 
               'Age (51-60)', 'Age (61-70)', 'Age (71-80)', 'Age (81 and above)',
               
               'Age (1-10) Female', 'Age (11-20) Female', 'Age (21-30) Female', 'Age (31-40) Female', 'Age (41-50) Female', 
               'Age (51-60) Female', 'Age (61-70) Female', 'Age (71-80) Female', 'Age (81 and above) Female',
               
               'Age (1-10) Male', 'Age (11-20) Male', 'Age (21-30) Male', 'Age (31-40) Male', 'Age (41-50) Male', 
               'Age (51-60) Male', 'Age (61-70) Male', 'Age (71-80) Male', 'Age (81 and above) Male',
               
               'Age (1-10) Fatalities', 'Age (11-20) Fatalities', 'Age (21-30) Fatalities', 'Age (31-40) Fatalities', 'Age (41-50) Fatalities', 
               'Age (51-60) Fatalities', 'Age (61-70) Fatalities', 'Age (71-80) Fatalities', 'Age (81 and above) Fatalities',
               
               'Age (1-10) Female Fatalities', 'Age (11-20) Female Fatalities', 'Age (21-30) Female Fatalities', 'Age (31-40) Female Fatalities', 'Age (41-50) Female Fatalities', 
               'Age (51-60) Female Fatalities', 'Age (61-70) Female Fatalities', 'Age (71-80) Female Fatalities', 'Age (81 and above) Female Fatalities',
               
               'Age (1-10) Male Fatalities', 'Age (11-20) Male Fatalities', 'Age (21-30) Male Fatalities', 'Age (31-40) Male Fatalities', 'Age (41-50) Male Fatalities', 
               'Age (51-60) Male Fatalities', 'Age (61-70) Male Fatalities', 'Age (71-80) Male Fatalities', 'Age (81 and above) Male Fatalities',
                     
              ]
    
    return pd.DataFrame(columns=columns)

In [86]:
def each_row(day_from_first):
    df_total = dataset[dataset['Day Id'] == day_from_first]  # Required dataframe from full dataset
    df_ind = dataset_ind[dataset_ind['Day Id'] == day_from_first] # Required dataframe from district record dataset
    df_dis = dataset_dis[dataset_dis['Day Id'] == day_from_first] # Required dataframe from individual level dataset
    
    df_total_d = dataset[dataset['Status Day Id'] == day_from_first]
    df_ind_d = dataset_ind[dataset_ind['Status Day Id'] == day_from_first]
    df_dis_d = dataset_dis[dataset_dis['Status Day Id'] == day_from_first]
    
    def get_cases(df, gender=None):
        df = df[df['Current Status'] == 'Hospitalized']
        
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df['Num Cases'].sum()
    
    def get_fatalities(df, gender=None):
        df = df[df['Current Status'] == 'Deceased']
     
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df['Num Cases'].sum()
    
    def get_cases_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
        df = df[df['Current Status'] == 'Hospitalized']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]

        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]

    def get_fatalities_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
        df = df[df['Current Status'] == 'Deceased']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]
        
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]
        
    new_row = dict()
    new_row['Day Id'] = day_from_first
    new_row['Total Number of cases'] = get_cases(df_total)
    new_row['Total Number of cases(Males)'] = get_cases(df_ind, 'M')
    new_row['Total Number of cases(Females)'] = get_cases(df_ind, 'F')
    new_row['Total Number of fatalities'] = get_fatalities(df_total_d)
    new_row['Total Number of fatalities(Males)'] = get_fatalities(df_ind_d, 'M')
    new_row['Total Number of fatalities(Females)'] = get_fatalities(df_ind_d, 'F')
    
    for i in range(1, 81, 10):
        new_row[f'Age ({i}-{i+9})'] = get_cases_distribution(df_total, i, i+9)
        new_row[f'Age ({i}-{i+9}) Female'] = get_cases_distribution(df_ind, i, i+9, 'F')        
        new_row[f'Age ({i}-{i+9}) Male'] = get_cases_distribution(df_ind, i, i+9, 'M')
    
    new_row['Age (81 and above)'] = get_cases_distribution(df_total, 81, 120)
    new_row['Age (81 and above) Female'] = get_cases_distribution(df_ind, 81, 120, 'F')
    new_row['Age (81 and above) Male'] = get_cases_distribution(df_ind, 81, 120, 'M')
    
    
    for i in range(1, 81, 10):
        new_row[f'Age ({i}-{i+9}) Fatalities'] = get_fatalities_distribution(df_total_d, i, i+9)
        new_row[f'Age ({i}-{i+9}) Female Fatalities'] = get_fatalities_distribution(df_ind_d, i, i+9, 'F')        
        new_row[f'Age ({i}-{i+9}) Male Fatalities'] = get_fatalities_distribution(df_ind_d, i, i+9, 'M')
    
    new_row['Age (81 and above) Fatalities'] = get_fatalities_distribution(df_total_d, 81, 120)
    new_row['Age (81 and above) Female Fatalities'] = get_fatalities_distribution(df_ind_d, 81, 120, 'F')
    new_row['Age (81 and above) Male Fatalities'] = get_fatalities_distribution(df_ind_d, 81, 120, 'M')
 

    return new_row
    
    
# each_row(100)

In [88]:
def append_row():
    day_id = dataset['Day Id'].unique()

    processed_df = get_template()
    for day in day_id:
        processed_df = processed_df.append(each_row(day), ignore_index=True, sort=False)
    display(processed_df)
    
    return processed_df
    
combined_df = append_row()

Unnamed: 0,Day Id,Total Number of cases,Total Number of fatalities,Total Number of cases(Females),Total Number of cases(Males),Total Number of fatalities(Females),Total Number of fatalities(Males),Age (1-10),Age (11-20),Age (21-30),Age (31-40),Age (41-50),Age (51-60),Age (61-70),Age (71-80),Age (81 and above),Age (1-10) Female,Age (11-20) Female,Age (21-30) Female,Age (31-40) Female,Age (41-50) Female,Age (51-60) Female,Age (61-70) Female,Age (71-80) Female,Age (81 and above) Female,Age (1-10) Male,Age (11-20) Male,Age (21-30) Male,Age (31-40) Male,Age (41-50) Male,Age (51-60) Male,Age (61-70) Male,Age (71-80) Male,Age (81 and above) Male,Age (1-10) Fatalities,Age (11-20) Fatalities,Age (21-30) Fatalities,Age (31-40) Fatalities,Age (41-50) Fatalities,Age (51-60) Fatalities,Age (61-70) Fatalities,Age (71-80) Fatalities,Age (81 and above) Fatalities,Age (1-10) Female Fatalities,Age (11-20) Female Fatalities,Age (21-30) Female Fatalities,Age (31-40) Female Fatalities,Age (41-50) Female Fatalities,Age (51-60) Female Fatalities,Age (61-70) Female Fatalities,Age (71-80) Female Fatalities,Age (81 and above) Female Fatalities,Age (1-10) Male Fatalities,Age (11-20) Male Fatalities,Age (21-30) Male Fatalities,Age (31-40) Male Fatalities,Age (41-50) Male Fatalities,Age (51-60) Male Fatalities,Age (61-70) Male Fatalities,Age (71-80) Male Fatalities,Age (81 and above) Male Fatalities
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,472.0,59852.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
472,473.0,60600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
473,474.0,60721.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
474,475.0,58615.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [89]:
def save_to_csv(df, name):
    path = f'Output/{name}.csv'
    df.to_csv(path, index=False)

save_to_csv(combined_df, 'summary')