In [15]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 80)
pd.set_option('display.max_rows', 1000)

In [16]:
def get_dataframe(name):
    
    file_name = f'Output/{name}.csv'
    return pd.read_csv(file_name)

dataset_ind = get_dataframe('individual record')

In [17]:
def get_template_extended():
    '''Returns an empty dataframe with required columns'''
    
    columns = ['State', 'District',
               
               'Age (1-10)', 'Age (11-20)', 'Age (21-30)', 'Age (31-40)', 'Age (41-50)', 
               'Age (51-60)', 'Age (61-70)', 'Age (71-80)', 'Age (81 and above)',
               
               'Age (1-10) Fatalities', 'Age (11-20) Fatalities', 'Age (21-30) Fatalities', 'Age (31-40) Fatalities', 'Age (41-50) Fatalities', 
               'Age (51-60) Fatalities', 'Age (61-70) Fatalities', 'Age (71-80) Fatalities', 'Age (81 and above) Fatalities',  
       
              ]
    
    return pd.DataFrame(columns=columns)

In [28]:
def get_row_extended(state, district):

    df_ind = dataset_ind[dataset_ind['Detected District'] == district]

    def get_cases_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
#         df = df[df['Current Status'] == 'Hospitalized']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]

        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]

    def get_fatalities_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
        df = df[df['Current Status'] == 'Deceased']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]
        
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]
        
    new_row = dict()
    new_row['State'] = state
    new_row['District'] = district
    
    for i in range(1, 81, 10):

        tempf = get_cases_distribution(df_ind, i, i+9, 'F')    
        tempm = get_cases_distribution(df_ind, i, i+9, 'M')
        new_row[f'Age ({i}-{i+9})'] = tempm/tempf if tempf != 0 else -1
    

    tempf = get_cases_distribution(df_ind, 81, 120, 'F')    
    tempm = get_cases_distribution(df_ind, 81, 120, 'M')
    new_row['Age (81 and above)'] = tempm/tempf if tempf != 0 else -1
    
    for i in range(1, 81, 10):
        tempf = get_fatalities_distribution(df_ind, i, i+9, 'F')        
        tempm = get_fatalities_distribution(df_ind, i, i+9, 'M')
        new_row[f'Age ({i}-{i+9}) Fatalities'] = tempm/tempf if tempf != 0 else -1

    
    tempf = get_fatalities_distribution(df_ind, 81, 120, 'F')
    tempm = get_fatalities_distribution(df_ind, 81, 120, 'M')
    new_row['Age (81 and above) Fatalities'] = tempm/tempf if tempf != 0 else -1


    return new_row
    
# get_row_extended('Karnataka', 'Udupi')

In [30]:
def add_rows():
    states = dataset_ind['Detected State'].unique()
    processed_df = get_template_extended()
    for state in states:
        
        districts = dataset_ind[dataset_ind['Detected State'] == state]['Detected District'].unique()
        
        for district in districts:
            processed_df = processed_df.append(get_row_extended(state, district), ignore_index=True, sort=False)
#         print(state, districts)
    display(processed_df)
    return processed_df
    
processed_df = add_rows()
# add_rows()

Unnamed: 0,State,District,Age (1-10),Age (11-20),Age (21-30),Age (31-40),Age (41-50),Age (51-60),Age (61-70),Age (71-80),Age (81 and above),Age (1-10) Fatalities,Age (11-20) Fatalities,Age (21-30) Fatalities,Age (31-40) Fatalities,Age (41-50) Fatalities,Age (51-60) Fatalities,Age (61-70) Fatalities,Age (71-80) Fatalities,Age (81 and above) Fatalities
0,Kerala,Alappuzha,-1.0,0.0,-1.0,1.2,3.555556,1.625,1.521739,1.679245,-1,-1.0,0.0,-1.0,1.2,3.555556,1.625,1.507246,1.679245,-1
1,Kerala,Kasaragod,0.0,2.5,4.0,2.6,6.5,2.857143,2.818182,1.666667,-1,0.0,-1.0,-1.0,1.333333,3.0,3.75,2.636364,2.142857,-1
2,Kerala,Pathanamthitta,-1.0,0.0,0.666667,1.6,2.333333,1.1,2.0,1.392857,-1,-1.0,-1.0,0.5,1.25,2.714286,1.176471,2.105263,1.392857,-1
3,Kerala,Kannur,-1.0,-1.0,3.0,1.444444,3.666667,1.896552,2.0,2.263158,-1,-1.0,-1.0,1.5,1.0,3.333333,1.857143,1.979592,2.297297,-1
4,Kerala,Ernakulam,-1.0,1.0,2.5,1.3,2.588235,2.042553,1.77027,1.742857,-1,-1.0,0.0,2.0,1.222222,2.588235,2.086957,1.794521,1.742857,-1
5,Kerala,Kottayam,-1.0,0.0,1.333333,1.2,1.909091,2.4,1.705882,1.055556,-1,-1.0,-1.0,-1.0,1.0,1.727273,2.526316,1.757576,1.055556,-1
6,Kerala,Thrissur,-1.0,-1.0,-1.0,4.0,1.76,1.865385,2.455882,1.773333,-1,-1.0,-1.0,-1.0,3.833333,1.76,1.846154,2.455882,1.773333,-1
7,Kerala,Thiruvananthapuram,-1.0,0.0,1.714286,1.692308,1.787879,1.315436,1.392473,1.506173,-1,-1.0,0.0,1.714286,1.692308,1.787879,1.315436,1.392473,1.506173,-1
8,Kerala,Idukki,-1.0,-1.0,-1.0,0.5,6.0,1.333333,0.692308,1.0,-1,-1.0,-1.0,-1.0,0.0,5.0,1.333333,0.727273,1.0,-1
9,Kerala,Malappuram,1.0,-1.0,2.0,2.0,3.0,1.388889,1.516129,1.66,-1,1.0,-1.0,3.0,1.571429,3.0,1.388889,1.516129,1.66,-1


In [32]:
def save_to_csv(df, name):
    path = f'Output/{name}.csv'
    df.to_csv(path, index=False)

# save_to_csv(processed_df, 'State-District age group ratio')