In [1]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 80)
pd.set_option('display.max_rows', 1000)

In [6]:
def get_dataframe(name):
    
    file_name = f'Output/{name}.csv'
    return pd.read_csv(file_name)

dataset_ind = get_dataframe('individual record')
dataset = get_dataframe('dataset_filtered')

In [19]:
def get_template_extended():
    '''Returns an empty dataframe with required columns'''
    
    columns = ['State', 'District','Total Number of cases', 'Total Number of fatalities',
               'Total Number of cases(Females)', 'Total Number of cases(Males)',
               'Total Number of fatalities(Females)', 'Total Number of fatalities(Males)',
               
               'Age (1-10)', 'Age (11-20)', 'Age (21-30)', 'Age (31-40)', 'Age (41-50)', 
               'Age (51-60)', 'Age (61-70)', 'Age (71-80)', 'Age (81 and above)',
               
               'Age (1-10) Female', 'Age (11-20) Female', 'Age (21-30) Female', 'Age (31-40) Female', 'Age (41-50) Female', 
               'Age (51-60) Female', 'Age (61-70) Female', 'Age (71-80) Female', 'Age (81 and above) Female',
               
               'Age (1-10) Male', 'Age (11-20) Male', 'Age (21-30) Male', 'Age (31-40) Male', 'Age (41-50) Male', 
               'Age (51-60) Male', 'Age (61-70) Male', 'Age (71-80) Male', 'Age (81 and above) Male',
               
               'Age (1-10) Fatalities', 'Age (11-20) Fatalities', 'Age (21-30) Fatalities', 'Age (31-40) Fatalities', 'Age (41-50) Fatalities', 
               'Age (51-60) Fatalities', 'Age (61-70) Fatalities', 'Age (71-80) Fatalities', 'Age (81 and above) Fatalities',
               
               'Age (1-10) Female Fatalities', 'Age (11-20) Female Fatalities', 'Age (21-30) Female Fatalities', 'Age (31-40) Female Fatalities', 'Age (41-50) Female Fatalities', 
               'Age (51-60) Female Fatalities', 'Age (61-70) Female Fatalities', 'Age (71-80) Female Fatalities', 'Age (81 and above) Female Fatalities',
               
               'Age (1-10) Male Fatalities', 'Age (11-20) Male Fatalities', 'Age (21-30) Male Fatalities', 'Age (31-40) Male Fatalities', 'Age (41-50) Male Fatalities', 
               'Age (51-60) Male Fatalities', 'Age (61-70) Male Fatalities', 'Age (71-80) Male Fatalities', 'Age (81 and above) Male Fatalities',
                     
              ]
    
    return pd.DataFrame(columns=columns)

In [20]:
def get_row_extended(state, district):

    df_ind = dataset_ind[dataset_ind['Detected District'] == district]
    df_total = dataset[dataset['Detected District'] == district]
#     display(df_total)
    
    def get_cases(df, gender=None):

        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df['Num Cases'].sum()

    def get_fatalities(df, gender=None):
        df = df[df['Current Status'] == 'Deceased']
     
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df['Num Cases'].sum()
    
    def get_cases_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
#         df = df[df['Current Status'] == 'Hospitalized']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]

        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]

    def get_fatalities_distribution(df, lower, upper, gender=None):
        lower = str(lower); upper = str(upper)
        df = df[df['Current Status'] == 'Deceased']
        df['Age Bracket'] = df['Age Bracket'].apply(str)
        df = df[(df['Age Bracket'] >= lower) & (df['Age Bracket'] <= upper)]
        
        if gender:
            return df[df['Gender'] == gender].shape[0]
        return df.shape[0]
        
    new_row = dict()
    new_row['State'] = state
    new_row['District'] = district
    new_row['Total Number of cases'] = get_cases(df_total)
    new_row['Total Number of cases(Males)'] = get_cases(df_ind, 'M')
    new_row['Total Number of cases(Females)'] = get_cases(df_ind, 'F')
    new_row['Total Number of fatalities'] = get_fatalities(df_total)
    new_row['Total Number of fatalities(Males)'] = get_fatalities(df_ind, 'M')
    new_row['Total Number of fatalities(Females)'] = get_fatalities(df_ind, 'F')
    
    for i in range(1, 81, 10):
        new_row[f'Age ({i}-{i+9})'] = get_cases_distribution(df_total, i, i+9)
        new_row[f'Age ({i}-{i+9}) Female'] = get_cases_distribution(df_ind, i, i+9, 'F')        
        new_row[f'Age ({i}-{i+9}) Male'] = get_cases_distribution(df_ind, i, i+9, 'M')
    
    new_row['Age (81 and above)'] = get_cases_distribution(df_total, 81, 120)
    new_row['Age (81 and above) Female'] = get_cases_distribution(df_ind, 81, 120, 'F')
    new_row['Age (81 and above) Male'] = get_cases_distribution(df_ind, 81, 120, 'M')
    
    
    for i in range(1, 81, 10):
        new_row[f'Age ({i}-{i+9}) Fatalities'] = get_fatalities_distribution(df_total, i, i+9)
        new_row[f'Age ({i}-{i+9}) Female Fatalities'] = get_fatalities_distribution(df_ind, i, i+9, 'F')        
        new_row[f'Age ({i}-{i+9}) Male Fatalities'] = get_fatalities_distribution(df_ind, i, i+9, 'M')
    
    new_row['Age (81 and above) Fatalities'] = get_fatalities_distribution(df_total, 81, 120)
    new_row['Age (81 and above) Female Fatalities'] = get_fatalities_distribution(df_ind, 81, 120, 'F')
    new_row['Age (81 and above) Male Fatalities'] = get_fatalities_distribution(df_ind, 81, 120, 'M')
 
    return new_row
    
get_row_extended('Karnataka', 'Udupi')

{'State': 'Karnataka',
 'District': 'Udupi',
 'Total Number of cases': 117778.0,
 'Total Number of cases(Males)': 1511,
 'Total Number of cases(Females)': 794,
 'Total Number of fatalities': 335.0,
 'Total Number of fatalities(Males)': 73,
 'Total Number of fatalities(Females)': 24,
 'Age (1-10)': 34,
 'Age (1-10) Female': 16,
 'Age (1-10) Male': 18,
 'Age (11-20)': 171,
 'Age (11-20) Female': 76,
 'Age (11-20) Male': 95,
 'Age (21-30)': 408,
 'Age (21-30) Female': 164,
 'Age (21-30) Male': 244,
 'Age (31-40)': 580,
 'Age (31-40) Female': 193,
 'Age (31-40) Male': 387,
 'Age (41-50)': 469,
 'Age (41-50) Female': 117,
 'Age (41-50) Male': 352,
 'Age (51-60)': 257,
 'Age (51-60) Female': 82,
 'Age (51-60) Male': 175,
 'Age (61-70)': 165,
 'Age (61-70) Female': 49,
 'Age (61-70) Male': 116,
 'Age (71-80)': 63,
 'Age (71-80) Female': 26,
 'Age (71-80) Male': 37,
 'Age (81 and above)': 0,
 'Age (81 and above) Female': 0,
 'Age (81 and above) Male': 0,
 'Age (1-10) Fatalities': 0,
 'Age (1-1

In [26]:
def add_rows(state):
    districts = dataset[dataset['Detected State'] == state]['Detected District'].unique()
    processed_df = get_template_extended()
    for district in districts:
        processed_df = processed_df.append(get_row_extended(state, district), ignore_index=True, sort=False)

    display(processed_df)
    return processed_df
    
processed_df = add_rows('Tamil Nadu')

Unnamed: 0,State,District,Total Number of cases,Total Number of fatalities,Total Number of cases(Females),Total Number of cases(Males),Total Number of fatalities(Females),Total Number of fatalities(Males),Age (1-10),Age (11-20),Age (21-30),Age (31-40),Age (41-50),Age (51-60),Age (61-70),Age (71-80),Age (81 and above),Age (1-10) Female,Age (11-20) Female,Age (21-30) Female,Age (31-40) Female,Age (41-50) Female,Age (51-60) Female,Age (61-70) Female,Age (71-80) Female,Age (81 and above) Female,Age (1-10) Male,Age (11-20) Male,Age (21-30) Male,Age (31-40) Male,Age (41-50) Male,Age (51-60) Male,Age (61-70) Male,Age (71-80) Male,Age (81 and above) Male,Age (1-10) Fatalities,Age (11-20) Fatalities,Age (21-30) Fatalities,Age (31-40) Fatalities,Age (41-50) Fatalities,Age (51-60) Fatalities,Age (61-70) Fatalities,Age (71-80) Fatalities,Age (81 and above) Fatalities,Age (1-10) Female Fatalities,Age (11-20) Female Fatalities,Age (21-30) Female Fatalities,Age (31-40) Female Fatalities,Age (41-50) Female Fatalities,Age (51-60) Female Fatalities,Age (61-70) Female Fatalities,Age (71-80) Female Fatalities,Age (81 and above) Female Fatalities,Age (1-10) Male Fatalities,Age (11-20) Male Fatalities,Age (21-30) Male Fatalities,Age (31-40) Male Fatalities,Age (41-50) Male Fatalities,Age (51-60) Male Fatalities,Age (61-70) Male Fatalities,Age (71-80) Male Fatalities,Age (81 and above) Male Fatalities
0,Tamil Nadu,Kancheepuram,128651.0,1030.0,136,230,0,3,3,17,81,87,44,41,17,9,0,2,7,34,22,17,12,10,2,0,1,10,47,65,27,29,7,7,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0
1,Tamil Nadu,Chennai,977536.0,7517.0,5071,7829,55,115,61,944,2563,2442,2158,1722,877,423,0,26,412,1026,889,767,643,350,163,0,31,514,1478,1506,1351,1050,514,254,0,0,0,3,8,22,38,41,31,0,0,0,1,2,5,17,14,6,0,0,0,2,6,17,21,27,25,0
2,Tamil Nadu,Erode,146053.0,494.0,18,35,0,0,1,0,3,1,4,3,4,1,0,0,0,1,1,0,2,0,0,0,1,0,2,0,4,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Tamil Nadu,Coimbatore,376990.0,1753.0,22,66,0,0,0,0,4,16,11,4,1,0,0,0,0,1,2,3,0,0,0,0,0,0,3,14,8,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Tamil Nadu,Tirunelveli,87137.0,395.0,139,195,0,0,1,40,52,52,52,30,13,10,0,0,21,20,23,21,7,4,5,0,1,19,32,29,31,23,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Tamil Nadu,Tiruppur,144182.0,621.0,3,15,0,0,0,0,1,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Tamil Nadu,Madurai,129354.0,1004.0,82,151,1,1,1,15,53,37,42,23,4,5,0,0,6,23,14,12,8,2,3,0,1,9,30,22,29,14,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
7,Tamil Nadu,Chengalpattu,279879.0,2186.0,406,651,2,5,6,65,228,221,147,109,73,32,0,5,28,109,66,51,47,22,12,0,1,37,118,153,96,61,51,20,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,2,0
8,Tamil Nadu,Salem,151164.0,1220.0,50,113,0,0,0,14,32,46,22,20,8,1,0,0,5,10,15,5,8,1,0,0,0,9,22,31,17,12,7,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Tamil Nadu,Ranipet,73775.0,583.0,14,63,0,1,0,1,10,15,9,5,2,0,0,0,0,4,0,3,0,0,0,0,0,1,5,15,5,5,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [27]:
def save_to_csv(df, name):
    path = f'Output/{name}.csv'
    df.to_csv(path, index=False)

save_to_csv(processed_df, 'TamilNadu_district_wise')