In [47]:
import pandas as pd

In [239]:
# Load the yearly population CSVs (pop_2010.csv ... pop_2021.csv) from the
# current working directory.
# `pop_by_year` maps year -> raw DataFrame so later code can loop over years;
# the individual pop_YYYY names are still bound because other cells use them.
POP_YEARS = range(2010, 2022)
pop_by_year = {year: pd.read_csv(f'pop_{year}.csv') for year in POP_YEARS}
(pop_2010, pop_2011, pop_2012, pop_2013, pop_2014, pop_2015,
 pop_2016, pop_2017, pop_2018, pop_2019, pop_2020, pop_2021) = (
    pop_by_year[year] for year in POP_YEARS
)

In [240]:
# Raw row labels (prefixed with four non-breaking spaces) -> short column names.
_POP_LABELS = {
    'Label (Grouping)': 'county',
    '\xa0\xa0\xa0\xa0Total population': 'total_pop',
    '\xa0\xa0\xa0\xa0Under 5 years': 'under_5',
    '\xa0\xa0\xa0\xa05 to 9 years': '5-9',
    '\xa0\xa0\xa0\xa010 to 14 years': '10-14',
    '\xa0\xa0\xa0\xa015 to 19 years': '15-19',
    '\xa0\xa0\xa0\xa020 to 24 years': '20-24',
    '\xa0\xa0\xa0\xa025 to 34 years': '25-34',
    '\xa0\xa0\xa0\xa035 to 44 years': '35-44',
    '\xa0\xa0\xa0\xa045 to 54 years': '45-54',
    '\xa0\xa0\xa0\xa055 to 59 years': '55-59',
    '\xa0\xa0\xa0\xa060 to 64 years': '60-64',
    '\xa0\xa0\xa0\xa065 to 74 years': '65-74',
    '\xa0\xa0\xa0\xa075 to 84 years': '75-84',
    '\xa0\xa0\xa0\xa085 years and over': '85+',
    '\xa0\xa0\xa0\xa018 years and over': '18+',
    '\xa0\xa0\xa0\xa065 years and over': '65+',
}

# Columns that must be present (after renaming) before any arithmetic is done.
_POP_COLUMNS = ['county', 'total_pop', 'under_5', '5-9', '10-14', '15-19', '20-24',
                '25-34', '35-44', '45-54', '55-59', '60-64', '65-74', '75-84',
                '85+', '65+', '18+']

# Age brackets summed into `adult_pop`.
_ADULT_COLUMNS = ['25-34', '35-44', '45-54', '55-59', '60-64']


def _strip_thousands(value):
    """Remove ',' from strings; return any non-string value unchanged."""
    return value.replace(',', '') if isinstance(value, str) else value


def pop_group(df):
    """
    Process and clean population data in DataFrame.

    Parameters:
    - df (pd.DataFrame): Raw table whose first column ('Label (Grouping)')
      holds the row labels and whose remaining columns hold one geography
      each (column names ending in '!!Estimate'). The input is not modified.

    Returns:
    pd.DataFrame: One row per geography, with a fresh 0..n-1 index and the
    columns 'county', 'child_pop', 'young_adult_pop', 'adult_pop',
    'senior_pop', 'total_pop' (all counts as integers).

    Raises:
    KeyError: if any expected label is missing from the input.
    ValueError: if a count cannot be converted to an integer.

    Steps:
    1. Transpose so geographies become rows and labels become columns.
    2. Rename the labels to short names; keep the first of any repeated label.
    3. Strip the '!!Estimate' suffix from the geography names.
    4. Remove thousands separators, drop rows with missing values, cast to int.
    5. Derive the groups:
       child_pop       = total_pop - 18+
       adult_pop       = 25-34 + 35-44 + 45-54 + 55-59 + 60-64
       young_adult_pop = 18+ - adult_pop - 65+
       senior_pop      = 65+
    """
    # After the transpose + reset_index, row 0 holds the original labels.
    wide = df.T.reset_index()
    wide.columns = wide.iloc[0]
    # rename() returns a new frame, so nothing is mutated on the iloc slice.
    wide = wide.iloc[1:].rename(columns=_POP_LABELS)

    # Some labels occur more than once in the raw table; keep the first.
    wide = wide.loc[:, ~wide.columns.duplicated()]

    missing = [name for name in _POP_COLUMNS if name not in wide.columns]
    if missing:
        raise KeyError(f"pop_group: expected columns not found after renaming: {missing}")

    wide = wide[_POP_COLUMNS].copy()
    # regex=False: literal replacement regardless of the pandas version default.
    wide['county'] = wide['county'].str.replace('!!Estimate', '', regex=False)

    count_columns = [name for name in _POP_COLUMNS if name != 'county']
    for name in count_columns:
        # Only strings carry thousands separators. Using `.str.replace` here
        # would turn every non-string cell into NaN and the dropna() below
        # would then silently discard that geography. Presumably this affects
        # columns read_csv parsed as numeric -- confirm against the raw files.
        wide[name] = wide[name].map(_strip_thousands)

    wide = wide.dropna().reset_index(drop=True)
    counts = wide[count_columns].astype(int)

    adult_pop = counts[_ADULT_COLUMNS].sum(axis=1)
    return pd.DataFrame({
        'county': wide['county'],
        'child_pop': counts['total_pop'] - counts['18+'],
        'young_adult_pop': counts['18+'] - adult_pop - counts['65+'],
        'adult_pop': adult_pop,
        'senior_pop': counts['65+'],
        'total_pop': counts['total_pop'],
    })

In [243]:
# Smoke test: run pop_group on the 2010 data and display the resulting frame.
pop_group(pop_2010)

Unnamed: 0,county,child_pop,young_adult_pop,adult_pop,senior_pop,total_pop
0,"Autauga County, Alabama",14725,4546,27824,6060,53155
1,"Baldwin County, Alabama",40974,13257,92570,28990,175791
2,"Barbour County, Alabama",6230,2590,15073,3806,27699
3,"Bibb County, Alabama",5298,2230,12270,2812,22610
4,"Blount County, Alabama",14048,4559,30031,8054,56692
...,...,...,...,...,...,...
3186,"Vega Baja Municipio, Puerto Rico",15950,5914,30556,8016,60436
3187,"Vieques Municipio, Puerto Rico",2311,899,4587,1516,9313
3188,"Villalba Municipio, Puerto Rico",7697,3156,12966,2746,26565
3189,"Yabucoa Municipio, Puerto Rico",9989,3968,19782,4648,38387


In [242]:
# Run pop_group on the 2011 data and display the resulting frame.
pop_group(pop_2011)

Unnamed: 0,county,child_pop,young_adult_pop,adult_pop,senior_pop,total_pop
0,"Autauga County, Alabama",14708,4655,28294,6287,53944
1,"Baldwin County, Alabama",41594,13479,94440,30010,179523
2,"Barbour County, Alabama",6133,2489,14998,3926,27546
3,"Bibb County, Alabama",5363,2092,12421,2870,22746
4,"Blount County, Alabama",14060,4607,30222,8251,57140
...,...,...,...,...,...,...
3184,"Vega Baja Municipio, Puerto Rico",15467,5860,30400,8268,59995
3185,"Vieques Municipio, Puerto Rico",2256,894,4612,1556,9318
3186,"Villalba Municipio, Puerto Rico",7398,3110,12941,2841,26290
3187,"Yabucoa Municipio, Puerto Rico",9701,3895,19725,4817,38138
