In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Data is pulled and initial cleaning is done

# Data is pulled from csv. The cleaning below treats every value as a string:
# column 0 is the jurisdiction name and columns 1-7 are population counts
# written with thousands separators (e.g. "4,841,799").
raw_data_df = pd.read_csv("data/state_populations.csv")

# Columns are addressed by position, exactly as the cleaning always has:
# position 0 is the jurisdiction, positions 1 through 7 are the yearly populations.
juri_col = raw_data_df.columns[0]
pop_cols = raw_data_df.columns[1:8]

# Period is deleted from jurisdiction.
# regex=False makes "." a literal period rather than the "any character" wildcard.
# The result is assigned back to the frame: writing into the rows yielded by
# iterrows() is not guaranteed to reach the frame (the row may be a copy).
raw_data_df[juri_col] = raw_data_df[juri_col].str.replace(".", "", regex=False)

# Commas are deleted from numbers, which are then converted to integers.
# An explicit 64-bit integer dtype keeps the result the same on every platform.
for pop_col in pop_cols:
    raw_data_df[pop_col] = (
        raw_data_df[pop_col].str.replace(",", "", regex=False).astype("int64")
    )

raw_data_df

Unnamed: 0,jurisdiction,pop_2014,pop_2015,pop_2016,pop_2017,pop_2018,pop_2019,pop_2020
0,Alabama,4841799,4852347,4863525,4874486,4887681,4903185,4911278
1,Alaska,736283,737498,741456,739700,735139,731545,751328
2,Arizona,6730413,6829676,6941072,7044008,7158024,7278717,7268694
3,Arkansas,2967392,2978048,2989918,3001345,3009733,3017804,3038491
4,California,38596972,38918045,39167117,39358497,39461588,39512223,40438640
5,Colorado,5350101,5450623,5539215,5611885,5691287,5758736,5843359
6,Connecticut,3594524,3587122,3578141,3573297,3571520,3565287,3593542
7,Delaware,932487,941252,948921,956823,965479,973764,987393
8,District of Columbia,662328,675400,685815,694906,701547,705749,732552
9,Florida,19845911,20209042,20613477,20963613,21244317,21477737,21877257


In [3]:
# Jurisdictions with bad data are dropped

# Names of the jurisdictions to exclude
juri_to_drop = ["Connecticut", "North Carolina", "District of Columbia"]

# Rows belonging to the excluded jurisdictions, kept for inspection.
# Negating the membership mask selects every other jurisdiction instead.
data_states = raw_data_df.loc[raw_data_df["jurisdiction"].isin(juri_to_drop)]
data_states

Unnamed: 0,jurisdiction,pop_2014,pop_2015,pop_2016,pop_2017,pop_2018,pop_2019,pop_2020
6,Connecticut,3594524,3587122,3578141,3573297,3571520,3565287,3593542
8,District of Columbia,662328,675400,685815,694906,701547,705749,732552
33,North Carolina,9932887,10031646,10154788,10268233,10381615,10488084,10568033


In [4]:
# Keep only the jurisdictions that are not listed in juri_to_drop
wanted_juri_df = raw_data_df.loc[~raw_data_df["jurisdiction"].isin(juri_to_drop)]
wanted_juri_df

Unnamed: 0,jurisdiction,pop_2014,pop_2015,pop_2016,pop_2017,pop_2018,pop_2019,pop_2020
0,Alabama,4841799,4852347,4863525,4874486,4887681,4903185,4911278
1,Alaska,736283,737498,741456,739700,735139,731545,751328
2,Arizona,6730413,6829676,6941072,7044008,7158024,7278717,7268694
3,Arkansas,2967392,2978048,2989918,3001345,3009733,3017804,3038491
4,California,38596972,38918045,39167117,39358497,39461588,39512223,40438640
5,Colorado,5350101,5450623,5539215,5611885,5691287,5758736,5843359
7,Delaware,932487,941252,948921,956823,965479,973764,987393
9,Florida,19845911,20209042,20613477,20963613,21244317,21477737,21877257
10,Georgia,10067278,10178447,10301890,10410330,10511131,10617423,10725351
11,Hawaii,1414538,1422052,1427559,1424393,1420593,1415872,1453902


In [5]:
# Dataframe for summed populations is crafted then appended to dataframe

# Population columns that get totalled across the kept jurisdictions
year_cols = ["pop_2014", "pop_2015", "pop_2016", "pop_2017",
             "pop_2018", "pop_2019", "pop_2020"]

# One-row frame holding the total of each population column
summed_row = {"jurisdiction": "United States (w/o dropped)"}
summed_row.update({year_col: wanted_juri_df[year_col].sum() for year_col in year_cols})
summed_pop_df = pd.DataFrame([summed_row], columns=["jurisdiction", *year_cols])

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Index labels are kept as-is (same as the old append default), so the totals row
# carries label 0 alongside the first jurisdiction row; use positional access or
# filter on "jurisdiction" to pick it out.
# NOTE(review): the variable name says 2014_2018 but the columns run through 2020;
# the name is left unchanged in case later cells reference it.
final_2014_2018_df = pd.concat([wanted_juri_df, summed_pop_df])
final_2014_2018_df

Unnamed: 0,jurisdiction,pop_2014,pop_2015,pop_2016,pop_2017,pop_2018,pop_2019,pop_2020
0,Alabama,4841799,4852347,4863525,4874486,4887681,4903185,4911278
1,Alaska,736283,737498,741456,739700,735139,731545,751328
2,Arizona,6730413,6829676,6941072,7044008,7158024,7278717,7268694
3,Arkansas,2967392,2978048,2989918,3001345,3009733,3017804,3038491
4,California,38596972,38918045,39167117,39358497,39461588,39512223,40438640
5,Colorado,5350101,5450623,5539215,5611885,5691287,5758736,5843359
7,Delaware,932487,941252,948921,956823,965479,973764,987393
9,Florida,19845911,20209042,20613477,20963613,21244317,21477737,21877257
10,Georgia,10067278,10178447,10301890,10410330,10511131,10617423,10725351
11,Hawaii,1414538,1422052,1427559,1424393,1420593,1415872,1453902
