In [1]:
# Dependencies
import os
import glob
import pandas as pd
import shutil
pd.options.mode.chained_assignment = None  # default='warn'


## Firm by Jane
* #### Get and loop through all files in Resources/State/Business
* #### Format, clean, and filter the data
* #### If an error occurs, write it to a text file and read it back for display
* #### Save the result to the output path

In [2]:
# Input folder, error log, and the four CSV targets for the business section.
business_out_dir = os.path.join(".", "Output/Business")

filepath = os.path.join(".", "Resources/State/Business")
error_path = os.path.join(".", "Output", "data_clean_error.txt")

output_co_ttl = os.path.join(business_out_dir, "state_co_ttl_2012-16.csv")
output_co_grp = os.path.join(business_out_dir, "state_co%_2012-16.csv")
output_co_ind = os.path.join(business_out_dir, "state_co_ind_ttl_2012-16.csv")
output_co_ind_grp = os.path.join(business_out_dir, "state_co_ind_grp_2012-16.csv")

# Read all file names from directory filepath.
# os.listdir returns entries in arbitrary order; sorting fixes the order in which
# the files are processed, and with it the order of the merged per-year columns.
# NOTE(review): the file names presumably embed the year, so sorted order is
# expected to be chronological - confirm against the actual folder contents.
xList = sorted(os.listdir(filepath))


In [3]:
# Clean every matching workbook in xList and merge the per-year results.
# The error log is opened in APPEND mode ("a"), so entries written by earlier
# runs stay in the file next to any new ones.
with open(error_path, "a") as xfile:
    i = 1  # stays 1 until the first workbook is processed; picks seed-vs-merge below

    # Loop thru the file list; an exception raised for one file is logged and the loop moves on
    for x in xList:
        try:
            # Split the file name once into stem and extension, both lower-cased.
            excel, ext = os.path.splitext(x)
            excel = excel.lower()
            ext = ext.lower()

            # Only Excel workbooks whose name contains "state_naicssector" are processed.
            if "state_naicssector" in x and ext in (".xls", ".xlsx"):
                # The third "_"-separated token of the file stem labels the year columns.
                yr = excel.split("_")[2]
                co_col = yr + "_#Companies"
                pct_col = yr + "_Co_Size%"

                business_data = pd.read_excel(os.path.join(filepath, x), skiprows=5)
                business_data = business_data.drop(business_data.index[[0, 1]])

                # Split enterprise employment size at the first ":" into # & size cols
                splitted_col = business_data["ENTERPRISE EMPLOYMENT SIZE"].str.split(":", n=1, expand=True)
                business_data["Num"] = splitted_col[0].str.strip()
                business_data["Co_Size"] = splitted_col[1].str.strip()

                # Select data columns. .copy() gives an independent frame, so the
                # assignments below never write into a slice of business_data.
                state_co = business_data[
                    ["STATE DESCRIPTION", "NUMBER OF FIRMS", "NAICS DESCRIPTION", "Num", "Co_Size"]
                ].copy()
                state_co.columns = ["State", co_col, "NAICS_Desc", "Num(#)", "Company_Size"]
                state_co[co_col] = state_co[co_col].astype(int)

                # Row masks shared by the four filters below.
                is_all_naics = state_co["NAICS_Desc"] == "Total"
                is_all_sizes = state_co["Company_Size"] == "Total"
                # A size band is any size label that is neither "Total" nor contains "<".
                is_size_band = (~state_co["Company_Size"].str.contains('<')) & (~is_all_sizes)

                # ---------------------------------------------------------------------------
                # Filter data by state total # of companies
                state_total = state_co.loc[is_all_naics & is_all_sizes]
                state_co_ttl = state_total[["State", co_col]]

                # ---------------------------------------------------------------------------
                # Filter data by state % of each company-size band
                state_detail = state_co.loc[is_all_naics & is_size_band].copy()
                company_total = state_detail.groupby("State")[co_col].sum()

                # Each band's share of its state's summed band counts, in percent with
                # one decimal; computed column-wise instead of row by row.
                state_detail[pct_col] = (
                    state_detail[co_col] / state_detail["State"].map(company_total) * 100
                ).round(1)
                state_co_grp = state_detail[["State", "Company_Size", co_col, pct_col]]

                # ---------------------------------------------------------------------------
                # Filter data by state total # of companies in each industry
                co_ttl_industry = state_co.loc[(~is_all_naics) & is_all_sizes]
                co_ttl_industry = co_ttl_industry.reset_index(drop=True)
                co_ttl_industry = co_ttl_industry[["State", "NAICS_Desc", co_col]].rename(columns={"NAICS_Desc": "Industry"})

                # ---------------------------------------------------------------------------
                # Filter data by state # of companies in each industry and company-size band
                co_ind_grp = state_co.loc[(~is_all_naics) & is_size_band]
                co_ind_grp = co_ind_grp.reset_index(drop=True)
                co_ind_grp = co_ind_grp[["State", "NAICS_Desc", "Company_Size", co_col]].rename(columns={"NAICS_Desc": "Industry"})

                # ---------------------------------------------------------------------------
                # Merge data by state: the first workbook seeds the result frames,
                # every later workbook is merged in on the shared key columns.
                if i == 1:
                    co_ttl_df = pd.DataFrame(state_co_ttl)
                    co_grp_pct_df = pd.DataFrame(state_co_grp)
                    ind_ttl_df = pd.DataFrame(co_ttl_industry)
                    ind_grp_df = pd.DataFrame(co_ind_grp)
                else:
                    co_ttl_df = pd.merge(co_ttl_df, state_co_ttl, how='inner', on='State')
                    co_grp_pct_df = pd.merge(co_grp_pct_df, state_co_grp, on=['State', 'Company_Size'])
                    ind_ttl_df = pd.merge(ind_ttl_df, co_ttl_industry, on=['State', 'Industry'])
                    ind_grp_df = pd.merge(ind_grp_df, co_ind_grp, on=['State', 'Industry', 'Company_Size'])

                i += 1    # Increment by 1 as excel sheet is cleaned and formatted
        except Exception as e:
            # Write error to text file if occurs
            xfile.write(f'****************************************************************************\n')
            xfile.write(f'### {x} ###\n')  # Write file name to text file
            xfile.write("Exception occurred: {}".format(e) + "\n")  # Write error to text file


# Dataset result - non-null value count per column of the state totals
co_ttl_df.count()


State              51
2012_#Companies    51
2013_#Companies    51
2014_#Companies    51
2015_#Companies    51
2016_#Companies    51
dtype: int64

In [4]:
# Dataset result - non-null value count for each column of the company-size percentage table
co_grp_pct_df.count()

State              306
Company_Size       306
2012_#Companies    306
2012_Co_Size%      306
2013_#Companies    306
2013_Co_Size%      306
2014_#Companies    306
2014_Co_Size%      306
2015_#Companies    306
2015_Co_Size%      306
2016_#Companies    306
2016_Co_Size%      306
dtype: int64

In [5]:
# Dataset result - non-null value count for each column of the per-industry totals table
ind_ttl_df.count()

State              1019
Industry           1019
2012_#Companies    1019
2013_#Companies    1019
2014_#Companies    1019
2015_#Companies    1019
2016_#Companies    1019
dtype: int64

In [6]:
# Dataset result - non-null value count for each column of the industry-by-company-size table
ind_grp_df.count()

State              5866
Industry           5866
Company_Size       5866
2012_#Companies    5866
2013_#Companies    5866
2014_#Companies    5866
2015_#Companies    5866
2016_#Companies    5866
dtype: int64

In [7]:
# Print the contents of the error log if the file exists.
# The with-block closes the file handle as soon as the text has been read.
if os.path.exists(error_path):
    with open(error_path) as error_log:
        print(error_log.read())




In [8]:
# Display the first rows of the total number of companies in each state.
# The CSV export below is commented out, so running this cell does not write output_co_ttl.
# co_ttl_df.to_csv(output_co_ttl)
co_ttl_df.head()

Unnamed: 0,State,2012_#Companies,2013_#Companies,2014_#Companies,2015_#Companies,2016_#Companies
0,Alabama,73661,73371,73154,73409,73976
1,Alaska,16656,16733,16841,16952,17059
2,Arizona,101205,102313,103312,105004,106963
3,Arkansas,50412,50318,50151,50451,50511
4,California,701899,711086,724483,740303,751982


In [9]:
# Company size total & percentage by state.
# Prefix every size label with a literal backslash (e.g. "\5-9"). Labels that
# already start with one are left unchanged, so re-running this cell does not
# stack additional prefixes.
# NOTE(review): presumably the prefix keeps spreadsheet tools from reading labels
# such as "5-9" as dates - confirm with the author.
size_labels = co_grp_pct_df["Company_Size"].astype(str)
co_grp_pct_df["Company_Size"] = size_labels.where(
    size_labels.str.startswith("\\"), "\\" + size_labels
)
# The CSV export below is commented out, so this cell does not write output_co_grp.
# co_grp_pct_df.to_csv(output_co_grp)
co_grp_pct_df.head()

Unnamed: 0,State,Company_Size,2012_#Companies,2012_Co_Size%,2013_#Companies,2013_Co_Size%,2014_#Companies,2014_Co_Size%,2015_#Companies,2015_Co_Size%,2016_#Companies,2016_Co_Size%
0,Alabama,\0-4,39993,54.3,39890,54.4,39562,54.1,39724,54.1,39880,53.9
1,Alabama,\5-9,13803,18.7,13622,18.6,13603,18.6,13646,18.6,13721,18.5
2,Alabama,\10-19,8277,11.2,8199,11.2,8215,11.2,8205,11.2,8281,11.2
3,Alabama,\20-99,7280,9.9,7258,9.9,7360,10.1,7358,10.0,7539,10.2
4,Alabama,\100-499,1926,2.6,1977,2.7,1974,2.7,1981,2.7,2017,2.7


In [10]:
# Display the first rows of the total number of companies per state and industry.
# The CSV export below is commented out, so running this cell does not write output_co_ind.
# ind_ttl_df.to_csv(output_co_ind)
ind_ttl_df.head()

Unnamed: 0,State,Industry,2012_#Companies,2013_#Companies,2014_#Companies,2015_#Companies,2016_#Companies
0,Alabama,"Agriculture, Forestry, Fishing and Hunting",770,780,782,775,796
1,Alabama,"Mining, Quarrying, and Oil and Gas Extraction",204,189,180,171,160
2,Alabama,Utilities,111,111,111,110,110
3,Alabama,Construction,7347,7221,7146,7184,7292
4,Alabama,Manufacturing,3865,3781,3761,3749,3703


In [11]:
# Display the first rows of the number of companies per state, industry and company size.
# The CSV export below is commented out, so running this cell does not write output_co_ind_grp.
# ind_grp_df.to_csv(output_co_ind_grp)
ind_grp_df.head()

Unnamed: 0,State,Industry,Company_Size,2012_#Companies,2013_#Companies,2014_#Companies,2015_#Companies,2016_#Companies
0,Alabama,"Agriculture, Forestry, Fishing and Hunting",0-4,385,379,386,386,415
1,Alabama,"Agriculture, Forestry, Fishing and Hunting",5-9,222,233,234,227,218
2,Alabama,"Agriculture, Forestry, Fishing and Hunting",10-19,98,103,98,91,95
3,Alabama,"Agriculture, Forestry, Fishing and Hunting",20-99,47,47,46,54,49
4,Alabama,"Agriculture, Forestry, Fishing and Hunting",100-499,6,6,8,7,8


## Population Data Cleaning
* #### Read, clean, format data
* #### Output to path

In [12]:
# Source file and the two CSV targets for the population section.
demographic_out_dir = os.path.join(".", "Output/Demographic")

pop_path = os.path.join(".", "Resources/State/Demographic", "annual-est population_2010-2017.csv")
country_path = os.path.join(demographic_out_dir, "country_pop_2010-17.csv")
state_path = os.path.join(demographic_out_dir, "state_pop_2010-17.csv")


In [13]:
# Load the population estimates: the first three lines of the file are skipped
# and the first column becomes the index.
pop_data = pd.read_csv(pop_path, index_col=0, skiprows=3)
pop_data = pop_data.dropna(how='any')  # drop any record that contains a NaN
pop_data = pop_data.reset_index().rename(columns={"index": "State"})
# Strip dots from the names. regex=False states explicitly that "." is a literal
# dot, so the result does not depend on the pandas version's default for `regex`.
pop_data["State"] = pop_data["State"].str.replace(".", "", regex=False)
pop_data = pop_data[:-1]  # drop the bottom row
# The first five rows are saved separately as the country/region table.
country_data = pd.DataFrame(pop_data.iloc[0:5]).rename(columns={"State": "Region"})
country_data.to_csv(country_path)
country_data.head()


Unnamed: 0,Region,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017
0,United States,308745538,308758105,309338421,311644280,313993272,316234505,318622525,321039839,323405935,325719178
1,Northeast,55317240,55318350,55388349,55642659,55860261,56047732,56203078,56296628,56359360,56470581
2,Midwest,66927001,66929794,66973360,67141501,67318295,67534451,67720120,67839187,67978168,68179351
3,South,114555744,114563024,114869241,116060993,117291728,118422269,119699966,121081238,122423457,123658624
4,West,71945553,71946937,72107471,72799127,73522988,74230053,74999361,75822786,76644950,77410622


In [14]:
# Keep only the rows that were not exported as country_data (its five Region rows).
# Filtering by name instead of dropping the first five positions makes the cell
# safe to re-run: a second execution finds nothing left to remove, whereas a
# positional drop would discard five more rows each time.
pop_data = pop_data[~pop_data["State"].isin(country_data["Region"])].reset_index(drop=True)
pop_data.to_csv(state_path)
pop_data.head()

Unnamed: 0,State,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017
0,Alabama,4779736,4780135,4785579,4798649,4813946,4827660,4840037,4850858,4860545,4874747
1,Alaska,710231,710249,714015,722259,730825,736760,736759,737979,741522,739795
2,Arizona,6392017,6392309,6407002,6465488,6544211,6616124,6706435,6802262,6908642,7016270
3,Arkansas,2915918,2916031,2921737,2938640,2949208,2956780,2964800,2975626,2988231,3004279
4,California,37253956,37254518,37327690,37672654,38019006,38347383,38701278,39032444,39296476,39536653


## Gender by Charleen
* #### Read, clean, format data
* #### Output to path

In [15]:
def _read_acs_table(csv_path):
    """Read one ACS S0102 CSV file, skipping its first line (row 0)."""
    return pd.read_csv(csv_path, skiprows=[0])


# One raw table per year, 2017 down to 2013.
state_info_2017_df = _read_acs_table("./Resources/State/Demographic/ACS_17_5YR_S0102_with_ann.csv")
state_info_2016_df = _read_acs_table("./Resources/State/Demographic/ACS_16_5YR_S0102_with_ann.csv")
state_info_2015_df = _read_acs_table("./Resources/State/Demographic/ACS_15_5YR_S0102_with_ann.csv")
state_info_2014_df = _read_acs_table("./Resources/State/Demographic/ACS_14_5YR_S0102_with_ann.csv")
state_info_2013_df = _read_acs_table("./Resources/State/Demographic/ACS_13_5YR_S0102_with_ann.csv")

state_info_2017_df.head()

Unnamed: 0,Id,Id2,Geography,Total; Estimate; Total population,Total; Margin of Error; Total population,60 years and over; Estimate; Total population,60 years and over; Margin of Error; Total population,Total; Estimate; SEX AND AGE - Male,Total; Margin of Error; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,...,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),Total; Margin of Error; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Margin of Error; GROSS RENT - Median gross rent (dollars)
0,0400000US01,1,Alabama,4850771,*****,1065625,3368,48.5,0.1,44.4,...,55.2,1.0,42.6,0.5,44.8,1.0,747,3,623,8
1,0400000US02,2,Alaska,738565,*****,117047,1030,52.3,0.1,50.8,...,56.5,2.6,41.6,1.1,43.5,2.6,1200,13,902,29
2,0400000US04,4,Arizona,6809946,*****,1502688,3385,49.7,0.1,46.3,...,48.3,0.8,45.2,0.4,51.7,0.8,972,4,880,8
3,0400000US05,5,Arkansas,2977944,*****,655552,2090,49.1,0.1,45.3,...,56.6,1.3,40.7,0.6,43.4,1.3,709,5,613,9
4,0400000US06,6,California,38982847,*****,7292299,6791,49.7,0.1,45.4,...,40.0,0.3,53.1,0.1,60.0,0.3,1358,3,1109,4


In [16]:
# Split each year's raw table into its "Estimate" columns and its "Geography" column.
# Both patterns are plain text without regex metacharacters, so substring matching
# (like=) selects exactly the columns the regex search selected.
gender_state_2017_df = state_info_2017_df.filter(like="Geography")
gender_state_cols_2017_df = state_info_2017_df.filter(like="Estimate")

gender_state_2016_df = state_info_2016_df.filter(like="Geography")
gender_state_cols_2016_df = state_info_2016_df.filter(like="Estimate")

gender_state_2015_df = state_info_2015_df.filter(like="Geography")
gender_state_cols_2015_df = state_info_2015_df.filter(like="Estimate")

gender_state_2014_df = state_info_2014_df.filter(like="Geography")
gender_state_cols_2014_df = state_info_2014_df.filter(like="Estimate")

gender_state_2013_df = state_info_2013_df.filter(like="Geography")
gender_state_cols_2013_df = state_info_2013_df.filter(like="Estimate")

gender_state_cols_2017_df.head()

Unnamed: 0,Total; Estimate; Total population,60 years and over; Estimate; Total population,Total; Estimate; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,Total; Estimate; SEX AND AGE - Female,60 years and over; Estimate; SEX AND AGE - Female,Total; Estimate; Median age (years),60 years and over; Estimate; Median age (years),Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,...,Total; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars),60 years and over; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars),Total; Estimate; Renter-occupied housing units,60 years and over; Estimate; Renter-occupied housing units,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars)
0,4850771,1065625,48.5,44.4,51.5,55.6,38.7,69.4,98.2,99.2,...,348,347,583478,114092,57.4,55.2,42.6,44.8,747,623
1,738565,117047,52.3,50.8,47.7,49.2,33.9,67.3,91.5,97.0,...,571,562,91682,13010,58.4,56.5,41.6,43.5,1200,902
2,6809946,1502688,49.7,46.3,50.3,53.7,37.2,69.9,96.5,98.9,...,394,390,914973,172208,54.8,48.3,45.2,51.7,972,880
3,2977944,655552,49.1,45.3,50.9,54.7,37.9,69.7,97.6,98.8,...,336,335,394658,77395,59.3,56.6,40.7,43.4,709,613
4,38982847,7292299,49.7,45.4,50.3,54.6,36.1,69.3,95.3,98.0,...,542,519,5863813,1137249,46.9,40.0,53.1,60.0,1358,1109


In [17]:
# Put the Geography column back in front of each year's Estimate columns.
total_gender_state_2017_df = pd.concat(
    (gender_state_2017_df, gender_state_cols_2017_df), axis="columns"
)
total_gender_state_2016_df = pd.concat(
    (gender_state_2016_df, gender_state_cols_2016_df), axis="columns"
)
total_gender_state_2015_df = pd.concat(
    (gender_state_2015_df, gender_state_cols_2015_df), axis="columns"
)
total_gender_state_2014_df = pd.concat(
    (gender_state_2014_df, gender_state_cols_2014_df), axis="columns"
)
total_gender_state_2013_df = pd.concat(
    (gender_state_2013_df, gender_state_cols_2013_df), axis="columns"
)

total_gender_state_2017_df.head()

Unnamed: 0,Geography,Total; Estimate; Total population,60 years and over; Estimate; Total population,Total; Estimate; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,Total; Estimate; SEX AND AGE - Female,60 years and over; Estimate; SEX AND AGE - Female,Total; Estimate; Median age (years),60 years and over; Estimate; Median age (years),Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,...,Total; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars),60 years and over; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars),Total; Estimate; Renter-occupied housing units,60 years and over; Estimate; Renter-occupied housing units,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars)
0,Alabama,4850771,1065625,48.5,44.4,51.5,55.6,38.7,69.4,98.2,...,348,347,583478,114092,57.4,55.2,42.6,44.8,747,623
1,Alaska,738565,117047,52.3,50.8,47.7,49.2,33.9,67.3,91.5,...,571,562,91682,13010,58.4,56.5,41.6,43.5,1200,902
2,Arizona,6809946,1502688,49.7,46.3,50.3,53.7,37.2,69.9,96.5,...,394,390,914973,172208,54.8,48.3,45.2,51.7,972,880
3,Arkansas,2977944,655552,49.1,45.3,50.9,54.7,37.9,69.7,97.6,...,336,335,394658,77395,59.3,56.6,40.7,43.4,709,613
4,California,38982847,7292299,49.7,45.4,50.3,54.6,36.1,69.3,95.3,...,542,519,5863813,1137249,46.9,40.0,53.1,60.0,1358,1109


In [18]:
# Inspect the column labels of the combined tables. Only the last expression
# (the 2013 frame) is rendered as the cell output; the four lines before it are
# evaluated but display nothing.
total_gender_state_2017_df.columns
total_gender_state_2016_df.columns
total_gender_state_2015_df.columns
total_gender_state_2014_df.columns
total_gender_state_2013_df.columns

Index(['Geography', 'Total; Estimate; Total population',
       '60 years and over; Estimate; Total population',
       'Total; Estimate; SEX AND AGE - Male',
       '60 years and over; Estimate; SEX AND AGE - Male',
       'Total; Estimate; SEX AND AGE - Female',
       '60 years and over; Estimate; SEX AND AGE - Female',
       'Total; Estimate; Median age (years)',
       '60 years and over; Estimate; Median age (years)',
       'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race',
       ...
       'Total; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars)',
       '60 years and over; Estimate; OWNER CHARACTERISTICS - Median selected monthly owner costs without a mortgage (dollars)',
       'Total; Estimate; Renter-occupied housing units',
       '60 years and over; Estimate; Renter-occupied housing units',
       'Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 1

In [19]:
# The same four columns are kept for every year: state name, total population,
# and the male / female estimate columns.
gender_columns = [
    "Geography",
    "Total; Estimate; Total population",
    "Total; Estimate; SEX AND AGE - Male",
    "Total; Estimate; SEX AND AGE - Female",
]

gender_state_2017_data = total_gender_state_2017_df[gender_columns]
gender_state_2016_data = total_gender_state_2016_df[gender_columns]
gender_state_2015_data = total_gender_state_2015_df[gender_columns]
gender_state_2014_data = total_gender_state_2014_df[gender_columns]
gender_state_2013_data = total_gender_state_2013_df[gender_columns]

gender_state_2013_data.head()

Unnamed: 0,Geography,Total; Estimate; Total population,Total; Estimate; SEX AND AGE - Male,Total; Estimate; SEX AND AGE - Female
0,Alabama,4799277,48.5,51.5
1,Alaska,720316,52.2,47.8
2,Arizona,6479703,49.7,50.3
3,Arkansas,2933369,49.1,50.9
4,California,37659181,49.7,50.3


In [20]:
# Replace the long source headers with short, year-suffixed labels.
# Each pair is (frame to relabel, its new column labels in positional order).
for yearly_frame, short_labels in (
    (gender_state_2017_data, ["State", "Total_Population 17", "%_M_17", "%_F_17"]),
    (gender_state_2016_data, ["State", "Total_Population 16", "%_M_16", "%_F_16"]),
    (gender_state_2015_data, ["State", "Total_Population 15", "%_M_15", "%_F_15"]),
    (gender_state_2014_data, ["State", "Total_Population 14", "%_M_14", "%_F_14"]),
    (gender_state_2013_data, ["State", "Total_Population 13", "%_M_13", "%_F_13"]),
):
    yearly_frame.columns = short_labels

gender_state_2013_data.head()

Unnamed: 0,State,Total_Population 13,%_M_13,%_F_13
0,Alabama,4799277,48.5,51.5
1,Alaska,720316,52.2,47.8
2,Arizona,6479703,49.7,50.3
3,Arkansas,2933369,49.1,50.9
4,California,37659181,49.7,50.3


In [21]:
# Join the yearly tables on "State", adding one year at a time (2013 -> 2017).
total_gender_2013_2014_data = gender_state_2013_data.merge(
    gender_state_2014_data, on=["State"]
)
total_gender_2013_2014_2015_data = total_gender_2013_2014_data.merge(
    gender_state_2015_data, on=["State"]
)
total_gender_2013_2014_2015_2016_data = total_gender_2013_2014_2015_data.merge(
    gender_state_2016_data, on=["State"]
)
total_gender_state_df = total_gender_2013_2014_2015_2016_data.merge(
    gender_state_2017_data, on=["State"]
)

total_gender_state_df.head()

Unnamed: 0,State,Total_Population 13,%_M_13,%_F_13,Total_Population 14,%_M_14,%_F_14,Total_Population 15,%_M_15,%_F_15,Total_Population 16,%_M_16,%_F_16,Total_Population 17,%_M_17,%_F_17
0,Alabama,4799277,48.5,51.5,4817678,48.5,51.5,4830620,48.5,51.5,4841164,48.5,51.5,4850771,48.5,51.5
1,Alaska,720316,52.2,47.8,728300,52.2,47.8,733375,52.4,47.6,736855,52.3,47.7,738565,52.3,47.7
2,Arizona,6479703,49.7,50.3,6561516,49.7,50.3,6641928,49.7,50.3,6728577,49.7,50.3,6809946,49.7,50.3
3,Arkansas,2933369,49.1,50.9,2947036,49.1,50.9,2958208,49.1,50.9,2968472,49.1,50.9,2977944,49.1,50.9
4,California,37659181,49.7,50.3,38066920,49.7,50.3,38421464,49.7,50.3,38654206,49.7,50.3,38982847,49.7,50.3


In [22]:
# Write the merged 2013-2017 gender table to the demographic output folder.
total_gender_state_df.to_csv("./Output/Demographic/genderallyearsbystate.csv")

In [23]:
# 2017 total population per state, indexed by state name.
# set_index returns a new frame here instead of being applied in place to a
# column subset of total_gender_state_df, so no slice of the parent is modified.
pop_state_2017_data = total_gender_state_df[["State", "Total_Population 17"]].set_index("State")
pop_state_2017_data.head()

Unnamed: 0_level_0,Total_Population 17
State,Unnamed: 1_level_1
Alabama,4850771
Alaska,738565
Arizona,6809946
Arkansas,2977944
California,38982847


In [24]:
# Each state's share, in percent, of the sum of all rows' 2017 population.
total_pop_state_2017_data = pop_state_2017_data["Total_Population 17"].sum()
state_pop_per_17 = pop_state_2017_data.div(total_pop_state_2017_data).mul(100)

state_pop_per_17.head()

Unnamed: 0_level_0,Total_Population 17
State,Unnamed: 1_level_1
Alabama,1.511123
Alaska,0.230079
Arizona,2.121449
Arkansas,0.927696
California,12.144022


In [25]:
# Write each state's 2017 population share (percent) to the demographic output folder.
state_pop_per_17.to_csv("./Output/Demographic/statepopulationpercentage2017.csv")

## Race by Charleen

In [26]:
# Load the five yearly ACS S0102 tables for the race section.
# Every file is read with the same options: its first line (row 0) is skipped.
acs_read_options = {"skiprows": [0]}

state_info_2017_df = pd.read_csv("./Resources/State/Demographic/ACS_17_5YR_S0102_with_ann.csv", **acs_read_options)
state_info_2016_df = pd.read_csv("./Resources/State/Demographic/ACS_16_5YR_S0102_with_ann.csv", **acs_read_options)
state_info_2015_df = pd.read_csv("./Resources/State/Demographic/ACS_15_5YR_S0102_with_ann.csv", **acs_read_options)
state_info_2014_df = pd.read_csv("./Resources/State/Demographic/ACS_14_5YR_S0102_with_ann.csv", **acs_read_options)
state_info_2013_df = pd.read_csv("./Resources/State/Demographic/ACS_13_5YR_S0102_with_ann.csv", **acs_read_options)

state_info_2013_df.head()

Unnamed: 0,Id,Id2,Geography,Total; Estimate; Total population,Total; Margin of Error; Total population,60 years and over; Estimate; Total population,60 years and over; Margin of Error; Total population,Total; Estimate; SEX AND AGE - Male,Total; Margin of Error; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,...,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),Total; Margin of Error; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Margin of Error; GROSS RENT - Median gross rent (dollars)
0,0400000US01,1,Alabama,4799277,*****,966454,2674,48.5,0.1,44.1,...,56.0,1.0,44.8,0.6,44.0,1.0,705,3,569,9
1,0400000US02,2,Alaska,720316,*****,96430,985,52.2,0.1,50.3,...,52.6,2.7,40.6,0.9,47.4,2.7,1098,13,846,23
2,0400000US04,4,Arizona,6479703,*****,1294109,3052,49.7,0.1,46.1,...,46.1,0.9,47.5,0.4,53.9,0.9,896,4,836,8
3,0400000US05,5,Arkansas,2933369,*****,601493,2218,49.1,0.1,44.9,...,58.1,1.3,43.0,0.6,41.9,1.3,661,4,565,9
4,0400000US06,6,California,37659181,*****,6360929,7359,49.7,0.1,45.0,...,40.2,0.3,54.1,0.2,59.8,0.3,1224,2,994,4


In [27]:
# Split each year's raw table into its race/origin columns and its "Geography" column.
# Both patterns are plain text without regex metacharacters, so substring matching
# (like=) selects exactly the columns the regex search selected.
demo_state_2017_df = state_info_2017_df.filter(like="Geography")
demo_state_cols_2017_df = state_info_2017_df.filter(like="RACE AND HISPANIC OR LATINO ORIGIN")

demo_state_2016_df = state_info_2016_df.filter(like="Geography")
demo_state_cols_2016_df = state_info_2016_df.filter(like="RACE AND HISPANIC OR LATINO ORIGIN")

demo_state_2015_df = state_info_2015_df.filter(like="Geography")
demo_state_cols_2015_df = state_info_2015_df.filter(like="RACE AND HISPANIC OR LATINO ORIGIN")

demo_state_2014_df = state_info_2014_df.filter(like="Geography")
demo_state_cols_2014_df = state_info_2014_df.filter(like="RACE AND HISPANIC OR LATINO ORIGIN")

demo_state_2013_df = state_info_2013_df.filter(like="Geography")
demo_state_cols_2013_df = state_info_2013_df.filter(like="RACE AND HISPANIC OR LATINO ORIGIN")

demo_state_cols_2013_df.head()

Unnamed: 0,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American,...,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races
0,98.5,0.1,99.2,0.1,69.3,0.1,78.9,0.1,26.3,0.1,...,0.0,0.1,1.2,0.1,0.2,0.1,1.5,0.1,0.8,0.1
1,92.1,0.2,97.6,0.3,66.9,0.1,76.8,0.4,3.5,0.1,...,0.5,0.1,1.2,0.1,0.6,0.2,7.9,0.2,2.4,0.3
2,97.0,0.1,99.0,0.1,79.2,0.2,90.1,0.1,4.2,0.1,...,0.1,0.1,6.2,0.1,2.4,0.1,3.0,0.1,1.0,0.1
3,98.0,0.1,98.9,0.1,78.3,0.1,87.6,0.1,15.5,0.1,...,0.0,0.1,2.2,0.1,0.4,0.1,2.0,0.1,1.1,0.1
4,95.7,0.1,98.1,0.1,62.3,0.1,72.6,0.1,6.0,0.1,...,0.3,0.1,12.9,0.1,5.4,0.1,4.3,0.1,1.9,0.1


In [28]:
# Put the Geography column back in front of each year's race/origin columns.
total_demo_state_2017_df = pd.concat(
    (demo_state_2017_df, demo_state_cols_2017_df), axis="columns"
)
total_demo_state_2016_df = pd.concat(
    (demo_state_2016_df, demo_state_cols_2016_df), axis="columns"
)
total_demo_state_2015_df = pd.concat(
    (demo_state_2015_df, demo_state_cols_2015_df), axis="columns"
)
total_demo_state_2014_df = pd.concat(
    (demo_state_2014_df, demo_state_cols_2014_df), axis="columns"
)
total_demo_state_2013_df = pd.concat(
    (demo_state_2013_df, demo_state_cols_2013_df), axis="columns"
)

total_demo_state_2013_df.head()

Unnamed: 0,Geography,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American,...,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races,60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races
0,Alabama,98.5,0.1,99.2,0.1,69.3,0.1,78.9,0.1,26.3,...,0.0,0.1,1.2,0.1,0.2,0.1,1.5,0.1,0.8,0.1
1,Alaska,92.1,0.2,97.6,0.3,66.9,0.1,76.8,0.4,3.5,...,0.5,0.1,1.2,0.1,0.6,0.2,7.9,0.2,2.4,0.3
2,Arizona,97.0,0.1,99.0,0.1,79.2,0.2,90.1,0.1,4.2,...,0.1,0.1,6.2,0.1,2.4,0.1,3.0,0.1,1.0,0.1
3,Arkansas,98.0,0.1,98.9,0.1,78.3,0.1,87.6,0.1,15.5,...,0.0,0.1,2.2,0.1,0.4,0.1,2.0,0.1,1.1,0.1
4,California,95.7,0.1,98.1,0.1,62.3,0.1,72.6,0.1,6.0,...,0.3,0.1,12.9,0.1,5.4,0.1,4.3,0.1,1.9,0.1


In [29]:
# Look up the column labels of each year's combined race table.
# NOTE: a notebook cell renders only its final expression, so only the 2013
# index is shown below; the four preceding lines are evaluated and discarded.
total_demo_state_2017_df.columns
total_demo_state_2016_df.columns
total_demo_state_2015_df.columns
total_demo_state_2014_df.columns
total_demo_state_2013_df.columns

Index(['Geography',
       'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race',
       'Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race',
       '60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race',
       '60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race',
       'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White',
       'Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White',
       '60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White',
       '60 years and over; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - White',
       'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American',
       'Total; Margin of Error; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American',
       '60 years and over; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One r

In [30]:
# Labels kept from every year's race table: the state name followed by the
# "Total; Estimate" column of each race category. The same nine labels are
# selected from all five survey years.
race_estimate_columns = [
    'Geography',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - American Indian and Alaska Native',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Asian',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race',
    'Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races',
]

# One trimmed table per survey year.
demo_state_2017_data = total_demo_state_2017_df[race_estimate_columns]
demo_state_2016_data = total_demo_state_2016_df[race_estimate_columns]
demo_state_2015_data = total_demo_state_2015_df[race_estimate_columns]
demo_state_2014_data = total_demo_state_2014_df[race_estimate_columns]
demo_state_2013_data = total_demo_state_2013_df[race_estimate_columns]

# Preview the oldest year.
demo_state_2013_data.head()

Unnamed: 0,Geography,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - White,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Black or African American,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - American Indian and Alaska Native,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Asian,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Native Hawaiian and Other Pacific Islander,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - One race - Some other race,Total; Estimate; RACE AND HISPANIC OR LATINO ORIGIN - Two or more races
0,Alabama,98.5,69.3,26.3,0.5,1.2,0.0,1.2,1.5
1,Alaska,92.1,66.9,3.5,14.1,5.4,1.1,1.2,7.9
2,Arizona,97.0,79.2,4.2,4.4,2.8,0.2,6.2,3.0
3,Arkansas,98.0,78.3,15.5,0.6,1.3,0.2,2.2,2.0
4,California,95.7,62.3,6.0,0.8,13.3,0.4,12.9,4.3


In [31]:
# Short category labels, listed in the same order as the selected race columns.
_race_labels = (
    "All_Races",
    "White",
    "Black",
    "Native",
    "Asian",
    "Hawaiian_Other",
    "Other",
    "Mixed_Race",
)

# Relabel every year's table in place: "State" first, then
# "<category>_<two-digit year>" so the columns stay distinct once merged.
for _suffix, _frame in (
    ("17", demo_state_2017_data),
    ("16", demo_state_2016_data),
    ("15", demo_state_2015_data),
    ("14", demo_state_2014_data),
    ("13", demo_state_2013_data),
):
    _frame.columns = ["State"] + [f"{_label}_{_suffix}" for _label in _race_labels]

# Preview the oldest year with its new labels.
demo_state_2013_data.head()

Unnamed: 0,State,All_Races_13,White_13,Black_13,Native_13,Asian_13,Hawaiian_Other_13,Other_13,Mixed_Race_13
0,Alabama,98.5,69.3,26.3,0.5,1.2,0.0,1.2,1.5
1,Alaska,92.1,66.9,3.5,14.1,5.4,1.1,1.2,7.9
2,Arizona,97.0,79.2,4.2,4.4,2.8,0.2,6.2,3.0
3,Arkansas,98.0,78.3,15.5,0.6,1.3,0.2,2.2,2.0
4,California,95.7,62.3,6.0,0.8,13.3,0.4,12.9,4.3


In [32]:
# Combine the per-year race tables into one wide table keyed on State,
# oldest year first so the columns run 2013 -> 2017.
# pd.merge defaults to an inner join, so only states present in every year survive.
total_demo_2013_2014_data = pd.merge(demo_state_2013_data, demo_state_2014_data, on=["State"])
total_demo_2013_2014_2015_data = pd.merge(total_demo_2013_2014_data, demo_state_2015_data, on=["State"])
total_demo_2013_2014_2015_2016_data = pd.merge(total_demo_2013_2014_2015_data, demo_state_2016_data, on=["State"])
total_demo_state_df = pd.merge(total_demo_2013_2014_2015_2016_data, demo_state_2017_data, on=["State"])

# Save under the same "Output" root the Business section writes to. The
# previous lower-case "./output" only resolved on case-insensitive filesystems.
# Create the folder first so a fresh checkout does not fail on the write.
demo_output_dir = os.path.join(".", "Output", "Demographic")
os.makedirs(demo_output_dir, exist_ok=True)
total_demo_state_df.to_csv(os.path.join(demo_output_dir, "raceallyearsbystate.csv"))

total_demo_state_df.head()

Unnamed: 0,State,All_Races_13,White_13,Black_13,Native_13,Asian_13,Hawaiian_Other_13,Other_13,Mixed_Race_13,All_Races_14,...,Other_16,Mixed_Race_16,All_Races_17,White_17,Black_17,Native_17,Asian_17,Hawaiian_Other_17,Other_17,Mixed_Race_17
0,Alabama,98.5,69.3,26.3,0.5,1.2,0.0,1.2,1.5,98.4,...,1.3,1.8,98.2,68.4,26.5,0.5,1.3,0.0,1.4,1.8
1,Alaska,92.1,66.9,3.5,14.1,5.4,1.1,1.2,7.9,92.0,...,1.3,8.5,91.5,65.3,3.2,14.2,6.2,1.2,1.4,8.5
2,Arizona,97.0,79.2,4.2,4.4,2.8,0.2,6.2,3.0,96.9,...,7.0,3.3,96.5,77.5,4.3,4.4,3.1,0.2,7.0,3.5
3,Arkansas,98.0,78.3,15.5,0.6,1.3,0.2,2.2,2.0,98.0,...,2.3,2.2,97.6,77.3,15.4,0.7,1.4,0.2,2.5,2.4
4,California,95.7,62.3,6.0,0.8,13.3,0.4,12.9,4.3,95.5,...,13.3,4.6,95.3,60.6,5.8,0.7,14.1,0.4,13.7,4.7


## Education by Charleen

In [33]:
# Load the ACS S0102 state-level table for each survey year, 2017 back to 2013.
# skiprows=[0] drops the first line of every file, so the column labels are
# taken from the line that follows it.
(
    state_info_2017_df,
    state_info_2016_df,
    state_info_2015_df,
    state_info_2014_df,
    state_info_2013_df,
) = (
    pd.read_csv(f"./Resources/State/Demographic/ACS_{two_digit_year}_5YR_S0102_with_ann.csv", skiprows=[0])
    for two_digit_year in (17, 16, 15, 14, 13)
)

# Preview the oldest year.
state_info_2013_df.head()

Unnamed: 0,Id,Id2,Geography,Total; Estimate; Total population,Total; Margin of Error; Total population,60 years and over; Estimate; Total population,60 years and over; Margin of Error; Total population,Total; Estimate; SEX AND AGE - Male,Total; Margin of Error; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,...,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),Total; Margin of Error; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Margin of Error; GROSS RENT - Median gross rent (dollars)
0,0400000US01,1,Alabama,4799277,*****,966454,2674,48.5,0.1,44.1,...,56.0,1.0,44.8,0.6,44.0,1.0,705,3,569,9
1,0400000US02,2,Alaska,720316,*****,96430,985,52.2,0.1,50.3,...,52.6,2.7,40.6,0.9,47.4,2.7,1098,13,846,23
2,0400000US04,4,Arizona,6479703,*****,1294109,3052,49.7,0.1,46.1,...,46.1,0.9,47.5,0.4,53.9,0.9,896,4,836,8
3,0400000US05,5,Arkansas,2933369,*****,601493,2218,49.1,0.1,44.9,...,58.1,1.3,43.0,0.6,41.9,1.3,661,4,565,9
4,0400000US06,6,California,37659181,*****,6360929,7359,49.7,0.1,45.0,...,40.2,0.3,54.1,0.2,59.8,0.3,1224,2,994,4


In [34]:
# Two column subsets are pulled from each year's raw ACS table:
#   edu_state_<year>_df       -> columns whose label matches "Geography"
#   edu_state_cols_<year>_df  -> columns whose label matches the
#                                "Total; Estimate; EDUCATIONAL ATTAINMENT" prefix
_edu_patterns = ("Geography", "Total; Estimate; EDUCATIONAL ATTAINMENT")

edu_state_2017_df, edu_state_cols_2017_df = (
    state_info_2017_df.filter(regex=_pattern) for _pattern in _edu_patterns
)
edu_state_2016_df, edu_state_cols_2016_df = (
    state_info_2016_df.filter(regex=_pattern) for _pattern in _edu_patterns
)
edu_state_2015_df, edu_state_cols_2015_df = (
    state_info_2015_df.filter(regex=_pattern) for _pattern in _edu_patterns
)
edu_state_2014_df, edu_state_cols_2014_df = (
    state_info_2014_df.filter(regex=_pattern) for _pattern in _edu_patterns
)
edu_state_2013_df, edu_state_cols_2013_df = (
    state_info_2013_df.filter(regex=_pattern) for _pattern in _edu_patterns
)

# Preview the 2013 education columns.
edu_state_cols_2013_df.head()

Unnamed: 0,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Less than high school graduate,"Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - High school graduate, GED, or alternative",Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Some college or associate's degree,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Bachelor's degree or higher
0,3193338,16.9,31.1,29.4,22.6
1,454301,8.4,27.3,36.8,27.5
2,4211194,14.3,24.5,34.3,26.9
3,1936554,16.3,35.1,28.5,20.1
4,24455010,18.8,20.7,29.9,30.7


In [35]:
# Put the Geography column back in front of each year's education columns.
# The concat is column-wise, so rows are lined up on the shared index.
(
    total_edu_state_2017_df,
    total_edu_state_2016_df,
    total_edu_state_2015_df,
    total_edu_state_2014_df,
    total_edu_state_2013_df,
) = (
    pd.concat([_geography, _estimates], axis="columns")
    for _geography, _estimates in (
        (edu_state_2017_df, edu_state_cols_2017_df),
        (edu_state_2016_df, edu_state_cols_2016_df),
        (edu_state_2015_df, edu_state_cols_2015_df),
        (edu_state_2014_df, edu_state_cols_2014_df),
        (edu_state_2013_df, edu_state_cols_2013_df),
    )
)

# Preview the oldest year.
total_edu_state_2013_df.head()

Unnamed: 0,Geography,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Less than high school graduate,"Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - High school graduate, GED, or alternative",Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Some college or associate's degree,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Bachelor's degree or higher
0,Alabama,3193338,16.9,31.1,29.4,22.6
1,Alaska,454301,8.4,27.3,36.8,27.5
2,Arizona,4211194,14.3,24.5,34.3,26.9
3,Arkansas,1936554,16.3,35.1,28.5,20.1
4,California,24455010,18.8,20.7,29.9,30.7


In [36]:
# Look up the column labels of each year's combined education table.
# NOTE: a notebook cell renders only its final expression, so only the 2013
# index is shown below; the four preceding lines are evaluated and discarded.
total_edu_state_2017_df.columns
total_edu_state_2016_df.columns
total_edu_state_2015_df.columns
total_edu_state_2014_df.columns
total_edu_state_2013_df.columns


Index(['Geography',
       'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over',
       'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Less than high school graduate',
       'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - High school graduate, GED, or alternative',
       'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Some college or associate's degree',
       'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Bachelor's degree or higher'],
      dtype='object')

In [37]:
# Labels kept from every year's education table: the state name followed by
# the population-25-and-over total and its four attainment breakdowns.
# The same six labels are selected from all five survey years.
edu_estimate_columns = [
    'Geography',
    'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over',
    'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Less than high school graduate',
    'Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - High school graduate, GED, or alternative',
    "Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Some college or associate's degree",
    "Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Bachelor's degree or higher",
]

# One trimmed table per survey year.
edu_state_2017_data = total_edu_state_2017_df[edu_estimate_columns]
edu_state_2016_data = total_edu_state_2016_df[edu_estimate_columns]
edu_state_2015_data = total_edu_state_2015_df[edu_estimate_columns]
edu_state_2014_data = total_edu_state_2014_df[edu_estimate_columns]
edu_state_2013_data = total_edu_state_2013_df[edu_estimate_columns]

# Preview the oldest year.
edu_state_2013_data.head()

Unnamed: 0,Geography,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Less than high school graduate,"Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - High school graduate, GED, or alternative",Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Some college or associate's degree,Total; Estimate; EDUCATIONAL ATTAINMENT - Population 25 years and over - Bachelor's degree or higher
0,Alabama,3193338,16.9,31.1,29.4,22.6
1,Alaska,454301,8.4,27.3,36.8,27.5
2,Arizona,4211194,14.3,24.5,34.3,26.9
3,Arkansas,1936554,16.3,35.1,28.5,20.1
4,California,24455010,18.8,20.7,29.9,30.7


In [38]:
# Relabel every year's education table in place with short names that end in
# the two-digit survey year, so the columns stay distinct once merged.
# Note the total column has no underscore before the year ("..._ttl17").
for _suffix, _frame in (
    ("17", edu_state_2017_data),
    ("16", edu_state_2016_data),
    ("15", edu_state_2015_data),
    ("14", edu_state_2014_data),
    ("13", edu_state_2013_data),
):
    _frame.columns = [
        "State",
        f"Edu_25+yrs_ttl{_suffix}",
        f"25+yrs_<high_school_{_suffix}",
        f"25+yrs_high_school_{_suffix}",
        f"25+yrs_college_{_suffix}",
        f"25+yrs_>B.S._{_suffix}",
    ]

# Preview the oldest year with its new labels.
edu_state_2013_data.head()

Unnamed: 0,State,Edu_25+yrs_ttl13,25+yrs_<high_school_13,25+yrs_high_school_13,25+yrs_college_13,25+yrs_>B.S._13
0,Alabama,3193338,16.9,31.1,29.4,22.6
1,Alaska,454301,8.4,27.3,36.8,27.5
2,Arizona,4211194,14.3,24.5,34.3,26.9
3,Arkansas,1936554,16.3,35.1,28.5,20.1
4,California,24455010,18.8,20.7,29.9,30.7


In [39]:
# Combine the per-year education tables into one wide table keyed on State,
# oldest year first so the columns run 2013 -> 2017.
# pd.merge defaults to an inner join, so only states present in every year survive.
total_edu_2013_2014_data = pd.merge(edu_state_2013_data, edu_state_2014_data, on='State')
total_edu_2013_2014_2015_data = pd.merge(total_edu_2013_2014_data, edu_state_2015_data, on='State')
total_edu_2013_2014_2015_2016_data = pd.merge(total_edu_2013_2014_2015_data, edu_state_2016_data, on='State')
total_edu_state_df = pd.merge(total_edu_2013_2014_2015_2016_data, edu_state_2017_data, on='State')

# Save under the same "Output" root the Business section writes to. The
# previous lower-case "./output" only resolved on case-insensitive filesystems.
# Create the folder first so a fresh checkout does not fail on the write.
edu_output_dir = os.path.join(".", "Output", "Demographic")
os.makedirs(edu_output_dir, exist_ok=True)
total_edu_state_df.to_csv(os.path.join(edu_output_dir, "eduallyearsbystate.csv"))

total_edu_state_df.head()

Unnamed: 0,State,Edu_25+yrs_ttl13,25+yrs_<high_school_13,25+yrs_high_school_13,25+yrs_college_13,25+yrs_>B.S._13,Edu_25+yrs_ttl14,25+yrs_<high_school_14,25+yrs_high_school_14,25+yrs_college_14,...,Edu_25+yrs_ttl16,25+yrs_<high_school_16,25+yrs_high_school_16,25+yrs_college_16,25+yrs_>B.S._16,Edu_25+yrs_ttl17,25+yrs_<high_school_17,25+yrs_high_school_17,25+yrs_college_17,25+yrs_>B.S._17
0,Alabama,3193338,16.9,31.1,29.4,22.6,3217902,16.3,31.1,29.6,...,3261408,15.2,31.0,29.8,24.0,3276637,14.7,30.9,29.9,24.5
1,Alaska,454301,8.4,27.3,36.8,27.5,460319,8.2,27.5,36.6,...,470699,7.7,27.7,35.8,28.8,475442,7.6,27.6,35.8,29.0
2,Arizona,4211194,14.3,24.5,34.3,26.9,4284776,14.1,24.5,34.3,...,4440635,13.8,24.3,34.0,28.0,4516175,13.5,24.2,33.9,28.4
3,Arkansas,1936554,16.3,35.1,28.5,20.1,1949963,15.7,35.0,28.7,...,1973591,14.8,34.7,29.0,21.5,1985770,14.4,34.3,29.2,22.0
4,California,24455010,18.8,20.7,29.9,30.7,24865866,18.5,20.7,29.8,...,25554412,17.9,20.6,29.5,32.0,25950818,17.5,20.6,29.3,32.6


In [40]:
# Reload the ACS S0102 state-level tables (2017 back to 2013) for the income
# columns. These are the same files and variable names the education section
# reads, so the frames are simply rebound to freshly loaded copies.
# skiprows=[0] drops the first line of every file, so the column labels are
# taken from the line that follows it.
(
    state_info_2017_df,
    state_info_2016_df,
    state_info_2015_df,
    state_info_2014_df,
    state_info_2013_df,
) = (
    pd.read_csv(f"./Resources/State/Demographic/ACS_{two_digit_year}_5YR_S0102_with_ann.csv", skiprows=[0])
    for two_digit_year in (17, 16, 15, 14, 13)
)

# Preview the most recent year.
state_info_2017_df.head()

Unnamed: 0,Id,Id2,Geography,Total; Estimate; Total population,Total; Margin of Error; Total population,60 years and over; Estimate; Total population,60 years and over; Margin of Error; Total population,Total; Estimate; SEX AND AGE - Male,Total; Margin of Error; SEX AND AGE - Male,60 years and over; Estimate; SEX AND AGE - Male,...,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - Less than 30 percent,Total; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Estimate; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,60 years and over; Margin of Error; Renter-occupied housing units - GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME IN THE PAST 12 MONTHS - 30 percent or more,Total; Estimate; GROSS RENT - Median gross rent (dollars),Total; Margin of Error; GROSS RENT - Median gross rent (dollars),60 years and over; Estimate; GROSS RENT - Median gross rent (dollars),60 years and over; Margin of Error; GROSS RENT - Median gross rent (dollars)
0,0400000US01,1,Alabama,4850771,*****,1065625,3368,48.5,0.1,44.4,...,55.2,1.0,42.6,0.5,44.8,1.0,747,3,623,8
1,0400000US02,2,Alaska,738565,*****,117047,1030,52.3,0.1,50.8,...,56.5,2.6,41.6,1.1,43.5,2.6,1200,13,902,29
2,0400000US04,4,Arizona,6809946,*****,1502688,3385,49.7,0.1,46.3,...,48.3,0.8,45.2,0.4,51.7,0.8,972,4,880,8
3,0400000US05,5,Arkansas,2977944,*****,655552,2090,49.1,0.1,45.3,...,56.6,1.3,40.7,0.6,43.4,1.3,709,5,613,9
4,0400000US06,6,California,38982847,*****,7292299,6791,49.7,0.1,45.4,...,40.0,0.3,53.1,0.1,60.0,0.3,1358,3,1109,4


In [41]:
# Two column subsets are pulled from each year's raw ACS table:
#   income_state_<year>_df       -> columns whose label matches "Geography"
#   income_state_cols_<year>_df  -> columns whose label matches the
#                                   "Total; Estimate; INCOME IN THE PAST 12 MONTHS" prefix
_income_patterns = ("Geography", "Total; Estimate; INCOME IN THE PAST 12 MONTHS")

income_state_2017_df, income_state_cols_2017_df = (
    state_info_2017_df.filter(regex=_pattern) for _pattern in _income_patterns
)
income_state_2016_df, income_state_cols_2016_df = (
    state_info_2016_df.filter(regex=_pattern) for _pattern in _income_patterns
)
income_state_2015_df, income_state_cols_2015_df = (
    state_info_2015_df.filter(regex=_pattern) for _pattern in _income_patterns
)
income_state_2014_df, income_state_cols_2014_df = (
    state_info_2014_df.filter(regex=_pattern) for _pattern in _income_patterns
)
income_state_2013_df, income_state_cols_2013_df = (
    state_info_2013_df.filter(regex=_pattern) for _pattern in _income_patterns
)

# Preview the 2017 income columns.
income_state_cols_2017_df.head()

Unnamed: 0,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With earnings,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With earnings - Mean earnings (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income - Mean Social Security income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income - Mean Supplemental Security Income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income - Mean cash public assistance income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income - Mean retirement income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Food Stamp/SNAP benefits
0,1856695,72.4,67093,35.4,18123,6.7,9329,1.7,2693,21.4,23470,15.0
1,252536,86.0,90061,21.1,17091,4.5,9901,6.3,4020,19.4,30401,10.3
2,2482311,74.7,74501,32.9,19852,4.3,10165,2.0,2739,20.2,25884,12.5
3,1147291,73.1,63854,35.8,17917,6.8,9245,2.1,2933,18.0,20108,13.6
4,12888128,80.6,96538,27.1,18270,6.2,10033,3.6,4596,16.0,31540,9.3


In [42]:
# Put the Geography column back in front of each year's income columns.
# The concat is column-wise, so rows are lined up on the shared index.
(
    total_income_state_2017_df,
    total_income_state_2016_df,
    total_income_state_2015_df,
    total_income_state_2014_df,
    total_income_state_2013_df,
) = (
    pd.concat([_geography, _estimates], axis="columns")
    for _geography, _estimates in (
        (income_state_2017_df, income_state_cols_2017_df),
        (income_state_2016_df, income_state_cols_2016_df),
        (income_state_2015_df, income_state_cols_2015_df),
        (income_state_2014_df, income_state_cols_2014_df),
        (income_state_2013_df, income_state_cols_2013_df),
    )
)

# Preview the most recent year.
total_income_state_2017_df.head()

Unnamed: 0,Geography,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With earnings,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With earnings - Mean earnings (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income - Mean Social Security income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income - Mean Supplemental Security Income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income - Mean cash public assistance income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income - Mean retirement income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS) - Households - With Food Stamp/SNAP benefits
0,Alabama,1856695,72.4,67093,35.4,18123,6.7,9329,1.7,2693,21.4,23470,15.0
1,Alaska,252536,86.0,90061,21.1,17091,4.5,9901,6.3,4020,19.4,30401,10.3
2,Arizona,2482311,74.7,74501,32.9,19852,4.3,10165,2.0,2739,20.2,25884,12.5
3,Arkansas,1147291,73.1,63854,35.8,17917,6.8,9245,2.1,2933,18.0,20108,13.6
4,California,12888128,80.6,96538,27.1,18270,6.2,10033,3.6,4596,16.0,31540,9.3


In [43]:
# Look up the column labels of each year's combined income table.
# NOTE: a notebook cell renders only its final expression, so only the 2013
# index is shown below; the four preceding lines are evaluated and discarded.
# The income labels embed the dollar year ("IN 2013 INFLATION-ADJUSTED DOLLARS"
# in the 2013 output vs. "IN 2017 ..." in the 2017 table), so the index shown
# for 2013 cannot be reused verbatim for the other years.
total_income_state_2017_df.columns
total_income_state_2016_df.columns
total_income_state_2015_df.columns
total_income_state_2014_df.columns
total_income_state_2013_df.columns

Index(['Geography',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With earnings',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With earnings - Mean earnings (dollars)',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income - Mean Social Security income (dollars)',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income',
       'Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income - Mean Sup

In [44]:
# Every yearly income table uses the same twelve household-income columns; the
# only difference between years is the year embedded in the label prefix.
_INCOME_COLUMN_PREFIX = "Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN {year} INFLATION-ADJUSTED DOLLARS) - "
_INCOME_COLUMN_SUFFIXES = [
    "Households",
    "Households - With earnings",
    "Households - With earnings - Mean earnings (dollars)",
    "Households - With Social Security income",
    "Households - With Social Security income - Mean Social Security income (dollars)",
    "Households - With Supplemental Security Income",
    "Households - With Supplemental Security Income - Mean Supplemental Security Income (dollars)",
    "Households - With cash public assistance income",
    "Households - With cash public assistance income - Mean cash public assistance income (dollars)",
    "Households - With retirement income",
    "Households - With retirement income - Mean retirement income (dollars)",
    "Households - With Food Stamp/SNAP benefits",
]


def _select_income_columns(frame, year):
    """Return 'Geography' plus the twelve household-income columns for `year`.

    Parameters
    ----------
    frame : pandas.DataFrame
        One yearly income table whose labels embed `year` in the
        "(IN <year> INFLATION-ADJUSTED DOLLARS)" part.
    year : int or str
        Year substituted into the label prefix.

    Returns
    -------
    pandas.DataFrame
        A copy holding the 13 selected columns, in the fixed order above.

    Raises
    ------
    KeyError
        If any expected label is missing from `frame`.
    """
    prefix = _INCOME_COLUMN_PREFIX.format(year=year)
    wanted = ["Geography"] + [prefix + suffix for suffix in _INCOME_COLUMN_SUFFIXES]
    # Explicit copy: the selection has its columns relabelled in place later,
    # and this keeps it detached from the raw yearly table.
    return frame[wanted].copy()


income_state_2017_data = _select_income_columns(total_income_state_2017_df, 2017)
income_state_2016_data = _select_income_columns(total_income_state_2016_df, 2016)
income_state_2015_data = _select_income_columns(total_income_state_2015_df, 2015)
income_state_2014_data = _select_income_columns(total_income_state_2014_df, 2014)
income_state_2013_data = _select_income_columns(total_income_state_2013_df, 2013)

# Only the final expression of a cell is rendered, so a single preview is kept.
income_state_2013_data.head()

Unnamed: 0,Geography,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With earnings,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With earnings - Mean earnings (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Social Security income - Mean Social Security income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Supplemental Security Income - Mean Supplemental Security Income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With cash public assistance income - Mean cash public assistance income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income,Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With retirement income - Mean retirement income (dollars),Total; Estimate; INCOME IN THE PAST 12 MONTHS (IN 2013 INFLATION-ADJUSTED DOLLARS) - Households - With Food Stamp/SNAP benefits
0,Alabama,1838683,73.5,62041,33.3,16503,6.4,8617,1.8,2611,20.4,21452,15.2
1,Alaska,251899,87.0,82512,19.4,15114,4.0,9013,6.4,4258,18.5,28400,10.4
2,Arizona,2370289,75.8,67498,30.6,18179,3.9,9431,2.6,3544,19.4,24632,13.2
3,Arkansas,1129723,74.1,57511,34.5,16371,6.6,8498,2.4,3286,17.4,19138,14.6
4,California,12542460,80.7,85703,25.4,16777,5.8,9791,4.0,5489,15.5,28083,8.1


In [45]:
# Short labels for the twelve income columns; "{yy}" is the two-digit year.
# The retirement-share label ends with an extra "_%"; it is reproduced as-is so
# the column names written to the exported CSV do not change.
_INCOME_SHORT_LABEL_TEMPLATES = [
    "Ttl_Households_{yy}",
    "Households_Earnings%_{yy}",
    "Mean_Earnings$_{yy}",
    "SS_Income%_{yy}",
    "Mean_SS_Income$_{yy}",
    "SSupp_Income%_{yy}",
    "Mean_SSupp_Income$_{yy}",
    "Public_Assist_Income%_{yy}",
    "Mean_Public_Assist_Income$_{yy}",
    "Retirement_Income%_{yy}_%",
    "Mean_Retirement_Income$_{yy}",
    "Food_Stamp_Benefits%_{yy}",
]


def _income_short_labels(yy):
    """Return the 13 short column labels ("State" first) for two-digit year `yy`."""
    return ["State"] + [template.format(yy=yy) for template in _INCOME_SHORT_LABEL_TEMPLATES]


# Relabel each yearly selection in place. The assignment is positional, so it
# relies on the frames holding Geography followed by the twelve income columns
# in the same order as the templates above.
for _yy, _frame in (
    ("17", income_state_2017_data),
    ("16", income_state_2016_data),
    ("15", income_state_2015_data),
    ("14", income_state_2014_data),
    ("13", income_state_2013_data),
):
    _frame.columns = _income_short_labels(_yy)

# Only the final expression of a cell is rendered, so a single preview is kept.
income_state_2013_data.head()

Unnamed: 0,State,Ttl_Households_13,Households_Earnings%_13,Mean_Earnings$_13,SS_Income%_13,Mean_SS_Income$_13,SSupp_Income%_13,Mean_SSupp_Income$_13,Public_Assist_Income%_13,Mean_Public_Assist_Income$_13,Retirement_Income%_13_%,Mean_Retirement_Income$_13,Food_Stamp_Benefits%_13
0,Alabama,1838683,73.5,62041,33.3,16503,6.4,8617,1.8,2611,20.4,21452,15.2
1,Alaska,251899,87.0,82512,19.4,15114,4.0,9013,6.4,4258,18.5,28400,10.4
2,Arizona,2370289,75.8,67498,30.6,18179,3.9,9431,2.6,3544,19.4,24632,13.2
3,Arkansas,1129723,74.1,57511,34.5,16371,6.6,8498,2.4,3286,17.4,19138,14.6
4,California,12542460,80.7,85703,25.4,16777,5.8,9791,4.0,5489,15.5,28083,8.1


In [46]:
# Join the five yearly income tables on "State", adding one year per step
# (2013 -> 2017). pd.merge defaults to an inner join, so a state that is missing
# from any single year is absent from the combined table.
total_income_2013_2014_data = pd.merge(income_state_2013_data, income_state_2014_data, on=["State"])
total_income_2013_2014_2015_data = pd.merge(total_income_2013_2014_data, income_state_2015_data, on=["State"])
total_income_2013_2014_2015_2016_data = pd.merge(total_income_2013_2014_2015_data, income_state_2016_data, on=["State"])
total_income_state_df = pd.merge(total_income_2013_2014_2015_2016_data, income_state_2017_data, on=["State"])

# NOTE(review): this path uses lowercase "output/demographic", while the paths
# defined at the top of the notebook use "Output/..."; confirm the intended
# directory on case-sensitive filesystems.
income_all_years_csv = "./output/demographic/incomeallyearsbystate.csv"

# to_csv does not create missing directories; make sure the target exists so a
# fresh checkout can run the notebook end to end.
os.makedirs(os.path.dirname(income_all_years_csv), exist_ok=True)
total_income_state_df.to_csv(income_all_years_csv)
total_income_state_df.head()

Unnamed: 0,State,Ttl_Households_13,Households_Earnings%_13,Mean_Earnings$_13,SS_Income%_13,Mean_SS_Income$_13,SSupp_Income%_13,Mean_SSupp_Income$_13,Public_Assist_Income%_13,Mean_Public_Assist_Income$_13,...,Mean_Earnings$_17,SS_Income%_17,Mean_SS_Income$_17,SSupp_Income%_17,Mean_SSupp_Income$_17,Public_Assist_Income%_17,Mean_Public_Assist_Income$_17,Retirement_Income%_17_%,Mean_Retirement_Income$_17,Food_Stamp_Benefits%_17
0,Alabama,1838683,73.5,62041,33.3,16503,6.4,8617,1.8,2611,...,67093,35.4,18123,6.7,9329,1.7,2693,21.4,23470,15.0
1,Alaska,251899,87.0,82512,19.4,15114,4.0,9013,6.4,4258,...,90061,21.1,17091,4.5,9901,6.3,4020,19.4,30401,10.3
2,Arizona,2370289,75.8,67498,30.6,18179,3.9,9431,2.6,3544,...,74501,32.9,19852,4.3,10165,2.0,2739,20.2,25884,12.5
3,Arkansas,1129723,74.1,57511,34.5,16371,6.6,8498,2.4,3286,...,63854,35.8,17917,6.8,9245,2.1,2933,18.0,20108,13.6
4,California,12542460,80.7,85703,25.4,16777,5.8,9791,4.0,5489,...,96538,27.1,18270,6.2,10033,3.6,4596,16.0,31540,9.3


## Age Group by Charleen

In [47]:
# Load the state-level age/sex population table. skiprows=[0] discards the
# file's first line, so the header is read from the line that follows it.
state_info_df = pd.read_csv(
    filepath_or_buffer="./Resources/State/Demographic/PEP_2017_PEPAGESEX_with_ann.csv",
    skiprows=[0],
)

state_info_df.head()

Unnamed: 0,Id,Id2,Geography,"April 1, 2010 - Census - Both Sexes; Total","April 1, 2010 - Census - Male; Total","April 1, 2010 - Census - Female; Total","April 1, 2010 - Estimates Base - Both Sexes; Total","April 1, 2010 - Estimates Base - Male; Total","April 1, 2010 - Estimates Base - Female; Total",Population Estimate (as of July 1) - 2010 - Both Sexes; Total,...,Population Estimate (as of July 1) - 2014 - Females; Median age (years),Population Estimate (as of July 1) - 2015 - Both Sexes; Median age (years),Population Estimate (as of July 1) - 2015 - Male; Median age (years),Population Estimate (as of July 1) - 2015 - Female; Median age (years),Population Estimate (as of July 1) - 2016 - Both Sexes; Median age (years),Population Estimate (as of July 1) - 2016 - Male; Median age (years),Population Estimate (as of July 1) - 2016 - Female; Median age (years),Population Estimate (as of July 1) - 2017 - Both Sexes; Median age (years),Population Estimate (as of July 1) - 2017 - Male; Median age (years),Population Estimate (as of July 1) - 2017 - Female; Median age (years)
0,0400000US01,1,Alabama,4779736,2320188,2459548,4780135,2320482,2459653,4785579,...,40.0,38.7,37.2,40.1,38.9,37.4,40.3,39.0,37.5,40.5
1,0400000US02,2,Alaska,710231,369628,340603,710249,369642,340607,714015,...,34.2,33.9,33.5,34.4,34.0,33.6,34.5,34.3,33.9,34.8
2,0400000US04,4,Arizona,6392017,3175823,3216194,6392309,3176053,3216256,6407002,...,38.3,37.2,35.9,38.6,37.4,36.2,38.8,37.7,36.4,39.1
3,0400000US05,5,Arkansas,2915918,1431637,1484281,2916031,1431690,1484341,2921737,...,39.2,37.9,36.5,39.2,38.0,36.6,39.4,38.1,36.8,39.5
4,0400000US06,6,California,37253956,18517830,18736126,37254518,18518136,18736382,37327690,...,37.2,36.1,35.0,37.3,36.4,35.3,37.5,36.6,35.5,37.7


In [48]:
# Split the table column-wise by label pattern:
#   - age_state_df keeps the column(s) whose label matches "Geography";
#   - age_state_cols_df keeps every column whose label matches "Both Sexes; Total".
age_state_df = state_info_df.filter(regex="Geography", axis="columns")
age_state_cols_df = state_info_df.filter(regex="Both Sexes; Total", axis="columns")

age_state_df.head()  # not rendered: only the cell's final expression is displayed
age_state_cols_df.head()

Unnamed: 0,"April 1, 2010 - Census - Both Sexes; Total","April 1, 2010 - Estimates Base - Both Sexes; Total",Population Estimate (as of July 1) - 2010 - Both Sexes; Total,Population Estimate (as of July 1) - 2011 - Both Sexes; Total,Population Estimate (as of July 1) - 2012 - Both Sexes; Total,Population Estimate (as of July 1) - 2013 - Both Sexes; Total,Population Estimate (as of July 1) - 2014 - Both Sexes; Total,Population Estimate (as of July 1) - 2015 - Both Sexes; Total,Population Estimate (as of July 1) - 2016 - Both Sexes; Total,Population Estimate (as of July 1) - 2017 - Both Sexes; Total,...,"April 1, 2010 - Census - Both Sexes; Total - 85 years and over","April 1, 2010 - Estimates Base - Both Sexes; Total - 85 years and over",Population Estimate (as of July 1) - 2010 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2011 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2012 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2017 - Both Sexes; Total - 85 years and over
0,4779736,4780135,4785579,4798649,4813946,4827660,4840037,4850858,4860545,4874747,...,75684,75715,76240,77906,80188,81790,83741,85465,86919,88179
1,710231,710249,714015,722259,730825,736760,736759,737979,741522,739795,...,4711,4711,4763,4973,5167,5448,5761,6045,6316,6554
2,6392017,6392309,6407002,6465488,6544211,6616124,6706435,6802262,6908642,7016270,...,103400,103500,104701,109208,113777,117960,122994,129004,133252,136727
3,2915918,2916031,2921737,2938640,2949208,2956780,2964800,2975626,2988231,3004279,...,51402,51415,51735,52696,53753,54331,55348,56361,57246,57882
4,37253956,37254518,37327690,37672654,38019006,38347383,38701278,39032444,39296476,39536653,...,600968,601063,606864,628779,649713,667219,686439,706895,721049,732793


In [49]:
# Put the Geography column back in front of the "Both Sexes; Total" columns by
# concatenating the two frames side by side (rows are aligned on the index).
total_age_state_df = pd.concat(
    objs=[age_state_df, age_state_cols_df],
    axis="columns",
)

total_age_state_df.head()

Unnamed: 0,Geography,"April 1, 2010 - Census - Both Sexes; Total","April 1, 2010 - Estimates Base - Both Sexes; Total",Population Estimate (as of July 1) - 2010 - Both Sexes; Total,Population Estimate (as of July 1) - 2011 - Both Sexes; Total,Population Estimate (as of July 1) - 2012 - Both Sexes; Total,Population Estimate (as of July 1) - 2013 - Both Sexes; Total,Population Estimate (as of July 1) - 2014 - Both Sexes; Total,Population Estimate (as of July 1) - 2015 - Both Sexes; Total,Population Estimate (as of July 1) - 2016 - Both Sexes; Total,...,"April 1, 2010 - Census - Both Sexes; Total - 85 years and over","April 1, 2010 - Estimates Base - Both Sexes; Total - 85 years and over",Population Estimate (as of July 1) - 2010 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2011 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2012 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2017 - Both Sexes; Total - 85 years and over
0,Alabama,4779736,4780135,4785579,4798649,4813946,4827660,4840037,4850858,4860545,...,75684,75715,76240,77906,80188,81790,83741,85465,86919,88179
1,Alaska,710231,710249,714015,722259,730825,736760,736759,737979,741522,...,4711,4711,4763,4973,5167,5448,5761,6045,6316,6554
2,Arizona,6392017,6392309,6407002,6465488,6544211,6616124,6706435,6802262,6908642,...,103400,103500,104701,109208,113777,117960,122994,129004,133252,136727
3,Arkansas,2915918,2916031,2921737,2938640,2949208,2956780,2964800,2975626,2988231,...,51402,51415,51735,52696,53753,54331,55348,56361,57246,57882
4,California,37253956,37254518,37327690,37672654,38019006,38347383,38701278,39032444,39296476,...,600968,601063,606864,628779,649713,667219,686439,706895,721049,732793


In [50]:
# Remove every column whose label contains "2011" or "2012", one pattern at a
# time. Labels containing "2010" are left in place.
for _year_pattern in ('2011', '2012'):
    _matched = total_age_state_df.filter(regex=_year_pattern).columns
    total_age_state_df = total_age_state_df.drop(columns=_matched)

total_age_state_df.columns

Index(['Geography', 'April 1, 2010 - Census - Both Sexes; Total',
       'April 1, 2010 - Estimates Base - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2010 - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2013 - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2014 - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2015 - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2016 - Both Sexes; Total',
       'Population Estimate (as of July 1) - 2017 - Both Sexes; Total',
       'April 1, 2010 - Census - Both Sexes; Total - Under 5 years',
       ...
       'Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 80 to 84 years',
       'Population Estimate (as of July 1) - 2017 - Both Sexes; Total - 80 to 84 years',
       'April 1, 2010 - Census - Both Sexes; Total - 85 years and over',
       'April 1, 2010 - Estimates Base - Both Sexes; Total - 85 years and over',
       'Population Estimate (

In [51]:
# Age brackets in table order: "Under 5 years", the five-year bands from
# "5 to 9 years" through "80 to 84 years", then "85 years and over".
_age_brackets = (
    ['Under 5 years']
    + [f'{start} to {start + 4} years' for start in range(5, 85, 5)]
    + ['85 years and over']
)
_estimate_years = range(2013, 2018)  # 2013 .. 2017 inclusive

# Keep Geography plus the July 1 population estimate of every bracket for each
# year. Columns are ordered bracket by bracket, with the years ascending inside
# each bracket.
age_state_data = total_age_state_df[
    ['Geography']
    + [
        f'Population Estimate (as of July 1) - {year} - Both Sexes; Total - {bracket}'
        for bracket in _age_brackets
        for year in _estimate_years
    ]
]
age_state_data.head()

Unnamed: 0,Geography,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - Under 5 years,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - Under 5 years,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - Under 5 years,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - Under 5 years,Population Estimate (as of July 1) - 2017 - Both Sexes; Total - Under 5 years,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - 5 to 9 years,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - 5 to 9 years,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - 5 to 9 years,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 5 to 9 years,...,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - 80 to 84 years,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - 80 to 84 years,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - 80 to 84 years,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 80 to 84 years,Population Estimate (as of July 1) - 2017 - Both Sexes; Total - 80 to 84 years,Population Estimate (as of July 1) - 2013 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2014 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2015 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2016 - Both Sexes; Total - 85 years and over,Population Estimate (as of July 1) - 2017 - Both Sexes; Total - 85 years and over
0,Alabama,294541,293552,293904,293581,293554,306895,306281,305293,303815,...,90206,90879,92034,93715,95101,81790,83741,85465,86919,88179
1,Alaska,55261,55245,55022,54664,54083,52020,51736,51677,52429,...,6527,6670,6760,7031,7339,5448,5761,6045,6316,6554
2,Arizona,433645,433312,434662,435789,437262,458750,458614,456082,454833,...,127272,129333,133074,137291,142462,117960,122994,129004,133252,136727
3,Arkansas,191474,190937,190993,191243,191435,200062,198801,197866,196597,...,57087,57489,58387,59144,60107,54331,55348,56361,57246,57882
4,California,2502704,2506873,2508080,2484772,2471513,2557993,2552838,2542841,2537586,...,616518,621007,627245,635363,647135,667219,686439,706895,721049,732793


In [52]:
# Swap the long Census headers for short "<year> - <age bracket>" labels.
# Labels run bracket by bracket, and within each bracket year by year
# (2013 through 2017), preceded by the single "State" column.
age_brackets = (
    ["Under 5 years"]
    + [f"{low} to {low + 4} years" for low in range(5, 85, 5)]
    + ["85 years and over"]
)
estimate_years = range(2013, 2018)

short_labels = [
    f"{year} - {bracket}"
    for bracket in age_brackets
    for year in estimate_years
]
# NOTE(review): the first label is written without the " - " separator, unlike
# every other label. It is kept verbatim so the resulting header is unchanged;
# confirm whether anything relies on it before normalising.
short_labels[0] = "2013 Under 5 years"

age_state_data.columns = ["State"] + short_labels
age_state_data.head()

Unnamed: 0,State,2013 Under 5 years,2014 - Under 5 years,2015 - Under 5 years,2016 - Under 5 years,2017 - Under 5 years,2013 - 5 to 9 years,2014 - 5 to 9 years,2015 - 5 to 9 years,2016 - 5 to 9 years,...,2013 - 80 to 84 years,2014 - 80 to 84 years,2015 - 80 to 84 years,2016 - 80 to 84 years,2017 - 80 to 84 years,2013 - 85 years and over,2014 - 85 years and over,2015 - 85 years and over,2016 - 85 years and over,2017 - 85 years and over
0,Alabama,294541,293552,293904,293581,293554,306895,306281,305293,303815,...,90206,90879,92034,93715,95101,81790,83741,85465,86919,88179
1,Alaska,55261,55245,55022,54664,54083,52020,51736,51677,52429,...,6527,6670,6760,7031,7339,5448,5761,6045,6316,6554
2,Arizona,433645,433312,434662,435789,437262,458750,458614,456082,454833,...,127272,129333,133074,137291,142462,117960,122994,129004,133252,136727
3,Arkansas,191474,190937,190993,191243,191435,200062,198801,197866,196597,...,57087,57489,58387,59144,60107,54331,55348,56361,57246,57882
4,California,2502704,2506873,2508080,2484772,2471513,2557993,2552838,2542841,2537586,...,616518,621007,627245,635363,647135,667219,686439,706895,721049,732793


In [53]:
# Write the age-group-by-state table to CSV (the file name marks it as unfinished).
# The export of total_age_state_df remains disabled:
# total_age_state_df.to_csv("./output/demographic/ageallyearsbystate.csv")
age_groups_csv = "./output/demographic/agegroupsbystatenotfinished.csv"
age_state_data.to_csv(age_groups_csv)

## Crime by Tanique
* #### Please check Tanique's work in the Team/Tanique folder

In [108]:
# Path of the crime workbook
crime_data_file = "Team/Tanique/Resources/CrimeDataFinal.xlsx"

# Load the workbook into a DataFrame
excel_reader_crime = pd.read_excel(crime_data_file)

# Build the crime DataFrame: per-state mean of each yearly crime column
crime_year_cols = [f"Crime {year}" for year in range(2011, 2016)]
crime_by_state = excel_reader_crime.groupby("State")
crime_df = crime_by_state[crime_year_cols].mean()
crime_df.head()

Unnamed: 0_level_0,Crime 2011,Crime 2012,Crime 2013,Crime 2014,Crime 2015
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALABAMA,2101,1918,2119,1815,140450
ALASKA,39607,29690,29568,25935,31402
ARIZONA,285528,256968,279551,261448,226367
ARKANSAS,108281,137962,138054,119850,127083
CALIFORNIA,1183470,1228185,1212801,1217089,1158479


In [54]:
# Folder that holds the crime workbooks
path =r'./Team/Tanique/Resources/'

# Collect every .xls workbook in the folder. glob gives no ordering guarantee
# (it depends on the file system), and the combined frame shown below carries
# no column identifying the source file, so the paths are sorted to keep the
# row order reproducible between runs and machines.
allFiles = sorted(glob.glob(os.path.join(path, "*.xls")))

# Read each workbook into its own DataFrame
list_ = []

for file_ in allFiles:
    df = pd.read_excel(file_,index_col=None)
    list_.append(df)

# Stack all workbooks row-wise under a fresh 0..n-1 index
frame = pd.concat(list_, axis = 0, ignore_index = True, sort=False)

frame.head()

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ALABAMA,2101.0,15.0,531,0,0.0,11,4,509,5,0,9,16,10,20,0,300,287,30
1,ALASKA,39607.0,2006.0,3824,32,81.0,225,1668,3097,266,44,245,102,887,401,356,2341,4420,1115
2,ARIZONA,285528.0,8589.0,40144,244,196.0,1736,6413,33888,1402,239,2101,200,11164,2400,1702,30886,35496,17374
3,ARKANSAS,108281.0,3229.0,12948,59,139.0,435,2596,10213,285,59,1952,35,1125,943,133,8783,7758,2681
4,CALIFORNIA,1183470.0,107165.0,147842,1512,1757.0,17431,86465,84090,10350,1047,6998,1100,21921,27690,11143,188188,104345,11246


In [55]:
# Keep only the ALABAMA rows of the combined crime frame, then renumber from 0
alabama_crime_pd = frame[frame["State"].eq("ALABAMA")]
alabama_crime_pd_reindexed = alabama_crime_pd.reset_index(drop=True)
alabama_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ALABAMA,2101.0,15.0,531,0,0.0,11,4,509,5,0,9,16,10,20,0,300,287,30
1,ALABAMA,1918.0,23.0,489,1,1.0,14,7,462,3,0,22,20,4,16,4,306,286,17
2,ALABAMA,2119.0,26.0,684,2,2.0,12,10,670,2,1,15,17,6,27,0,325,259,21
3,ALABAMA,1815.0,33.0,695,1,1.0,20,11,679,0,1,19,7,5,15,0,220,162,21
4,ALABAMA,140450.0,5537.0,19386,283,336.0,1088,3830,15558,727,93,4119,240,1489,1374,457,8967,7863,2187


In [56]:
# Keep only the ALASKA rows of the combined crime frame, then renumber from 0
alaska_crime_pd = frame[frame["State"].eq("ALASKA")]
alaska_crime_pd_reindexed = alaska_crime_pd.reset_index(drop=True)
alaska_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ALASKA,39607.0,2006.0,3824,32,81.0,225,1668,3097,266,44,245,102,887,401,356,2341,4420,1115
1,ALASKA,29690.0,1618.0,3602,16,86.0,203,1313,3055,203,35,158,104,770,225,275,1547,2951,928
2,ALASKA,29568.0,1638.0,4006,22,106.0,222,1288,3461,215,66,172,87,776,234,219,1475,2689,939
3,ALASKA,25935.0,1577.0,3451,19,87.0,208,1263,2824,204,141,182,62,732,218,250,1159,2336,731
4,ALASKA,31402.0,2193.0,3453,47,118.0,294,1734,2540,361,67,230,72,898,303,240,1212,3163,795


In [57]:
# Keep only the ARIZONA rows of the combined crime frame, then renumber from 0
arizona_crime_pd = frame[frame["State"].eq("ARIZONA")]
arizona_crime_pd_reindexed = arizona_crime_pd.reset_index(drop=True)
arizona_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ARIZONA,285528.0,8589.0,40144,244,196.0,1736,6413,33888,1402,239,2101,200,11164,2400,1702,30886,35496,17374
1,ARIZONA,256968.0,8101.0,38802,235,220.0,1836,5810,32314,1535,638,2037,213,10437,2281,1489,27273,31263,14071
2,ARIZONA,279551.0,9493.0,42399,253,265.0,1815,7160,36258,1623,216,2287,201,11283,2637,1688,30962,31662,15533
3,ARIZONA,261448.0,8899.0,41250,181,303.0,1593,6822,35580,1579,198,2076,156,10747,2370,1629,29670,26843,14777
4,ARIZONA,226367.0,7161.0,30609,134,230.0,1047,5750,26717,1064,156,1505,139,7934,1929,1080,24892,22367,16750


In [58]:
# Keep only the ARKANSAS rows of the combined crime frame, then renumber from 0
arkansas_crime_pd = frame[frame["State"].eq("ARKANSAS")]
arkansas_crime_pd_reindexed = arkansas_crime_pd.reset_index(drop=True)
arkansas_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ARKANSAS,108281.0,3229.0,12948,59,139.0,435,2596,10213,285,59,1952,35,1125,943,133,8783,7758,2681
1,ARKANSAS,137962.0,4137.0,16680,119,172.0,626,3220,13532,352,53,1824,46,1430,1158,184,11083,9226,3453
2,ARKANSAS,138054.0,4013.0,16631,95,211.0,650,3057,13443,369,57,1584,27,1308,997,192,11391,7811,3292
3,ARKANSAS,119850.0,3998.0,15484,86,243.0,582,3087,12845,413,50,1256,44,1252,898,118,11168,6844,2462
4,ARKANSAS,127083.0,4667.0,15771,123,273.0,589,3682,13357,364,73,1145,54,1350,886,134,12700,6919,2185


In [59]:
# Keep only the CALIFORNIA rows of the combined crime frame, then renumber from 0
cali_crime_pd = frame[frame["State"].eq("CALIFORNIA")]
cali_crime_pd_reindexed = cali_crime_pd.reset_index(drop=True)
cali_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,CALIFORNIA,1183470.0,107165.0,147842,1512,1757.0,17431,86465,84090,10350,1047,6998,1100,21921,27690,11143,188188,104345,11246
1,CALIFORNIA,1228185.0,103122.0,144300,1594,1642.0,16676,83210,78489,13303,1065,7054,1074,18488,25841,11139,194290,172345,8831
2,CALIFORNIA,1212801.0,101342.0,139624,1425,1600.0,15930,82387,74234,13909,1091,6722,1053,17445,25676,10286,217520,161055,7205
3,CALIFORNIA,1217089.0,106058.0,134746,1427,2443.0,14793,87395,71440,13755,1053,6551,1047,17175,24484,9727,229083,155285,6308
4,CALIFORNIA,1158479.0,107454.0,127671,1431,2465.0,15841,87717,65037,17303,1185,6499,972,17143,25946,9395,206194,141458,4674


In [60]:
# Keep only the COLORADO rows of the combined crime frame, then renumber from 0
col_crime_pd = frame[frame["State"].eq("COLORADO")]
col_crime_pd_reindexed = col_crime_pd.reset_index(drop=True)
col_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,COLORADO,208352.0,5701.0,23445,107,400.0,936,4258,19719,1084,235,1700,121,4971,1701,818,15109,27314,9672
1,COLORADO,213769.0,5375.0,23128,116,333.0,868,4058,19726,1002,223,1491,103,4963,1743,619,15953,26180,8970
2,COLORADO,230910.0,5691.0,28833,120,333.0,1000,4238,24873,1340,195,1758,89,5141,1841,687,12370,28723,10359
3,COLORADO,239994.0,5936.0,32318,128,458.0,939,4411,27857,1565,225,1928,88,5296,2164,525,13381,27874,9801
4,COLORADO,226807.0,6242.0,31067,165,460.0,1005,4612,26358,2150,178,2083,99,5309,2219,484,14430,25562,8875


In [61]:
# Keep only the CONNECTICUT rows of the combined crime frame, then renumber from 0
conn_crime_pd = frame[frame["State"].eq("CONNECTICUT")]
conn_crime_pd_reindexed = conn_crime_pd.reset_index(drop=True)
conn_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,CONNECTICUT,119285.0,4817.0,17366,128,267.0,1397,3025,13813,612,170,1297,161,2310,1244,560,13667,8487,12458
1,CONNECTICUT,110186.0,4438.0,16280,134,209.0,1299,2796,12884,584,118,1182,168,2199,1227,598,9651,8612,12796
2,CONNECTICUT,95685.0,3562.0,15338,48,178.0,1117,2219,12300,574,399,1018,185,1936,1030,427,9137,8207,10032
3,CONNECTICUT,107855.0,4051.0,17132,94,238.0,1166,2553,13857,581,285,963,164,2011,1129,389,9927,8918,11949
4,CONNECTICUT,85327.0,3116.0,14016,85,179.0,983,1869,11594,574,72,786,158,1473,924,394,7635,8148,8159


In [62]:
# Keep only the DELAWARE rows of the combined crime frame, then renumber from 0
del_crime_pd = frame[frame["State"].eq("DELAWARE")]
del_crime_pd_reindexed = del_crime_pd.reset_index(drop=True)
del_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,DELAWARE,37832.0,2328.0,7325,29,93.0,583,1623,6187,116,41,1803,186,880,327,181,5110,242,2141
1,DELAWARE,39680.0,2387.0,8017,26,67.0,596,1698,6608,142,34,1911,188,922,327,211,5463,292,1973
2,DELAWARE,37321.0,2175.0,7407,9,64.0,511,1591,6295,93,28,1795,196,907,332,188,6250,325,1782
3,DELAWARE,36780.0,2126.0,7963,22,78.0,537,1489,6780,128,28,1884,204,774,394,137,6163,334,1645
4,DELAWARE,34687.0,2219.0,7328,42,76.0,496,1605,6195,104,32,1705,214,860,332,117,5734,386,1235


In [63]:
# Keep only the DISTRICT OF COLUMBIA rows of the combined crime frame, then renumber from 0
dc_crime_pd = frame[frame["State"].eq("DISTRICT OF COLUMBIA")]
dc_crime_pd_reindexed = dc_crime_pd.reset_index(drop=True)
dc_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,DISTRICT OF COLUMBIA,8725.0,157.0,175,0,0.0,118,39,165,10,0,17,0,29,59,0,204,43,159
1,DISTRICT OF COLUMBIA,6154.0,108.0,99,0,0.0,77,31,94,5,0,2,1,28,67,0,323,10,57
2,DISTRICT OF COLUMBIA,5709.0,110.0,128,0,1.0,64,45,119,8,1,3,0,38,92,0,389,10,76
3,DISTRICT OF COLUMBIA,7218.0,124.0,94,0,0.0,87,37,88,6,0,0,4,31,67,0,232,8,111


In [64]:
# Keep only the FLORIDA rows of the combined crime frame, then renumber from 0
flo_crime_pd = frame[frame["State"].eq("FLORIDA")]
flo_crime_pd_reindexed = flo_crime_pd.reset_index(drop=True)
flo_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,FLORIDA,952845.0,43364.0,128489,709,1527.0,9148,31980,94047,5345,319,13992,939,7364,5785,2734,130296,43784,0
1,FLORIDA,960086.0,41610.0,125581,707,1620.0,8298,30985,93335,5444,351,14988,910,6881,6177,2797,127919,44894,0
2,FLORIDA,904135.0,40567.0,122990,662,1842.0,7940,30123,94392,5452,324,13988,950,6680,5088,2663,126137,41994,0
3,FLORIDA,865272.0,39249.0,119875,685,1839.0,7712,29013,92468,6204,289,12236,954,6464,4911,2583,122190,42745,0
4,FLORIDA,772956.0,38453.0,115264,671,1800.0,7191,28791,88770,7290,285,11880,1024,6566,4877,2243,114988,31783,0


In [65]:
# Keep only the GEORGIA rows of the combined crime frame, then renumber from 0
geo_crime_pd = frame[frame["State"].eq("GEORGIA")]
geo_crime_pd_reindexed = geo_crime_pd.reset_index(drop=True)
geo_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,GEORGIA,349708.0,14419.0,53874,460,350.0,2787,10822,41729,2148,401,6294,406,4147,3971,4032,42812,31176,26769
1,GEORGIA,330261.0,13663.0,50380,408,311.0,2829,10115,40239,1721,205,5496,418,4124,3500,3829,42889,27903,25067
2,GEORGIA,323435.0,12146.0,49993,455,346.0,2714,8631,40956,1579,200,5334,518,3667,3392,3564,44539,25912,23724
3,GEORGIA,307221.0,11535.0,47951,419,326.0,2625,8165,39374,1624,214,5485,399,3913,3363,3229,42619,23004,19913
4,GEORGIA,231108.0,8537.0,35014,306,211.0,1756,6264,29109,1113,125,4246,285,2789,2655,2199,34837,19217,12086


In [66]:
# Keep only the HAWAII rows of the combined crime frame, then renumber from 0
haw_crime_pd = frame[frame["State"].eq("HAWAII")]
haw_crime_pd_reindexed = haw_crime_pd.reset_index(drop=True)
haw_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,HAWAII,8832.0,267.0,972,3,22.0,76,166,699,106,4,6,0,56,108,32,892,879,122
1,HAWAII,9556.0,300.0,1350,1,19.0,57,223,972,144,9,5,0,80,112,40,1087,998,139
2,HAWAII,9464.0,332.0,1072,6,22.0,63,241,723,130,4,11,0,72,89,20,1122,810,154
3,HAWAII,37603.0,1261.0,4353,24,145.0,367,725,3342,469,30,272,34,467,190,195,2160,5250,821


In [67]:
# Keep only the IDAHO rows of the combined crime frame, then renumber from 0
ida_crime_pd = frame[frame["State"].eq("IDAHO")]
ida_crime_pd_reindexed = ida_crime_pd.reset_index(drop=True)
ida_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,IDAHO,63005.0,1303.0,7037,17,68.0,82,1136,5859,183,47,437,54,1162,427,275,6558,9161,2220
1,IDAHO,64164.0,1482.0,7225,19,90.0,100,1273,5894,195,79,419,50,1152,525,300,6848,9097,2185
2,IDAHO,61668.0,1519.0,6690,19,86.0,107,1307,5422,182,60,373,52,981,437,266,7591,8109,2006
3,IDAHO,59003.0,1403.0,6673,16,108.0,92,1187,5417,207,63,371,60,1051,354,245,7409,7439,1666
4,IDAHO,52162.0,1447.0,6002,14,112.0,102,1219,4807,208,60,352,47,909,320,222,6541,5844,1497


In [68]:
# Keep only the ILLINOIS rows of the combined crime frame, then renumber from 0
ill_crime_pd = frame[frame["State"].eq("ILLINOIS")]
ill_crime_pd_reindexed = ill_crime_pd.reset_index(drop=True)
ill_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,ILLINOIS,132863.0,6939.0,18924,290,409.0,2524,3716,13683,2610,61,232,0,3904,3941,609,38701,3619,11031
1,ILLINOIS,125398.0,6685.0,18344,319,381.0,2440,3545,13579,2387,61,212,1,3504,3982,549,35715,4192,10613
2,ILLINOIS,120760.0,6076.0,16792,363,9.0,2541,3163,12315,2487,58,202,1,3096,3438,562,35001,3868,11639
3,ILLINOIS,108211.0,4655.0,14984,329,349.0,2058,1919,10688,2544,74,267,0,2896,3343,534,29478,3653,9651
4,ILLINOIS,91218.0,4102.0,13237,272,280.0,1728,1822,8866,2986,64,226,0,2343,3491,414,24101,3659,6763


In [69]:
# Keep only the INDIANA rows of the combined crime frame, then renumber from 0
ind_crime_pd = frame[frame["State"].eq("INDIANA")]
ind_crime_pd_reindexed = ind_crime_pd.reset_index(drop=True)
ind_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,INDIANA,194774.0,8398.0,30383,243,187.0,1687,6281,24499,1527,134,1699,21,2011,1880,1277,23345,20043,6873
1,INDIANA,151935.0,5987.0,22874,169,114.0,1104,4600,18588,1063,129,1421,79,1872,1462,1327,18372,14844,5491
2,INDIANA,129146.0,6138.0,21723,158,167.0,1059,4754,18047,1042,111,1047,145,1464,1437,1057,12470,12295,4278
3,INDIANA,143682.0,7463.0,22937,189,170.0,1185,5919,19081,1271,97,1355,130,1390,1559,828,16167,14878,3900
4,INDIANA,150628.0,7327.0,21625,200,183.0,1192,5752,17903,1394,83,1569,143,1338,2069,886,16526,14428,3619


In [70]:
# Keep only the IOWA rows of the combined crime frame, then renumber from 0
io_crime_pd = frame[frame["State"].eq("IOWA")]
io_crime_pd_reindexed = io_crime_pd.reset_index(drop=True)
io_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,IOWA,105015.0,3939.0,14424,33,110.0,202,3594,11985,381,134,784,68,2520,566,213,9139,11889,6155
1,IOWA,102554.0,4128.0,13911,28,89.0,244,3767,11584,342,128,766,95,2197,618,191,8666,10845,5954
2,IOWA,101402.0,4055.0,15466,32,102.0,348,3573,13079,457,140,724,72,2069,711,224,9182,9825,5515
3,IOWA,100607.0,4158.0,14885,28,157.0,361,3612,12882,394,110,761,74,2289,742,193,9115,9952,5443
4,IOWA,89891.0,4512.0,14137,43,136.0,362,3971,11998,449,105,653,82,1931,705,167,8114,9028,4137


In [71]:
# Keep only the KANSAS rows of the combined crime frame, then renumber from 0
kan_crime_pd = frame[frame["State"].eq("KANSAS")]
kan_crime_pd_reindexed = kan_crime_pd.reset_index(drop=True)
kan_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,KANSAS,78391.0,2236.0,7624,44,187.0,189,1816,6193,382,67,931,163,1875,600,301,7364,11470,2746
1,KANSAS,78153.0,2269.0,7934,43,146.0,221,1859,6444,426,55,841,196,1974,616,279,7629,10378,2833
2,KANSAS,78182.0,2145.0,7209,63,130.0,206,1746,5904,383,55,940,202,2002,594,246,8130,9730,2337
3,KANSAS,68291.0,1992.0,6214,37,151.0,205,1599,4943,395,57,648,159,1808,450,194,7677,8438,2146
4,KANSAS,62188.0,1991.0,6167,53,140.0,221,1577,5046,347,53,582,159,1624,425,172,7756,7186,1954


In [72]:
# Keep only the KENTUCKY rows of the combined crime frame, then renumber from 0
ken_crime_pd = frame[frame["State"].eq("KENTUCKY")]
ken_crime_pd_reindexed = ken_crime_pd.reset_index(drop=True)
ken_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,KENTUCKY,176087.0,3563.0,19953,114,244.0,1146,2059,15397,596,142,1164,333,1236,1082,394,21383,22973,4998
1,KENTUCKY,179435.0,3704.0,21538,161,248.0,1231,2064,16744,674,181,976,432,1126,1122,400,21141,23157,4960
2,KENTUCKY,178212.0,3687.0,22506,129,187.0,1275,2096,18060,607,170,1063,428,1096,1007,418,21830,21946,4715
3,KENTUCKY,178916.0,3790.0,23106,159,249.0,1260,2122,18825,629,145,1135,483,1097,1015,370,21644,19953,4537
4,KENTUCKY,195415.0,3484.0,20873,159,238.0,1136,1951,16949,681,74,1135,467,1059,872,375,22106,17825,4453


In [73]:
# Keep only the LOUISIANA rows of the combined crime frame, then renumber from 0
lou_crime_pd = frame[frame["State"].eq("LOUISIANA")]
lou_crime_pd_reindexed = lou_crime_pd.reset_index(drop=True)
lou_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,LOUISIANA,119085.0,8420.0,19892,158,208.0,959,7095,15419,577,122,1411,24,2537,1515,613,14925,6032,9772
1,LOUISIANA,122931.0,7741.0,21778,182,222.0,937,6400,17310,577,100,1217,20,2102,1519,563,15748,5259,9572
2,LOUISIANA,149789.0,8563.0,28322,205,263.0,954,7141,23026,843,100,1643,158,2758,1815,760,20727,6182,7929
3,LOUISIANA,121145.0,5215.0,20526,165,218.0,669,4163,16290,721,70,1154,98,1968,1585,619,17693,5598,5777
4,LOUISIANA,111454.0,4813.0,18898,151,231.0,645,3786,15168,741,70,854,20,1569,1763,507,17686,5339,5350


In [74]:
# Keep only the MAINE rows of the combined crime frame, then renumber from 0
mai_crime_pd = frame[frame["State"].eq("MAINE")]
mai_crime_pd_reindexed = mai_crime_pd.reset_index(drop=True)
mai_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MAINE,51592.0,751.0,8142,17,75.0,204,455,6368,301,77,755,56,1487,403,253,5627,5802,1709
1,MAINE,51052.0,770.0,8260,19,51.0,233,467,6618,264,76,659,55,1472,406,277,5544,5834,1883
2,MAINE,49548.0,808.0,7749,17,82.0,177,532,6358,250,46,669,43,1255,304,222,5608,5826,1663
3,MAINE,47815.0,762.0,7286,10,65.0,164,523,6117,222,42,643,58,1168,270,207,5802,5531,1568
4,MAINE,45230.0,800.0,6480,21,59.0,182,538,5361,236,29,563,46,1005,228,184,5947,5756,1317


In [75]:
# Keep only the MARYLAND rows of the combined crime frame, then renumber from 0
mary_crime_pd = frame[frame["State"].eq("MARYLAND")]
mary_crime_pd_reindexed = mary_crime_pd.reset_index(drop=True)
mary_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MARYLAND,229010.0,9551.0,28345,202,305.0,2819,6225,20403,1585,375,1554,289,3267,3235,1175,41022,17402,6306
1,MARYLAND,214837.0,9665.0,27862,218,336.0,2989,6122,19861,1619,299,1288,266,3205,3189,1143,39733,16769,6025
2,MARYLAND,168692.0,6925.0,20773,166,207.0,2156,4396,15382,926,259,1041,191,2222,2384,578,34006,14793,3930
3,MARYLAND,195120.0,8322.0,24566,195,284.0,2692,5151,18584,1112,246,1048,227,2527,2739,874,37780,17839,5875
4,MARYLAND,159178.0,8244.0,22032,257,336.0,2698,4953,16408,1098,240,817,193,2227,2716,692,26154,17100,4475


In [76]:
# Keep only the MASSACHUSETTS rows of the combined crime frame, then renumber from 0
mass_crime_pd = frame[frame["State"].eq("MASSACHUSETTS")]
mass_crime_pd_reindexed = mass_crime_pd.reset_index(drop=True)
mass_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MASSACHUSETTS,137379.0,11512.0,18428,90,288.0,1808,9326,13955,697,83,1181,90,3138,1479,566,10849,9887,7384
1,MASSACHUSETTS,137532.0,10719.0,18900,52,243.0,1590,8834,14741,563,110,1329,143,3217,1311,547,11599,8541,6881
2,MASSACHUSETTS,135362.0,10512.0,19719,50,270.0,1573,8619,15672,560,124,1490,133,2864,1384,450,11206,8381,6522
3,MASSACHUSETTS,125708.0,10181.0,17915,47,274.0,1464,8396,14437,603,87,1288,133,2759,1293,436,10516,7530,5594
4,MASSACHUSETTS,116347.0,9841.0,15577,52,270.0,1200,8319,12664,564,53,1186,127,2528,1300,432,9761,8258,4349


In [77]:
# Keep only the MICHIGAN rows of the combined crime frame, then renumber from 0
mic_crime_pd = frame[frame["State"].eq("MICHIGAN")]
mic_crime_pd_reindexed = mic_crime_pd.reset_index(drop=True)
mic_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MICHIGAN,251200.0,11856.0,34176,158,589.0,2141,8968,25970,1895,266,4648,917,3125,3926,862,31486,29443,8563
1,MICHIGAN,252589.0,11537.0,33189,157,557.0,2003,8820,25928,1612,263,4282,825,3088,3843,859,31823,29206,8561
2,MICHIGAN,251825.0,11591.0,32025,247,627.0,2104,8613,25344,1547,249,4326,1010,2831,4035,837,34128,27643,7326
3,MICHIGAN,246442.0,11260.0,27848,278,641.0,1631,8710,22631,1259,215,4000,912,2326,3606,604,33567,28072,6740
4,MICHIGAN,240942.0,10836.0,25150,242,622.0,1516,8456,20815,1092,215,3671,856,2386,3289,532,34716,26845,6402


In [78]:
# Keep only the MINNESOTA rows of the combined crime frame, then renumber from 0
min_crime_pd = frame[frame["State"].eq("MINNESOTA")]
min_crime_pd_reindexed = min_crime_pd.reset_index(drop=True)
min_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MINNESOTA,176051.0,5140.0,28405,69,152.0,1196,3723,24268,977,167,2382,30,3886,1688,1331,17727,24543,13184
1,MINNESOTA,,,24025,44,,579,2598,20612,928,154,1748,33,3002,931,1076,15087,22278,9381
2,MINNESOTA,158799.0,5636.0,28649,91,564.0,1196,3785,24895,1015,101,2145,25,3213,1736,1201,19056,21206,10832
3,MINNESOTA,151155.0,5564.0,27659,87,543.0,1151,3783,24079,1086,83,2286,64,3094,1797,1159,19148,20586,10149
4,MINNESOTA,148307.0,5792.0,26814,119,670.0,1197,3806,23406,1135,118,2699,40,3203,1867,1083,17342,20830,10090


In [79]:
# Keep only the MISSISSIPPI rows of the combined crime frame, then renumber from 0
misspi_crime_pd = frame[frame["State"].eq("MISSISSIPPI")]
misspi_crime_pd_reindexed = misspi_crime_pd.reset_index(drop=True)
misspi_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MISSISSIPPI,102765.0,2223.0,11509,173,161.0,625,1264,8639,402,95,1551,517,937,828,314,10862,11251,6783
1,MISSISSIPPI,94386.0,1724.0,10320,83,127.0,406,1108,8032,347,47,1355,559,887,841,246,10678,10221,6294
2,MISSISSIPPI,72824.0,1404.0,8978,95,86.0,325,898,7394,263,76,954,437,673,705,187,8037,7764,4606
3,MISSISSIPPI,79522.0,1626.0,10006,124,83.0,406,1013,8187,295,67,1120,443,739,858,184,8958,8174,4457
4,MISSISSIPPI,71916.0,1271.0,8453,103,85.0,324,759,6850,309,41,1021,387,651,875,162,9010,6889,3777


In [80]:
# Keep only the MISSOURI rows of the combined crime frame, then renumber from 0
missor_crime_pd = frame[frame["State"].eq("MISSOURI")]
missor_crime_pd_reindexed = missor_crime_pd.reset_index(drop=True)
missor_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MISSOURI,295080.0,10401.0,41476,252,382.0,2117,7650,33181,1571,249,2643,255,5300,3353,2018,35383,29447,13006
1,MISSOURI,289864.0,10020.0,42838,289,389.0,1968,7374,35286,1684,284,2493,284,5126,3230,1801,34943,26860,12274
2,MISSOURI,276973.0,9234.0,40261,238,232.0,1938,6826,33284,1659,199,2601,299,4637,3050,1509,35131,23950,11253
3,MISSOURI,256349.0,9220.0,37807,248,488.0,1747,6737,31453,1767,184,2244,330,4229,2735,1395,33496,22187,9259
4,MISSOURI,222609.0,9682.0,34559,253,538.0,1893,6998,28339,1899,142,2238,323,4052,2936,1218,34052,19449,8042


In [81]:
# Keep only the MONTANA rows of the combined crime frame, then renumber from 0
mon_crime_pd = frame[frame["State"].eq("MONTANA")]
mon_crime_pd_reindexed = mon_crime_pd.reset_index(drop=True)
mon_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,MONTANA,29678.0,829.0,4118,10,40.0,45,734,3698,163,32,171,34,965,71,61,1790,4251,3458
1,MONTANA,32384.0,907.0,4671,10,54.0,57,786,4098,197,45,139,40,1072,91,96,2077,4418,3445
2,MONTANA,30089.0,837.0,4785,15,36.0,63,723,4251,209,14,207,38,921,86,79,1958,4300,3272
3,MONTANA,29766.0,946.0,4927,11,52.0,49,834,4387,226,35,269,40,855,97,85,2205,4461,3086
4,MONTANA,25687.0,907.0,4038,7,50.0,44,806,3582,219,25,223,53,792,75,79,1961,3674,2585


In [82]:
# Keep only the NEBRASKA rows of the combined crime frame, then renumber from 0
neb_crime_pd = frame[frame["State"].eq("NEBRASKA")]
neb_crime_pd_reindexed = neb_crime_pd.reset_index(drop=True)
neb_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEBRASKA,81890.0,2111.0,11062,53,213.0,355,1490,9707,364,89,1230,75,2587,835,560,10423,12005,3761
1,NEBRASKA,77665.0,2080.0,10893,30,200.0,334,1516,9586,369,87,1456,77,2571,914,570,10433,10639,3364
2,NEBRASKA,69957.0,1912.0,9981,42,156.0,270,1444,8839,325,64,1348,82,2103,956,502,10366,9193,2724
3,NEBRASKA,73802.0,2237.0,10825,45,145.0,294,1753,9697,341,53,1419,94,2368,1016,470,11965,8372,3188
4,NEBRASKA,45859.0,1170.0,5932,9,106.0,124,931,5241,248,48,829,28,1288,617,253,8589,5348,1786


In [83]:
# Keep only the NEVADA rows of the combined crime frame, then renumber from 0
nev_crime_pd = frame[frame["State"].eq("NEVADA")]
nev_crime_pd_reindexed = nev_crime_pd.reset_index(drop=True)
nev_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEVADA,141908.0,5690.0,14355,107,168.0,1491,3924,10895,505,376,1354,403,2230,1499,1243,15562,11834,2765
1,NEVADA,142459.0,6557.0,15569,120,199.0,1519,4719,11875,664,113,1531,436,1782,1464,1257,15001,11576,1905
2,NEVADA,122498.0,6200.0,13930,117,202.0,1517,4364,10380,744,138,1416,393,1586,1560,1080,11207,8841,1495
3,NEVADA,122274.0,7808.0,13639,122,279.0,1809,5598,9987,851,65,1513,344,1500,1492,1002,12508,8072,1589
4,NEVADA,115675.0,8337.0,12475,131,340.0,1996,5870,8936,805,57,1639,338,1472,1460,797,11594,7612,1250


In [84]:
# Keep only the NEW HAMPSHIRE rows of the combined crime frame, then renumber from 0
nh_crime_pd = frame[frame["State"].eq("NEW HAMPSHIRE")]
nh_crime_pd_reindexed = nh_crime_pd.reset_index(drop=True)
nh_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEW HAMPSHIRE,40699.0,860.0,4152,4,42.0,159,655,3522,75,39,689,35,1122,77,142,3631,3616,1116
1,NEW HAMPSHIRE,45492.0,879.0,4602,8,58.0,208,605,3879,93,51,793,39,1273,74,132,3941,4109,1358
2,NEW HAMPSHIRE,44554.0,871.0,5222,8,52.0,241,570,4637,85,30,899,47,1242,96,164,4270,3700,1191
3,NEW HAMPSHIRE,46180.0,864.0,4918,1,54.0,205,604,4390,83,34,873,95,1158,108,143,6224,4528,1046
4,NEW HAMPSHIRE,44368.0,772.0,4355,9,57.0,169,537,3863,96,18,822,124,927,138,123,7371,4746,943


In [85]:
# Rows of `frame` whose "State" value is exactly "NEW JERSEY".
nj_crime_pd = frame[frame["State"].eq("NEW JERSEY")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
nj_crime_pd_reindexed = nj_crime_pd.reset_index(drop=True)
nj_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEW JERSEY,333136.0,11947.0,33726,207,279.0,3561,7900,26191,688,263,3323,211,5227,4081,1298,46377,26206,17791
1,NEW JERSEY,328432.0,11341.0,32995,220,316.0,3528,7277,25560,674,235,3106,204,4638,3957,1260,51111,25697,17853
2,NEW JERSEY,302955.0,10871.0,31093,232,289.0,3557,6793,24643,666,218,2956,271,3980,3969,1180,50775,23765,13653
3,NEW JERSEY,303639.0,10377.0,30216,247,315.0,3327,6488,24468,537,213,4891,321,3792,3832,1129,52721,23031,14943
4,NEW JERSEY,264250.0,8697.0,26181,154,338.0,2650,5555,21342,617,153,3498,283,3530,3268,902,46376,22201,11395


In [86]:
# Rows of `frame` whose "State" value is exactly "NEW MEXICO".
nm_crime_pd = frame[frame["State"].eq("NEW MEXICO")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
nm_crime_pd_reindexed = nm_crime_pd.reset_index(drop=True)
nm_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEW MEXICO,108731.0,4386.0,13257,83,121.0,380,3802,10800,408,214,573,230,1277,692,220,8859,11460,3502
1,NEW MEXICO,98410.0,4016.0,12560,83,122.0,337,3474,10282,307,254,468,194,1132,609,153,8044,10126,3352
2,NEW MEXICO,65526.0,3149.0,10782,71,93.0,226,2759,8740,217,643,366,165,891,402,119,4773,6352,1762
3,NEW MEXICO,81447.0,3813.0,10166,73,116.0,230,3394,8461,287,43,553,166,906,408,105,5529,7384,2251
4,NEW MEXICO,87310.0,3984.0,10496,70,107.0,365,3442,8758,365,328,524,158,1137,529,117,7188,8542,2543


In [87]:
# Rows of `frame` whose "State" value is exactly "NEW YORK".
ny_crime_pd = frame[frame["State"].eq("NEW YORK")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ny_crime_pd_reindexed = ny_crime_pd.reset_index(drop=True)
ny_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NEW YORK,303435.0,11306.0,53185,176,402.0,2851,7877,43965,1607,237,5745,116,14771,3065,3144,56508,35541,11322
1,NEW YORK,338463.0,14318.0,62766,206,455.0,3720,9937,51088,2124,374,6062,97,17728,4011,3419,65840,33053,11942
2,NEW YORK,300442.0,12902.0,59628,254,354.0,3605,8689,49635,1794,327,5444,90,15131,3248,2191,61633,29977,9750
3,NEW YORK,264268.0,11907.0,51764,210,422.0,3415,7860,43326,1627,281,4874,46,13593,3218,1938,58782,26587,7596
4,NEW YORK,271540.0,13418.0,51569,262,1121.0,3574,8461,43300,1770,276,4760,72,14382,3436,1893,62035,28988,6471


In [88]:
# Rows of `frame` whose "State" value is exactly "NORTH CAROLINA".
nc_crime_pd = frame[frame["State"].eq("NORTH CAROLINA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
nc_crime_pd_reindexed = nc_crime_pd.reset_index(drop=True)
nc_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NORTH CAROLINA,494561.0,19735.0,71541,573,560.0,3900,14702,52358,1430,422,18242,1472,8696,7372,1686,42225,53700,16003
1,NORTH CAROLINA,435932.0,18825.0,66248,560,530.0,3774,13961,49443,1217,370,14901,1352,7621,6618,1636,38240,46855,12450
2,NORTH CAROLINA,395015.0,17148.0,62371,567,411.0,3876,12294,46754,1137,317,13751,1291,6579,6120,1394,35470,41247,10608
3,NORTH CAROLINA,369728.0,16763.0,60597,494,401.0,3330,12538,46549,1099,275,12783,1277,6624,6398,1216,34695,39743,8729
4,NORTH CAROLINA,278625.0,13275.0,46827,430,294.0,3002,9549,36598,867,218,8564,1130,5022,4981,968,21487,35967,6878


In [89]:
# Rows of `frame` whose "State" value is exactly "NORTH DAKOTA".
nd_crime_pd = frame[frame["State"].eq("NORTH DAKOTA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
nd_crime_pd_reindexed = nd_crime_pd.reset_index(drop=True)
nd_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,NORTH DAKOTA,26012.0,495.0,2936,8,30.0,22,435,2602,109,12,413,41,466,131,66,2243,4836,1698
1,NORTH DAKOTA,29084.0,528.0,3119,7,40.0,46,435,2736,145,19,423,43,504,213,84,2446,5730,1992
2,NORTH DAKOTA,30642.0,585.0,2947,8,32.0,44,501,2556,157,16,377,36,453,333,77,3345,7077,1848
3,NORTH DAKOTA,30653.0,616.0,3132,16,54.0,57,489,2703,190,15,374,61,435,356,69,4004,6664,1640
4,NORTH DAKOTA,32565.0,682.0,3420,19,41.0,44,578,2908,211,19,299,37,454,406,114,4475,6351,1927


In [90]:
# Rows of `frame` whose "State" value is exactly "OHIO".
ohi_crime_pd = frame[frame["State"].eq("OHIO")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ohi_crime_pd_reindexed = ohi_crime_pd.reset_index(drop=True)
ohi_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,OHIO,256625.0,7642.0,37720,235,474.0,2795,4138,30043,901,234,2550,19,4534,3217,734,28943,36528,17659
1,OHIO,227243.0,6236.0,35439,181,376.0,2020,3659,29238,550,158,2162,11,3969,2518,659,26936,35629,14904
2,OHIO,224248.0,6589.0,35489,134,368.0,2191,3896,29585,705,227,2376,39,3684,2781,680,28612,33808,13498
3,OHIO,244134.0,6681.0,37913,212,476.0,2084,3909,32230,751,196,2460,48,3838,3204,665,36331,34626,15787
4,OHIO,226325.0,5361.0,34444,136,361.0,1656,3208,29967,654,153,2157,21,3289,2893,595,32827,34254,14714


In [91]:
# Rows of `frame` whose "State" value is exactly "OKLAHOMA".
ok_crime_pd = frame[frame["State"].eq("OKLAHOMA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ok_crime_pd_reindexed = ok_crime_pd.reset_index(drop=True)
ok_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,OKLAHOMA,134295.0,5155.0,17945,144,231.0,669,4111,14507,513,182,1897,437,1240,1764,538,15931,14563,2712
1,OKLAHOMA,127825.0,4760.0,17664,133,228.0,591,3808,14444,495,193,1711,408,1302,1984,543,16688,13801,2571
2,OKLAHOMA,125534.0,4908.0,18620,156,207.0,658,3887,15517,517,225,1666,436,1241,2148,498,16946,13244,2419
3,OKLAHOMA,123408.0,4673.0,19023,129,236.0,717,3591,15985,576,161,1602,475,1153,1988,466,17721,12741,2062
4,OKLAHOMA,110974.0,4770.0,18127,179,230.0,700,3661,15364,555,143,1642,487,1280,2139,427,17771,11101,2264


In [92]:
# Rows of `frame` whose "State" value is exactly "OREGON".
or_crime_pd = frame[frame["State"].eq("OREGON")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
or_crime_pd_reindexed = or_crime_pd.reset_index(drop=True)
or_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,OREGON,133414.0,4289.0,24655,68,229.0,933,3059,20634,1219,261,1625,33,4655,1744,1145,19262,14966,10202
1,OREGON,127754.0,4205.0,25279,83,221.0,1110,2791,21158,1404,229,1700,36,4049,1712,1144,18929,13592,10185
2,OREGON,54323.0,1648.0,9446,29,53.0,288,1278,7816,567,103,901,19,2080,847,338,10009,6699,3282
3,OREGON,71804.0,2374.0,14717,43,132.0,535,1664,12482,885,109,1411,24,2382,1060,531,11165,6020,5445
4,OREGON,76969.0,2310.0,13709,35,117.0,377,1781,11448,875,150,1227,30,2525,1181,428,11152,9019,4849


In [93]:
# Rows of `frame` whose "State" value is exactly "PENNSYLVANIA".
pen_crime_pd = frame[frame["State"].eq("PENNSYLVANIA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
pen_crime_pd_reindexed = pen_crime_pd.reset_index(drop=True)
pen_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,PENNSYLVANIA,418838.0,23179.0,58946,445,953.0,6470,15311,46076,2593,547,6685,442,9509,4222,2452,52483,48519,48699
1,PENNSYLVANIA,433863.0,23412.0,62665,548,947.0,6432,15485,49597,2543,566,6866,440,9279,4154,2584,55610,49103,48505
2,PENNSYLVANIA,413486.0,22113.0,61049,460,1152.0,5873,14628,48997,2328,444,6765,441,8089,4153,2304,54452,45744,44251
3,PENNSYLVANIA,415346.0,21135.0,61383,402,1095.0,5590,14048,50564,2323,473,7499,455,7676,4739,2230,57392,47019,40711
4,PENNSYLVANIA,377078.0,18944.0,55827,466,1052.0,5043,12383,46249,2266,453,8003,424,7209,4262,2251,52281,44615,37017


In [94]:
# Rows of `frame` whose "State" value is exactly "RHODE ISLAND".
ri_crime_pd = frame[frame["State"].eq("RHODE ISLAND")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ri_crime_pd_reindexed = ri_crime_pd.reset_index(drop=True)
ri_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,RHODE ISLAND,29852.0,702.0,3368,6,56.0,141,499,2439,111,45,542,102,1108,281,87,3111,2508,2880
1,RHODE ISLAND,33191.0,901.0,3638,10,69.0,172,650,2757,123,30,549,115,1106,404,104,3665,2548,2990
2,RHODE ISLAND,30598.0,848.0,3590,10,72.0,188,578,2707,132,44,497,101,953,342,114,2240,2576,2767
3,RHODE ISLAND,30447.0,782.0,3357,14,61.0,123,584,2546,118,49,510,120,934,353,94,1913,2721,2783
4,RHODE ISLAND,27171.0,882.0,2746,17,93.0,162,610,2112,117,29,436,92,986,394,87,1757,2591,2417


In [95]:
# Rows of `frame` whose "State" value is exactly "SOUTH CAROLINA".
sc_crime_pd = frame[frame["State"].eq("SOUTH CAROLINA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
sc_crime_pd_reindexed = sc_crime_pd.reset_index(drop=True)
sc_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,SOUTH CAROLINA,169884.0,7870.0,26954,273,322.0,1309,5966,21521,800,154,5124,399,3409,1931,491,26287,15674,13384
1,SOUTH CAROLINA,169384.0,7257.0,27199,261,320.0,1320,5356,21598,776,160,4477,319,2731,1966,543,27174,17906,13064
2,SOUTH CAROLINA,181016.0,7607.0,30727,295,347.0,1320,5645,25399,726,142,4692,364,3018,2033,453,28838,18919,14052
3,SOUTH CAROLINA,157253.0,6693.0,26158,224,312.0,1211,4946,21845,701,123,3835,391,2679,1781,369,26720,11226,11374
4,SOUTH CAROLINA,151136.0,6061.0,24120,248,295.0,1009,4509,20228,721,105,3079,363,2678,1752,298,25666,16272,9044


In [96]:
# Rows of `frame` whose "State" value is exactly "SOUTH DAKOTA".
sd_crime_pd = frame[frame["State"].eq("SOUTH DAKOTA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
sd_crime_pd_reindexed = sd_crime_pd.reset_index(drop=True)
sd_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,SOUTH DAKOTA,29023.0,577.0,3544,14,46.0,35,482,3025,128,30,562,29,600,128,73,3694,5269,1984
1,SOUTH DAKOTA,35365.0,769.0,3865,10,67.0,49,643,3294,152,30,674,52,580,164,71,4372,5816,2184
2,SOUTH DAKOTA,35061.0,780.0,3301,12,33.0,35,700,2763,161,20,649,29,517,179,69,5382,6328,2038
3,SOUTH DAKOTA,35375.0,866.0,3565,9,51.0,59,747,3099,130,27,627,35,452,145,49,5406,5990,2285
4,SOUTH DAKOTA,48048.0,1190.0,3638,17,52.0,67,1054,3118,210,27,725,34,516,210,64,7058,7305,2895


In [97]:
# Rows of `frame` whose "State" value is exactly "TENNESSEE".
ten_crime_pd = frame[frame["State"].eq("TENNESSEE")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ten_crime_pd_reindexed = ten_crime_pd.reset_index(drop=True)
ten_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,TENNESSEE,386758.0,16546.0,45236,316,429.0,2331,13470,34706,2131,286,9055,745,4706,3532,913,40911,25559,11877
1,TENNESSEE,356340.0,16800.0,44154,261,378.0,2171,13990,34996,2087,219,7908,782,4414,3568,842,43448,26858,10847
2,TENNESSEE,371938.0,17896.0,45903,361,353.0,2189,14993,37063,1927,198,7165,727,4399,3351,799,46923,26276,9598
3,TENNESSEE,358850.0,17122.0,44517,309,352.0,2068,14393,36210,2124,236,6666,763,4626,2971,620,41493,26657,8442
4,TENNESSEE,335281.0,16399.0,42241,311,340.0,1906,13842,34613,2213,129,6065,801,4843,2472,654,38486,23150,7184


In [98]:
# Rows of `frame` whose "State" value is exactly "TEXAS".
tex_crime_pd = frame[frame["State"].eq("TEXAS")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
tex_crime_pd_reindexed = tex_crime_pd.reset_index(drop=True)
tex_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,TEXAS,1026398.0,30462.0,128461,640,1708.0,6495,21619,106018,3883,600,11801,490,11431,9637,4061,126695,85715,32867
1,TEXAS,1009136.0,30245.0,129856,708,1724.0,6365,21448,108304,3985,538,10205,429,10796,10222,4087,133110,85436,29385
2,TEXAS,931814.0,29631.0,128617,648,1572.0,6642,20769,108288,4167,491,9102,439,9678,10042,3799,133711,78352,20602
3,TEXAS,849887.0,29135.0,122822,710,1912.0,5995,20518,103547,4494,508,7817,501,8989,9246,3547,135683,70482,12141
4,TEXAS,808119.0,31493.0,117371,749,2095.0,6956,21693,98494,5528,454,6818,659,8480,10861,3244,131851,64971,9905


In [99]:
# Rows of `frame` whose "State" value is exactly "UTAH".
ut_crime_pd = frame[frame["State"].eq("UTAH")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ut_crime_pd_reindexed = ut_crime_pd.reset_index(drop=True)
ut_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,UTAH,47321.0,703.0,8138,13,117.0,88,485,7237,156,23,473,9,1486,493,471,5207,3184,1669
1,UTAH,117185.0,2045.0,19502,32,154.0,373,1486,17697,408,82,817,26,3741,1291,871,12710,6061,3999
2,UTAH,131389.0,2064.0,18253,33,164.0,371,1496,16721,365,64,977,26,3161,1114,850,13377,9038,4361
3,UTAH,118405.0,2206.0,19420,43,235.0,440,1488,17828,410,84,1026,31,2979,1080,808,14630,8063,3615
4,UTAH,115453.0,2418.0,19416,29,244.0,439,1706,17745,450,68,1051,34,2857,1137,740,14913,8813,3459


In [100]:
# Rows of `frame` whose "State" value is exactly "VERMONT".
ver_crime_pd = frame[frame["State"].eq("VERMONT")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
ver_crime_pd_reindexed = ver_crime_pd.reset_index(drop=True)
ver_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,VERMONT,10907.0,421.0,1487,4,36.0,9,372,1124,95,10,231,47,346,15,25,1041,2264,592
1,VERMONT,13073.0,519.0,1824,8,61.0,21,429,1380,88,26,266,48,364,20,32,1390,2517,679
2,VERMONT,13418.0,510.0,1989,6,59.0,31,414,1614,60,29,271,50,328,22,37,1250,2476,743
3,VERMONT,11381.0,473.0,1473,11,41.0,25,396,1220,30,14,205,46,261,18,33,661,2189,684
4,VERMONT,11183.0,472.0,1402,7,40.0,35,390,1176,29,8,184,60,275,17,25,610,2144,703


In [101]:
# Rows of `frame` whose "State" value is exactly "VIRGINIA".
vir_crime_pd = frame[frame["State"].eq("VIRGINIA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
vir_crime_pd_reindexed = vir_crime_pd.reset_index(drop=True)
vir_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,VIRGINIA,347903.0,6721.0,38670,269,336.0,1982,4134,32650,1008,279,6731,1621,4852,3749,1083,35416,28950,5345
1,VIRGINIA,339362.0,6765.0,37598,271,312.0,1699,4483,31879,835,310,6460,1522,4504,3556,1079,38038,28908,5141
2,VIRGINIA,321040.0,6858.0,37223,331,326.0,1754,4447,32079,867,203,6151,1520,4255,3501,1031,39536,25813,4536
3,VIRGINIA,300102.0,6923.0,35123,278,471.0,1591,4583,30626,803,204,5718,1609,3929,3197,833,36988,23611,4136
4,VIRGINIA,268167.0,6247.0,31971,289,448.0,1392,4118,28164,810,164,5079,1371,3746,3160,674,33558,20477,3106


In [102]:
# Rows of `frame` whose "State" value is exactly "WASHINGTON".
wash_crime_pd = frame[frame["State"].eq("WASHINGTON")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
wash_crime_pd_reindexed = wash_crime_pd.reset_index(drop=True)
wash_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,WASHINGTON,129201.0,5295.0,24813,99,454.0,1139,3603,20072,799,177,731,53,4270,1549,616,12570,11101,3548
1,WASHINGTON,172362.0,7200.0,31512,122,437.0,1704,4937,25737,970,201,846,56,4773,1772,615,12706,30501,3191
2,WASHINGTON,189806.0,7699.0,36129,121,436.0,1868,5274,29491,1232,175,931,78,5645,1855,602,11346,29153,3533
3,WASHINGTON,183687.0,7707.0,35757,137,479.0,1851,5240,29320,1248,207,991,68,5132,1739,661,11111,26952,3260
4,WASHINGTON,186866.0,7822.0,33708,155,488.0,1831,5348,27244,1376,188,930,56,5223,1771,606,11026,24627,3009


In [103]:
# Rows of `frame` whose "State" value is exactly "WEST VIRGINIA".
wv_crime_pd = frame[frame["State"].eq("WEST VIRGINIA")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
wv_crime_pd_reindexed = wv_crime_pd.reset_index(drop=True)
wv_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,WEST VIRGINIA,50204.0,1947.0,6537,46,61.0,236,1604,5239,250,41,785,138,923,391,152,6798,5356,1033
1,WEST VIRGINIA,46364.0,1750.0,6465,43,61.0,201,1445,5251,229,45,711,130,854,354,150,6647,5654,1081
2,WEST VIRGINIA,49225.0,1834.0,6900,29,58.0,171,1576,5816,174,32,704,145,721,354,154,6651,5960,885
3,WEST VIRGINIA,43394.0,1870.0,7115,42,86.0,154,1588,6189,196,25,571,150,649,314,126,5994,4723,634
4,WEST VIRGINIA,40568.0,1743.0,6480,35,77.0,144,1487,5563,197,35,583,138,746,378,116,6187,4543,654


In [104]:
# Rows of `frame` whose "State" value is exactly "WISCONSIN".
wis_crime_pd = frame[frame["State"].eq("WISCONSIN")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
wis_crime_pd_reindexed = wis_crime_pd.reset_index(drop=True)
wis_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,WISCONSIN,317773.0,7448.0,35805,127,693.0,1775,4853,30489,1035,155,3684,168,8302,3799,2510,24140,28798,48195
1,WISCONSIN,324207.0,8481.0,38666,151,559.0,1639,6132,33269,1091,170,3773,198,8620,3715,2779,26073,29126,45856
2,WISCONSIN,305446.0,8002.0,36871,146,619.0,1687,5550,31782,1064,209,3789,259,7115,3708,2371,25582,26562,43614
3,WISCONSIN,281399.0,7487.0,36709,171,677.0,1580,5059,32264,1138,144,3022,258,7073,3955,2239,24854,24330,39032
4,WISCONSIN,265628.0,7522.0,31519,180,690.0,1562,5090,27432,1163,135,2890,283,6476,4022,2067,25302,24588,36191


In [105]:
# Rows of `frame` whose "State" value is exactly "WYOMING".
wy_crime_pd = frame[frame["State"].eq("WYOMING")]
# Renumber the subset from 0; the previous row labels are discarded, not kept as a column.
wy_crime_pd_reindexed = wy_crime_pd.reset_index(drop=True)
wy_crime_pd_reindexed

Unnamed: 0,State,Total Crime,Violent Crime,Property Crime,Murder,Rape,Robbery,Assault,Theft,Vehicle Theft,Arson,Fraud,Embezzlement,Vandalism,Weapons Carrying,Sex Offenses,Drug Violations,DUI,Disorderly Conduct
0,WYOMING,34041.0,560.0,2735,8,31.0,28,493,2369,94,31,196,3,662,118,151,3205,4970,1257
1,WYOMING,30404.0,458.0,2509,13,27.0,29,389,2121,86,36,183,3,605,92,120,3036,4147,1004
2,WYOMING,31636.0,529.0,3052,13,37.0,24,455,2684,107,30,200,8,560,91,139,3527,3893,1026
3,WYOMING,28872.0,453.0,2770,19,39.0,18,377,2418,84,43,168,12,554,90,128,3459,3742,921
4,WYOMING,27058.0,553.0,2541,10,35.0,34,474,2157,95,61,156,10,490,76,130,3695,3157,961


In [106]:
# Per-state mean of the "Total Crime" column over all rows of `frame`.
total_crime = frame.groupby("State")["Total Crime"].agg("mean")
# Order the states from lowest to highest mean (ascending is the sort_values default).
total_crime_sorted = total_crime.sort_values()
total_crime_sorted

State
DISTRICT OF COLUMBIA       6951.50
VERMONT                   11992.40
HAWAII                    16363.75
MONTANA                   29520.80
ALABAMA                   29680.60
NORTH DAKOTA              29791.20
RHODE ISLAND              30251.80
WYOMING                   30402.20
ALASKA                    31240.40
SOUTH DAKOTA              36574.40
DELAWARE                  37260.00
NEW HAMPSHIRE             44258.60
WEST VIRGINIA             45951.00
MAINE                     49047.40
IDAHO                     60000.40
NEBRASKA                  69834.60
KANSAS                    73041.00
MISSISSIPPI               84282.60
NEW MEXICO                88284.80
OREGON                    92852.80
IOWA                      99893.80
CONNECTICUT              103667.60
UTAH                     105950.60
ILLINOIS                 115690.00
OKLAHOMA                 124407.20
LOUISIANA                124880.80
ARKANSAS                 126246.00
NEVADA                   128962.80
MASSACHUSETTS 