In [None]:
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from census import Census
# from us import states
import censusdata
from config import census_key

In [None]:
# Census API client, authenticated with the key imported from config
c = Census(census_key)

In [None]:
# Look up the geography codes used to query the census data (ACS 5-year, 2015).
# Every state:
states = censusdata.geographies(
    censusdata.censusgeo([('state', '*')]),
    'acs5',
    2015,
)
# Every county inside Michigan (state code '26'):
counties = censusdata.geographies(
    censusdata.censusgeo([('state', '26'), ('county', '*')]),
    'acs5',
    2015,
)

In [None]:
# Print the geography entry for each county code of interest within state '26'
for county_code in ('081', '121', '125', '145'):
    county_geo = censusdata.censusgeo([('state', '26'), ('county', county_code)])
    print(censusdata.geographies(county_geo, 'acs5', 2015))

In [None]:
# MEDIAN HOUSEHOLD INCOME FOR KENT, OAKLAND, MUSKEGON, SAGINAW COUNTIES

# Median household income variable from CensusAPI.
# Named `income_vars` (not `vars`) so the Python builtin `vars` is not shadowed.
income_vars = ["B19013_001E"]


def fetch_median_income(county_fips, value_column, county_column,
                        state_fips="26", years=range(2012, 2021 + 1)):
    """Fetch the ACS 5-year median household income of one county, year by year.

    Parameters
    ----------
    county_fips : str
        County code passed to the Census API (e.g. "081").
    value_column : str
        Name given to the "B19013_001E" column in the result.
    county_column : str
        Name given to the "county" column in the result.
    state_fips : str, default "26"
        State code passed to the Census API ('26' is Michigan).
    years : iterable of int, default 2012..2021 inclusive
        Years to request; one API call is made per year.

    Returns
    -------
    pandas.DataFrame
        The per-year responses concatenated in request order, with an added
        "year" column and the value/county columns renamed as requested.
    """
    yearly_frames = []
    for year in years:
        response = c.acs5.state_county(income_vars, state_fips, county_fips, year=year)
        frame = pd.DataFrame(response)
        # The API response carries no year, so tag each chunk explicitly
        frame["year"] = year
        yearly_frames.append(frame)

    combined = pd.concat(yearly_frames, axis=0)
    return combined.rename(columns={"B19013_001E": value_column,
                                    "county": county_column})


# '081' is Kent county, '125' is Oakland county
median_income_kent = fetch_median_income("081", "Median Household Income_K", "Kent county")
median_income_oak = fetch_median_income("125", "Median Household Income_O", "Oakland county")
median_household_income1 = pd.merge(median_income_kent, median_income_oak, on=['state', 'year'])

# '145' is Saginaw county, '121' is Muskegon county
median_income_saginaw = fetch_median_income("145", "Median Household Income_S", "Saginaw county")
median_income_muskegon = fetch_median_income("121", "Median Household Income_M", "Muskegon county")
median_household_income2 = pd.merge(median_income_saginaw, median_income_muskegon, on=['state', 'year'])

# Merging to a single DataFrame, one row per year with all four counties side by side
median_household_income = pd.merge(median_household_income1, median_household_income2, on=['state', 'year'])
median_household_income = median_household_income[[
    "state", "year",
    "Kent county", "Median Household Income_K",
    "Oakland county", "Median Household Income_O",
    "Saginaw county", "Median Household Income_S",
    "Muskegon county", "Median Household Income_M",
]]
# Most recent year first
median_household_income = median_household_income.sort_values(by=['year'], ascending=False, ignore_index=True)
median_household_income

In [None]:
# TOTAL POPULATION

# Total Population variable from CensusAPI.
# Named `population_vars` (not `vars`) so the Python builtin `vars` is not shadowed.
population_vars = ["B01003_001E"]


def fetch_total_population(county_fips, value_column, county_column,
                           state_fips="26", years=range(2012, 2021 + 1)):
    """Fetch the ACS 5-year total population of one county, year by year.

    Parameters
    ----------
    county_fips : str
        County code passed to the Census API (e.g. "145").
    value_column : str
        Name given to the "B01003_001E" column in the result.
    county_column : str
        Name given to the "county" column in the result.
    state_fips : str, default "26"
        State code passed to the Census API ('26' is Michigan).
    years : iterable of int, default 2012..2021 inclusive
        Years to request; one API call is made per year.

    Returns
    -------
    pandas.DataFrame
        The per-year responses concatenated in request order, with an added
        "year" column and the value/county columns renamed as requested.
    """
    yearly_frames = []
    for year in years:
        response = c.acs5.state_county(population_vars, state_fips, county_fips, year=year)
        frame = pd.DataFrame(response)
        # The API response carries no year, so tag each chunk explicitly
        frame["year"] = year
        yearly_frames.append(frame)

    combined = pd.concat(yearly_frames, axis=0)
    return combined.rename(columns={"B01003_001E": value_column,
                                    "county": county_column})


# '145' is Saginaw county, '121' is Muskegon county
total_population_saginaw = fetch_total_population("145", "Total Population_S", "Saginaw county")
total_population_muskegon = fetch_total_population("121", "Total Population_M", "Muskegon county")
total_population1 = pd.merge(total_population_saginaw, total_population_muskegon, on=['state', 'year'])

# '081' is Kent county, '125' is Oakland county
total_population_kent = fetch_total_population("081", "Total Population_K", "Kent county")
total_population_oak = fetch_total_population("125", "Total Population_O", "Oakland county")
total_population2 = pd.merge(total_population_kent, total_population_oak, on=['state', 'year'])

# Merging into a single DataFrame, one row per year with all four counties side by side
total_population = pd.merge(total_population1, total_population2, on=['state', 'year'])
total_population = total_population[[
    "state", "year",
    "Saginaw county", "Total Population_S",
    "Muskegon county", "Total Population_M",
    "Kent county", "Total Population_K",
    "Oakland county", "Total Population_O",
]]
# Most recent year first
total_population = total_population.sort_values(by=['year'], ascending=False, ignore_index=True)
total_population

In [None]:
# Vital statistics set-up (4 files, one per county) used for the infant mortality rate

# File locations
infant_kentcsv = "datasets/KentVitalStatistics.csv"
infant_oaklandcsv = "datasets/OaklandVitalStatistics.csv"
infant_muskegoncsv = "datasets/MuskegonVitalStatistics.csv"
infant_saginawcsv = "datasets/SaginawVitalStatistics.csv"

# Load each county file
infant_kent_df = pd.read_csv(infant_kentcsv)
infant_oakland_df = pd.read_csv(infant_oaklandcsv)
infant_muskegon_df = pd.read_csv(infant_muskegoncsv)
infant_saginaw_df = pd.read_csv(infant_saginawcsv)

# Kent (left, "_x" suffix) joined with Muskegon (right, "_y" suffix) on State/Year
kent_muskegon_names = {
    "County_x": "County1",
    "Infant Deaths_x": "Infant_Deaths_Kent",
    "Live Births_x": "Live_Births_Kent",
    "County_y": "County2",
    "Infant Deaths_y": "Infant_Deaths_Muskegon",
    "Live Births_y": "Live_Births_Muskegon",
}
combined_df = (
    infant_kent_df
    .merge(infant_muskegon_df, how='left', on=['State', 'Year'])
    .rename(columns=kent_muskegon_names)
)
# Keep only the key columns and the renamed county/deaths/births columns
combined_df = combined_df[[
    "Year", "State",
    "County1", "Infant_Deaths_Kent", "Live_Births_Kent",
    "County2", "Infant_Deaths_Muskegon", "Live_Births_Muskegon",
]]

# Oakland (left, "_x" suffix) joined with Saginaw (right, "_y" suffix) on State/Year
oakland_saginaw_names = {
    "County_x": "County3",
    "Infant Deaths_x": "Infant_Deaths_Oakland",
    "Live Births_x": "Live_Births_Oakland",
    "County_y": "County4",
    "Infant Deaths_y": "Infant_Deaths_Saginaw",
    "Live Births_y": "Live_Births_Saginaw",
}
combined_df1 = (
    infant_oakland_df
    .merge(infant_saginaw_df, how='left', on=['State', 'Year'])
    .rename(columns=oakland_saginaw_names)
)

# All four counties side by side, one row per State/Year of the Kent file
infant_mortality = combined_df.merge(combined_df1, how='left', on=['State', 'Year'])
infant_mortality


In [None]:
# Infant mortality rate per county: infant deaths per 1,000 live births,
# rounded to two decimals and stored as "<County>_mortalityrate".
for county_name in ("Kent", "Muskegon", "Oakland", "Saginaw"):
    deaths = infant_mortality[f"Infant_Deaths_{county_name}"]
    births = infant_mortality[f"Live_Births_{county_name}"]
    infant_mortality[f"{county_name}_mortalityrate"] = np.round((deaths / births) * 1000, decimals=2)

# New dataframe holding only the year, county names and computed rates
rate_columns = [
    "Year",
    "County1", "Kent_mortalityrate",
    "County2", "Muskegon_mortalityrate",
    "County3", "Oakland_mortalityrate",
    "County4", "Saginaw_mortalityrate",
]
infant_mortality_df = infant_mortality[rate_columns]
infant_mortality_df

In [None]:
# Time series of the infant mortality rate, one line per county.
# Each line carries its own label, so the legend is built from the plotted
# lines themselves instead of a separate hand-ordered list of names.
county_line_colors = {
    "Kent": "red",
    "Muskegon": "green",
    "Oakland": "blue",
    "Saginaw": "orange",
}
for county_name, line_color in county_line_colors.items():
    plt.plot(
        infant_mortality_df["Year"],
        infant_mortality_df[f"{county_name}_mortalityrate"],
        color=line_color,
        label=county_name,
    )

# Giving title to the graph
plt.title('Infant Mortality rate')

# rotating the x-axis tick labels 30 degrees towards the right
plt.xticks(rotation=30, ha='right')

# Giving x and y label to the graph
plt.xlabel('Year')
plt.ylabel('Mortality rate')
# Legend title names what the entries are (was the placeholder 'Legend Title')
plt.legend(loc='upper left', title='County')
plt.show()

In [None]:
# POVERTY RATE
# NOTE(review): these paths start with "Datasets/" while the other cells read
# from "datasets/" — confirm the directory casing on case-sensitive file systems.
pov_kentcsv = "Datasets/Poverty_rate/Kent_poverty_rate.csv"
pov_oaklandcsv = "Datasets/Poverty_rate/Oakland_poverty_rate.csv"
pov_muskegoncsv = "Datasets/Poverty_rate/Muskegon_poverty_rate.csv"
pov_saginawcsv = "Datasets/Poverty_rate/Saginaw_poverty_rate.csv"

# Load each county file
pov_kent_df = pd.read_csv(pov_kentcsv)
pov_oakland_df = pd.read_csv(pov_oaklandcsv)
pov_muskegon_df = pd.read_csv(pov_muskegoncsv)
pov_saginaw_df = pd.read_csv(pov_saginawcsv)

# Kent (left, "_x" suffix) joined with Muskegon (right, "_y" suffix) on State/Year
kent_muskegon_poverty_names = {
    "ID_x": "Kent ID",
    "Name_x": "Kent County",
    "Poverty Universe_x": "Poverty Universe_K",
    "Number in Poverty_x": "Number in Poverty_K",
    "Percent in Poverty_x": "Poverty Percent_Kent",
    "ID_y": "Muskegon ID",
    "Name_y": "Muskegon County",
    "Poverty Universe_y": "Poverty Universe_M",
    "Number in Poverty_y": "Number in Poverty_M",
    "Percent in Poverty_y": "Poverty Percent_Muskegon",
}
combined_df = (
    pov_kent_df
    .merge(pov_muskegon_df, how='left', on=['State', 'Year'])
    .rename(columns=kent_muskegon_poverty_names)
)
# Only the county names and poverty percentages are carried forward
combined_df22 = combined_df[[
    "State", "Year",
    "Kent County", "Poverty Percent_Kent",
    "Muskegon County", "Poverty Percent_Muskegon",
]]

# Oakland (left, "_x" suffix) joined with Saginaw (right, "_y" suffix) on State/Year
oakland_saginaw_poverty_names = {
    "ID_x": "Oakland ID",
    "Name_x": "Oakland County",
    "Poverty Universe_x": "Poverty Universe_O",
    "Number in Poverty_x": "Number in Poverty_O",
    "Percent in Poverty_x": "Poverty Percent_Oakland",
    "ID_y": "Saginaw ID",
    "Name_y": "Saginaw County",
    "Poverty Universe_y": "Poverty Universe_S",
    "Number in Poverty_y": "Number in Poverty_S",
    "Percent in Poverty_y": "Poverty Percent_Saginaw",
}
combined_df1 = (
    pov_oakland_df
    .merge(pov_saginaw_df, how='left', on=['State', 'Year'])
    .rename(columns=oakland_saginaw_poverty_names)
)
combined_df11 = combined_df1[[
    "Year", "State",
    "Oakland County", "Poverty Percent_Oakland",
    "Saginaw County", "Poverty Percent_Saginaw",
]]

# All four counties side by side, then most recent year first
poverty_rate = combined_df22.merge(combined_df11, how='left', on=['State', 'Year'])
poverty_rate1 = poverty_rate.sort_values(by=['Year'], ascending=False, ignore_index=True)
poverty_rate1

In [None]:
# REGRESSION PLOT BETWEEN POVERTY AND INFANT MORTALITY
def linear_reg_plot(x_values,y_values,coordinates,titles):
    """Scatter-plot two variables together with their least-squares regression line.

    Prints the correlation coefficient and its square, annotates the plot with
    the fitted line equation and shows the figure.

    Parameters
    ----------
    x_values : array-like of numbers
        Values for the x axis (labelled 'Poverty rate').
    y_values : array-like of numbers
        Values for the y axis (labelled 'Infant Mortality rate'); paired with
        `x_values` by position, so both must be in the same order.
    coordinates : tuple
        (x, y) data coordinates at which the line equation is written.
    titles : str
        Title of the plot.

    Returns
    -------
    None
    """
    # Work on plain float arrays so lists, tuples and pandas Series all behave
    # the same (a list would otherwise fail on `x_values * slope`).
    x = np.asarray(x_values, dtype=float)
    y = np.asarray(y_values, dtype=float)

    slope, intercept, rvalue, _pvalue, _stderr = linregress(x, y)
    regress_values = x * slope + intercept

    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Size the figure BEFORE drawing. Assigning plt.rcParams["figure.figsize"]
    # after plotting does not resize the current figure and silently changes
    # the default size of every figure created later in the notebook.
    plt.figure(figsize=(4, 4))

    # Plot scatter plot
    plt.scatter(x, y)

    # Plot regression line
    plt.plot(x, regress_values, "r-")

    # Label plot
    plt.xlabel('Poverty rate')
    plt.ylabel('Infant Mortality rate')
    plt.annotate(line_eq,coordinates,fontsize=15,color="red")
    plt.title(titles)

    # Print r and r-squared values
    print(f"The r-value is : {rvalue}")
    print(f"R squared: {rvalue**2}")

    # Show plot
    plt.show()
   
   
# Regress infant mortality on poverty rate for Oakland and Muskegon counties.
#
# The two inputs must be paired by Year: poverty_rate1 is sorted by Year in
# descending order, while infant_mortality_df keeps the row order of the vital
# statistics files, so pairing the columns by position could match a poverty
# rate with the mortality rate of a different year.
regression_targets = [
    ("Oakland", (8.5, 4.5)),
    ("Muskegon", (16, 6)),
]
for county_name, annotation_xy in regression_targets:
    poverty_column = f"Poverty Percent_{county_name}"
    mortality_column = f"{county_name}_mortalityrate"

    # Inner join keeps only the years present in both tables
    paired_by_year = pd.merge(
        poverty_rate1[["Year", poverty_column]],
        infant_mortality_df[["Year", mortality_column]],
        on="Year",
        how="inner",
    )
    x_values = paired_by_year[poverty_column]
    y_values = paired_by_year[mortality_column]

    linear_reg_plot(x_values, y_values, annotation_xy,
                    f"Poverty vs Infant Mortality in {county_name} County")



In [None]:
# Median rent over time set-up (1 file)
rent_csv = "datasets/Median Rent Over Time (2011 - 2023).csv"
rent_df = pd.read_csv(rent_csv)
rent_df.head()

In [None]:
# Inspect the column labels of the rent table as loaded from the CSV
rent_df.columns

In [None]:
# Give the unnamed first CSV column a meaningful name.
# Reassigning (instead of inplace=True) keeps the step an explicit
# "new frame from old frame" transformation.
rent_df = rent_df.rename(columns={'Unnamed: 0': 'Counties'})
rent_df.head()

In [None]:
# Convert every year column (2011-2023) from text such as "$1,234" to numbers.
for year in range(2011, 2024):
    # Column labels are the years as strings
    year_column = str(year)
    values = rent_df[year_column]

    # Skip the text clean-up when the column is already numeric, so the cell
    # can be re-run without `.str` raising on a numeric column.
    if not pd.api.types.is_numeric_dtype(values):
        # regex=False: "$" must be removed literally; as a regular expression
        # it is the end-of-string anchor.
        values = values.str.replace('$', '', regex=False)
        values = values.str.replace(',', '', regex=False)

    # Convert the year column to numeric
    rent_df[year_column] = pd.to_numeric(values)

rent_df.head()

In [None]:
# Rows of the rent table whose "Counties" value is exactly "Kent County"
is_kent_row = rent_df["Counties"] == "Kent County"
kentcounty = rent_df.loc[is_kent_row]
kentcounty

In [None]:
# One line per row of rent_df, labelled with that row's own county name so the
# legend always matches the plotted data regardless of row order or row count.
for _, county_row in rent_df.iterrows():
    # Get the name from the 'Counties' column
    county = county_row['Counties']

    # Get the rates for each year (label-based slice, both ends included)
    rent_rates = county_row['2011':'2023']

    # Plot the rates as a line graph
    plt.plot(rent_rates.index, rent_rates.values, marker="o", label=county)

# Giving title to the graph
plt.title('Median Rent Rate')

# rotating the x-axis tick labels 30 degrees towards the right
plt.xticks(rotation=30, ha='right')

# Giving x and y label to the graph
plt.xlabel('Year')
plt.ylabel('Rent Rate')
plt.ylim(500,1600)
# Legend entries come from the per-line labels (title was the placeholder 'Legend Title')
plt.legend(loc='upper left', title='County')
plt.show()

In [None]:
# Health insurance percentage set-up
# The text file was edited to solve errors and trimmed down to the necessary counties + state average

insurance_rates_csv = "datasets/health_data.csv"

# Load the file and discard the national rank column
insurance_rates_df = pd.read_csv(insurance_rates_csv).drop(
    columns=['Rank within US (of 3141 counties)']
)

insurance_rates_df.head()

In [None]:
# Index the table by county and remove the 'Insured (People)' column.
# Both steps are guarded so the cell can be re-run: the original in-place
# set_index raised KeyError on a second run because 'County' was already the index.
if insurance_rates_df.index.name != 'County':
    insurance_rates_df = insurance_rates_df.set_index('County')
if 'Insured (People)' in insurance_rates_df.columns:
    insurance_rates_df = insurance_rates_df.drop(columns='Insured (People)')

In [None]:
# Bar chart of the insurance table, one bar group per index entry
insurance_figure = insurance_rates_df.plot(kind = "bar", facecolor = "blue", figsize = (6, 6),
                                          title = "Insurance Enrollment by County",
                                          xlabel = "Counties vs. State Average",
                                          ylabel = "Insurance Enrollment (%)")
# Slanted, right-anchored tick labels so long county names stay readable
xticklabels = insurance_rates_df.index
insurance_figure.set_xticklabels(xticklabels, rotation = 45, rotation_mode = "anchor", ha = "right", wrap = True)
plt.ylim(0, 100)
# plt.show must be CALLED; the bare name `plt.show` is a no-op expression
plt.show()
insurance_rates_df.head()

In [None]:
# Broadband availability percentage set-up
# The text file was edited to solve errors and trimmed down to the necessary counties + state average

broadband_rates_csv = "datasets/broadband_data.csv"

broadband_rates_df = pd.read_csv(broadband_rates_csv)
# Discard the second column (position 1), selected by position rather than by name
second_column = broadband_rates_df.columns[[1]]
broadband_rates_df = broadband_rates_df.drop(columns=second_column)

broadband_rates_df.head()

In [None]:
# Index the table by county. Guarded so the cell can be re-run: an in-place
# set_index raises KeyError the second time because 'County' is already the index.
if broadband_rates_df.index.name != 'County':
    broadband_rates_df = broadband_rates_df.set_index('County')

In [None]:
# Bar chart of the broadband table, one bar group per index entry
broadband_figure = broadband_rates_df.plot(kind = "bar", facecolor = "blue", figsize = (6, 6),
                                          title = "Broadband Availability by County",
                                          xlabel = "Counties vs. State Average",
                                          ylabel = "Broadband Availability (%)")
# Slanted, right-anchored tick labels so long county names stay readable
xticklabels = broadband_rates_df.index
broadband_figure.set_xticklabels(xticklabels, rotation = 45, rotation_mode = "anchor", ha = "right", wrap = True)
plt.ylim(0, 100)
# plt.show must be CALLED; the bare name `plt.show` is a no-op expression
plt.show()
broadband_rates_df.head()

In [None]:
# Broadband availability data analyzed (full file)

broadband_full_csv = "datasets/broadband_full.csv"

# Load the file without its 'FIPS' column
broadband_full_df = pd.read_csv(broadband_full_csv).drop(columns=['FIPS'])
broadband_full_df.head(10)


In [None]:
# Summary statistics of the 'Percent' column
arr = np.array(broadband_full_df['Percent'])
bb_mean, bb_median, bb_std = arr.mean(), np.median(arr), arr.std()

# The "Population Average" figure is a fixed value written into the message,
# not something computed from the data in this cell.
summary_lines = [
    "Population Average: 86%",
    f"County Average: {bb_mean}%",
    f"Median: {bb_median}%",
    f"Standard Deviation: {bb_std}",
]
print("\n".join(summary_lines))


In [None]:
# Box plot of the 'Percent' column; the y axis is restricted to 70-100
plt.boxplot(broadband_full_df['Percent'])
plt.ylim(70, 100)
plt.ylabel("Broadband Availability (%)")
plt.title("")
# plt.show must be CALLED; the bare name `plt.show` is a no-op expression
plt.show()

In [None]:
# Assessing education levels across specified counties

bachelors_csv = "datasets/bachelors_data.csv"

# Columns that are not used in this analysis
unused_bachelors_columns = [
    'FIPS',
    'Rank within US (of 3142)',
    'Bachelors Degree Totals',
]
bachelors_df = pd.read_csv(bachelors_csv).drop(columns=unused_bachelors_columns)

In [None]:
# Index the table by county. Guarded so the cell can be re-run: an in-place
# set_index raises KeyError the second time because 'County' is already the index.
if bachelors_df.index.name != 'County':
    bachelors_df = bachelors_df.set_index('County')
bachelors_df.head()

In [None]:
# Keep only the state-wide row and the four counties of interest, in this display order
bachelors_trimmed_df = bachelors_df.loc[["Michigan","Kent County","Oakland County","Muskegon County","Saginaw County"], :]

bachelors_figure = bachelors_trimmed_df.plot(kind = "bar", facecolor = "blue", figsize = (6, 6),
                                          title = "Bachelors Degrees by County (%)",
                                          xlabel = "Counties vs. State Average",
                                          ylabel = "Bachelors Degrees (%)")
# Slanted, right-anchored tick labels so long county names stay readable
xticklabels = bachelors_trimmed_df.index
bachelors_figure.set_xticklabels(xticklabels, rotation = 45, rotation_mode = "anchor", ha = "right", wrap = True)
plt.ylim(0, 60)
# plt.show must be CALLED; the bare name `plt.show` is a no-op expression
plt.show()
bachelors_trimmed_df.head()

In [None]:
# Summary statistics of the 'Value (Percent)' column
bach_mean = np.mean(bachelors_df['Value (Percent)'])
# Standard deviation: this was computed with np.mean, so the value printed as
# "Standard Deviation" was just the mean again.
bach_std = np.std(bachelors_df['Value (Percent)'])
bach_median = np.median(bachelors_df['Value (Percent)'])

# Box plot of the same column
plt.boxplot(bachelors_df['Value (Percent)'])
plt.ylim(0, 60)
plt.ylabel("Bachelors Degrees (%)")
plt.title("")
# plt.show must be CALLED; the bare name `plt.show` is a no-op expression
plt.show()
print(f"Mean: {bach_mean}")
print(f"Median: {bach_median}")
print(f"Standard Deviation: {bach_std}")
