In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import patsy 
import math

pd.set_option('display.max_columns', 100)
%matplotlib inline

In [2]:
# Read the DF created from the feature_selection notebook
counties = pd.read_pickle('counties_df.pkl')

In [3]:
# Make the column names standardized for use in patsy formulas
counties.columns = [x.replace(' ', '_').replace('-', '_').replace('%', 'Percent').replace('.', '_').replace('(', '').replace(')', '') for x in list(counties.columns)]

In [4]:
from sklearn.feature_selection import SelectKBest, f_regression
X = counties.copy().drop(columns=['Drug_Overdose_Mortality_Rate'])
X = X.select_dtypes(exclude='object')
y = counties['Drug_Overdose_Mortality_Rate']
kbest = SelectKBest(f_regression, k=20).fit(X, y)

In [5]:
new_features = []
mask = kbest.get_support()

for boolean, feature in zip(mask, list(counties.columns.values)):
    if boolean:
        new_features.append(feature)

In [6]:
new_features

['State',
 'Age_Adjusted_Mortality',
 'Age_Adjusted_Mortality_Black',
 'Child_Mortality_Rate',
 'Child_Mortality_Rate_Black',
 'Child_Mortality_Rate_White',
 'Infant_Mortality_Rate',
 'Percent_Frequent_Physical_Distress',
 'Percent_Uninsured_1',
 'Segregation_index',
 'Percent_Not_Proficient_in_English',
 'State_ranked',
 'Years_of_Potential_Life_Lost_Rate',
 'Years_of_Potential_Life_Lost_Rate_Black',
 'Physically_Unhealthy_Days',
 'Mentally_Unhealthy_Days',
 'Percent_Smokers',
 'Teen_Birth_Rate_White',
 'Percent_Some_College',
 '20th_Percentile_Income']

In [13]:
#counties.loc[(counties['State'] == 'Ohio') & (counties['County'] == 'Clermont')]

In [8]:
# Try to find the 50th percentile for each category and split them at that point. 
# Then, find the mean drug overdose value for the lower 50% and for the upper 50% and compare them. 

In [57]:
# STATE 
counties_state = counties.sort_values('State')

end_point = len(counties_state)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_state_low = 0 
total_state_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_state_low = total_state_low + counties_state.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_state_high = total_state_high + counties_state.iloc[n, 16]
        n = n + 1 
mean_low = total_state_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of State: ', mean_low)
mean_high = total_state_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of State: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of State:  19.23537802000737
Mean Drug Overdose Mortality Rate for upper 50% of State:  20.52340131726678


In [24]:
# AGE ADJUSTED MORTALITY 
counties_age_adjusted_mortality = counties.sort_values('Age_Adjusted_Mortality')

end_point = len(counties_age_adjusted_mortality)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_age_adjusted_mortality_low = 0 
total_age_adjusted_mortality_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_age_adjusted_mortality_low = total_age_adjusted_mortality_low + counties_age_adjusted_mortality.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_age_adjusted_mortality_high = total_age_adjusted_mortality_high + counties_age_adjusted_mortality.iloc[n, 16]
        n = n + 1 
mean_low = total_age_adjusted_mortality_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Age Adjusted Mortality: ', mean_low)
mean_high = total_age_adjusted_mortality_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Age Adjusted Mortality: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Age Adjusted Mortality:  16.43768041025713
Mean Drug Overdose Mortality Rate for upper 50% of Age Adjusted Mortality:  23.321098927017037


In [25]:
# AGE ADJUSTED MORTALITY BLACK 
counties_age_adjusted_mortality_black = counties.sort_values('Age_Adjusted_Mortality_Black')

end_point = len(counties_age_adjusted_mortality_black)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_age_adjusted_mortality_black_low = 0 
total_age_adjusted_mortality_black_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_age_adjusted_mortality_black_low = total_age_adjusted_mortality_black_low + counties_age_adjusted_mortality_black.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_age_adjusted_mortality_black_high = total_age_adjusted_mortality_black_high + counties_age_adjusted_mortality_black.iloc[n, 16]
        n = n + 1 
mean_low = total_age_adjusted_mortality_black_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Age Adjusted Mortality Black: ', mean_low)
mean_high = total_age_adjusted_mortality_black_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Age Adjusted Mortality Black: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Age Adjusted Mortality Black:  17.148575330206747
Mean Drug Overdose Mortality Rate for upper 50% of Age Adjusted Mortality Black:  22.610204007067377


In [26]:
# CHILD MORTALITY RATE 
counties_child_mortality_rate = counties.sort_values('Child_Mortality_Rate')

end_point = len(counties_child_mortality_rate)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_child_mortality_rate_low = 0 
total_child_mortality_rate_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_child_mortality_rate_low = total_child_mortality_rate_low + counties_child_mortality_rate.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_child_mortality_rate_high = total_child_mortality_rate_high + counties_child_mortality_rate.iloc[n, 16]
        n = n + 1 
mean_low = total_child_mortality_rate_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate: ', mean_low)
mean_high = total_child_mortality_rate_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate:  17.685557626139595
Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate:  22.073221711134558


In [27]:
# CHILD MORTALITY RATE BLACK 
counties_child_mortality_rate_black = counties.sort_values('Child_Mortality_Rate_Black')

end_point = len(counties_child_mortality_rate_black)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_child_mortality_rate_black_low = 0 
total_child_mortality_rate_black_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_child_mortality_rate_black_low = total_child_mortality_rate_black_low + counties_child_mortality_rate_black.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_child_mortality_rate_black_high = total_child_mortality_rate_black_high + counties_child_mortality_rate_black.iloc[n, 16]
        n = n + 1 
mean_low = total_child_mortality_rate_black_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate Black: ', mean_low)
mean_high = total_child_mortality_rate_black_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate Black: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate Black:  17.67767307184385
Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate Black:  22.08110626543029


In [28]:
# CHILD MORTALITY RATE WHITE  
counties_child_mortality_rate_white = counties.sort_values('Child_Mortality_Rate_White')

end_point = len(counties_child_mortality_rate_white)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_child_mortality_rate_white_low = 0 
total_child_mortality_rate_white_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_child_mortality_rate_white_low = total_child_mortality_rate_white_low + counties_child_mortality_rate_white.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_child_mortality_rate_white_high = total_child_mortality_rate_white_high + counties_child_mortality_rate_white.iloc[n, 16]
        n = n + 1 
mean_low = total_child_mortality_rate_white_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate White: ', mean_low)
mean_high = total_child_mortality_rate_white_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate White: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Child Mortality Rate White:  17.735426720786233
Mean Drug Overdose Mortality Rate for upper 50% of Child Mortality Rate White:  22.023352616487895


In [29]:
# INFANT MORTALITY RATE  
counties_infant_mortality_rate = counties.sort_values('Infant_Mortality_Rate')

end_point = len(counties_infant_mortality_rate)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_infant_mortality_rate_low = 0 
total_infant_mortality_rate_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_infant_mortality_rate_low = total_infant_mortality_rate_low + counties_infant_mortality_rate.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_infant_mortality_rate_high = total_infant_mortality_rate_high + counties_infant_mortality_rate.iloc[n, 16]
        n = n + 1 
mean_low = total_infant_mortality_rate_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Infant Mortality Rate: ', mean_low)
mean_high = total_infant_mortality_rate_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Infant Mortality Rate: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Infant Mortality Rate:  17.328560998674032
Mean Drug Overdose Mortality Rate for upper 50% of Infant Mortality Rate:  22.430218338600124


In [30]:
# PERCENT FREQUENT PHYSICAL DISTRESS  
counties_percent_frequent_physical_distress = counties.sort_values('Percent_Frequent_Physical_Distress')

end_point = len(counties_percent_frequent_physical_distress)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_percent_frequent_physical_distress_low = 0 
total_percent_frequent_physical_distress_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_percent_frequent_physical_distress_low = total_percent_frequent_physical_distress_low + counties_percent_frequent_physical_distress.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_percent_frequent_physical_distress_high = total_percent_frequent_physical_distress_high + counties_percent_frequent_physical_distress.iloc[n, 16]
        n = n + 1 
mean_low = total_percent_frequent_physical_distress_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Percent Frequent Physical Distress: ', mean_low)
mean_high = total_percent_frequent_physical_distress_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Percent Frequent Physical Distress: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Percent Frequent Physical Distress:  17.689743383168473
Mean Drug Overdose Mortality Rate for upper 50% of Percent Frequent Physical Distress:  22.069035954105676


In [31]:
# PERCENT UNINSURED 1  
counties_percent_uninsured_1 = counties.sort_values('Percent_Uninsured_1')

end_point = len(counties_percent_uninsured_1)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_percent_uninsured_1_low = 0 
total_percent_uninsured_1_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_percent_uninsured_1_low = total_percent_uninsured_1_low + counties_percent_uninsured_1.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_percent_uninsured_1_high = total_percent_uninsured_1_high + counties_percent_uninsured_1.iloc[n, 16]
        n = n + 1 
mean_low = total_percent_uninsured_1_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Percent Uninsured 1: ', mean_low)
mean_high = total_percent_uninsured_1_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Percent Uninsured 1: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Percent Uninsured 1:  20.13180047601728
Mean Drug Overdose Mortality Rate for upper 50% of Percent Uninsured 1:  19.626978861256863


In [32]:
# SEGREGATION INDEX  
counties_segregation_index = counties.sort_values('Segregation_index')

end_point = len(counties_segregation_index)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_segregation_index_low = 0 
total_segregation_index_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_segregation_index_low = total_segregation_index_low + counties_segregation_index.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_segregation_index_high = total_segregation_index_high + counties_segregation_index.iloc[n, 16]
        n = n + 1 
mean_low = total_segregation_index_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Segregation Index: ', mean_low)
mean_high = total_segregation_index_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Segregation Index: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Segregation Index:  19.115431530644557
Mean Drug Overdose Mortality Rate for upper 50% of Segregation Index:  20.64334780662961


In [34]:
# PERCENT NOT PROFICIENT IN ENGLISH  
counties_percent_not_proficient_in_english = counties.sort_values('Percent_Not_Proficient_in_English')

end_point = len(counties_percent_not_proficient_in_english)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_percent_not_proficient_in_english_low = 0 
total_percent_not_proficient_in_english_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_percent_not_proficient_in_english_low = total_percent_not_proficient_in_english_low + counties_percent_not_proficient_in_english.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_percent_not_proficient_in_english_high = total_percent_not_proficient_in_english_high + counties_percent_not_proficient_in_english.iloc[n, 16]
        n = n + 1 
mean_low = total_percent_not_proficient_in_english_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Percent Not Proficient in English: ', mean_low)
mean_high = total_percent_not_proficient_in_english_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Percent Not Proficient in English: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Percent Not Proficient in English:  21.893118487185607
Mean Drug Overdose Mortality Rate for upper 50% of Percent Not Proficient in English:  17.865660850088524


In [35]:
# STATE RANKED 
counties_state_ranked = counties.sort_values('State_ranked')

end_point = len(counties_state_ranked)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_state_ranked_low = 0 
total_state_ranked_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_state_ranked_low = total_state_ranked_low + counties_state_ranked.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_state_ranked_high = total_state_ranked_high + counties_state_ranked.iloc[n, 16]
        n = n + 1 
mean_low = total_state_ranked_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of State Ranked: ', mean_low)
mean_high = total_state_ranked_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of State Ranked: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of State Ranked:  19.23537802000737
Mean Drug Overdose Mortality Rate for upper 50% of State Ranked:  20.52340131726678


In [37]:
# YEARS OF POTENTIAL LIFE LOST RATE  
counties_years_of_potential_life_lost_rate = counties.sort_values('Years_of_Potential_Life_Lost_Rate')

end_point = len(counties_years_of_potential_life_lost_rate)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_years_of_potential_life_lost_rate_low = 0 
total_years_of_potential_life_lost_rate_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_years_of_potential_life_lost_rate_low = total_years_of_potential_life_lost_rate_low + counties_years_of_potential_life_lost_rate.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_years_of_potential_life_lost_rate_high = total_years_of_potential_life_lost_rate_high + counties_years_of_potential_life_lost_rate.iloc[n, 16]
        n = n + 1 
mean_low = total_years_of_potential_life_lost_rate_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Years of Potential Life Lost Rate: ', mean_low)
mean_high = total_years_of_potential_life_lost_rate_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Years of Potential Life Lost Rate: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Years of Potential Life Lost Rate:  16.349545932599995
Mean Drug Overdose Mortality Rate for upper 50% of Years of Potential Life Lost Rate:  23.409233404674183


In [38]:
# YEARS OF POTENTIAL LIFE LOST RATE BLACK  
counties_years_of_potential_life_lost_rate_black = counties.sort_values('Years_of_Potential_Life_Lost_Rate_Black')

end_point = len(counties_years_of_potential_life_lost_rate_black)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_years_of_potential_life_lost_rate_black_low = 0 
total_years_of_potential_life_lost_rate_black_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_years_of_potential_life_lost_rate_black_low = total_years_of_potential_life_lost_rate_black_low + counties_years_of_potential_life_lost_rate_black.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_years_of_potential_life_lost_rate_black_high = total_years_of_potential_life_lost_rate_black_high + counties_years_of_potential_life_lost_rate_black.iloc[n, 16]
        n = n + 1 
mean_low = total_years_of_potential_life_lost_rate_black_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Years of Potential Life Lost Rate Black: ', mean_low)
mean_high = total_years_of_potential_life_lost_rate_black_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Years of Potential Life Lost Rate Black: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Years of Potential Life Lost Rate Black:  17.38630129436349
Mean Drug Overdose Mortality Rate for upper 50% of Years of Potential Life Lost Rate Black:  22.372478042910632


In [40]:
# PHYSICALLY UNHEALTHY DAYS  
counties_physically_unhealthy_days = counties.sort_values('Physically_Unhealthy_Days')

end_point = len(counties_physically_unhealthy_days)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_physically_unhealthy_days_low = 0 
total_physically_unhealthy_days_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_physically_unhealthy_days_low = total_physically_unhealthy_days_low + counties_physically_unhealthy_days.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_physically_unhealthy_days_high = total_physically_unhealthy_days_high + counties_physically_unhealthy_days.iloc[n, 16]
        n = n + 1 
mean_low = total_physically_unhealthy_days_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Physically Unhealthy Days: ', mean_low)
mean_high = total_physically_unhealthy_days_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Physically Unhealthy Days: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Physically Unhealthy Days:  17.552118409628
Mean Drug Overdose Mortality Rate for upper 50% of Physically Unhealthy Days:  22.206660927646137


In [41]:
# MENTALLY UNHEALTHY DAYS  
counties_mentally_unhealthy_days = counties.sort_values('Mentally_Unhealthy_Days')

end_point = len(counties_mentally_unhealthy_days)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_mentally_unhealthy_days_low = 0 
total_mentally_unhealthy_days_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_mentally_unhealthy_days_low = total_mentally_unhealthy_days_low + counties_mentally_unhealthy_days.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_mentally_unhealthy_days_high = total_mentally_unhealthy_days_high + counties_mentally_unhealthy_days.iloc[n, 16]
        n = n + 1 
mean_low = total_mentally_unhealthy_days_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Mentally Unhealthy Days: ', mean_low)
mean_high = total_mentally_unhealthy_days_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Mentally Unhealthy Days: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Mentally Unhealthy Days:  16.89126199717164
Mean Drug Overdose Mortality Rate for upper 50% of Mentally Unhealthy Days:  22.867517340102538


In [43]:
# PERCENT SMOKERS  
counties_percent_smokers = counties.sort_values('Percent_Smokers')

end_point = len(counties_percent_smokers)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_percent_smokers_low = 0 
total_percent_smokers_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_percent_smokers_low = total_percent_smokers_low + counties_percent_smokers.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_percent_smokers_high = total_percent_smokers_high + counties_percent_smokers.iloc[n, 16]
        n = n + 1 
mean_low = total_percent_smokers_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Percent Smokers: ', mean_low)
mean_high = total_percent_smokers_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Percent Smokers: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Percent Smokers:  17.46494190972306
Mean Drug Overdose Mortality Rate for upper 50% of Percent Smokers:  22.29383742755106


In [46]:
# TEEN BIRTH RATE WHITE  
counties_teen_birth_rate_white = counties.sort_values('Teen_Birth_Rate_White')

end_point = len(counties_teen_birth_rate_white)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_teen_birth_rate_white_low = 0 
total_teen_birth_rate_white_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_teen_birth_rate_white_low = total_teen_birth_rate_white_low + counties_teen_birth_rate_white.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_teen_birth_rate_white_high = total_teen_birth_rate_white_high + counties_teen_birth_rate_white.iloc[n, 16]
        n = n + 1 
mean_low = total_teen_birth_rate_white_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Teen Birth Rate White: ', mean_low)
mean_high = total_teen_birth_rate_white_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Teen Birth Rate White: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Teen Birth Rate White:  17.734291392365584
Mean Drug Overdose Mortality Rate for upper 50% of Teen Birth Rate White:  22.02448794490855


In [47]:
# PERCENT SOME COLLEGE  
counties_percent_some_college = counties.sort_values('Percent_Some_College')

end_point = len(counties_percent_some_college)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_percent_some_college_low = 0 
total_percent_some_college_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_percent_some_college_low = total_percent_some_college_low + counties_percent_some_college.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_state_high = total_percent_some_college_high + counties_percent_some_college.iloc[n, 16]
        n = n + 1 
mean_low = total_percent_some_college_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of Percent Some College: ', mean_low)
mean_high = total_percent_some_college_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of Percent Some College: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of Percent Some College:  21.93643457191002
Mean Drug Overdose Mortality Rate for upper 50% of Percent Some College:  0.0


In [48]:
# 20TH PERCENTILE INCOME  
counties_20th_percentile_income = counties.sort_values('20th_Percentile_Income')

end_point = len(counties_20th_percentile_income)
median_point = end_point/2
median_point = np.ceil(median_point)

n = 0 
total_20th_percentile_income_low = 0 
total_20th_percentile_income_high = 0 
mean_low = 0 
mean_high = 0 
while n < end_point:
    if n < median_point:
        total_20th_percentile_income_low = total_20th_percentile_income_low + counties_20th_percentile_income.iloc[n, 16]
        n = n + 1 
    if n >= median_point:
        total_20th_percentile_income_high = total_20th_percentile_income_high + counties_20th_percentile_income.iloc[n, 16]
        n = n + 1 
mean_low = total_20th_percentile_income_low/median_point
print('Mean Drug Overdose Mortality Rate for lower 50% of 20th Percentile Income: ', mean_low)
mean_high = total_20th_percentile_income_high/median_point
print('Mean Drug Overdose Mortality Rate for upper 50% of 20th Percentile Income: ', mean_high)

Mean Drug Overdose Mortality Rate for lower 50% of 20th Percentile Income:  22.03283349646774
Mean Drug Overdose Mortality Rate for upper 50% of 20th Percentile Income:  17.72594584080639
