In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# read in csv 
df = pd.read_csv("../Resources/race_data.csv")

In [3]:
# view dataset of mmr stratified by race 
df.head()

Unnamed: 0,state,state_code,year,race,hispanic_origin,births_by_race,deaths_by_race,mmr_by_race,population_by_race,id,state_abbv,latitude,longitude
0,Alabama,1,2016,Black or African American,Not Hispanic or Latino,17989,14,77.83,0,US.AL,AL,32.318231,-86.902298
1,Alabama,1,2016,White,Not Hispanic or Latino,35319,18,50.96,0,US.AL,AL,32.318231,-86.902298
2,Alabama,1,2017,Black or African American,Not Hispanic or Latino,18354,25,136.21,704201,US.AL,AL,32.318231,-86.902298
3,Alabama,1,2017,White,Not Hispanic or Latino,34784,15,43.12,1655605,US.AL,AL,32.318231,-86.902298
4,Alabama,1,2018,Black or African American,Not Hispanic or Latino,17939,14,78.04,706754,US.AL,AL,32.318231,-86.902298


In [4]:
# binary encoding with preserving all columns for heatmap
df_binary_encoded = pd.get_dummies(df, columns=["race", "hispanic_origin"])
df_binary_encoded

Unnamed: 0,state,state_code,year,births_by_race,deaths_by_race,mmr_by_race,population_by_race,id,state_abbv,latitude,longitude,race_Asian or Pacific Islander,race_Black or African American,race_White,hispanic_origin_Hispanic or Latino,hispanic_origin_Not Hispanic or Latino
0,Alabama,1,2016,17989,14,77.83,0,US.AL,AL,32.318231,-86.902298,0,1,0,0,1
1,Alabama,1,2016,35319,18,50.96,0,US.AL,AL,32.318231,-86.902298,0,0,1,0,1
2,Alabama,1,2017,18354,25,136.21,704201,US.AL,AL,32.318231,-86.902298,0,1,0,0,1
3,Alabama,1,2017,34784,15,43.12,1655605,US.AL,AL,32.318231,-86.902298,0,0,1,0,1
4,Alabama,1,2018,17939,14,78.04,706754,US.AL,AL,32.318231,-86.902298,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,West Virginia,54,2018,16785,10,59.58,853052,US.WV,WV,38.597626,-80.454903,0,0,1,0,1
384,Wisconsin,55,2009,52462,10,19.06,0,US.WI,WI,43.784440,-88.787868,0,0,1,0,1
385,Wisconsin,55,2011,50500,10,19.80,0,US.WI,WI,43.784440,-88.787868,0,0,1,0,1
386,Wisconsin,55,2014,49440,13,26.29,2409803,US.WI,WI,43.784440,-88.787868,0,0,1,0,1


### Linear Regression Model 

In [6]:
import joblib
from joblib import load

In [7]:
model = load('../models/Linear_Regression_strat_by_race_model_no_scale_Lee.sav')
model

LinearRegression(n_jobs=500)

In [5]:
# setting our X and y values for mmr for all races 
X = df_binary_encoded[["race_Asian or Pacific Islander", "race_Black or African American",
                       "race_White", "hispanic_origin_Hispanic or Latino",
                       "hispanic_origin_Not Hispanic or Latino", 
                       "population_by_race"]]

# adjusting features after RFE, these did not improve score 
# X = df_binary_encoded[["race_Asian or Pacific Islander", "race_Black or African American",
#                        "race_White"]]
y = df_binary_encoded["mmr_by_race"].values.reshape(-1, 1)
print(X.shape, y.shape)

(388, 6) (388, 1)


In [73]:
X.columns

Index(['race_Asian or Pacific Islander', 'race_Black or African American',
       'race_White', 'hispanic_origin_Hispanic or Latino',
       'hispanic_origin_Not Hispanic or Latino', 'population_by_race'],
      dtype='object')

### Time-Series Forecast of Average Annual Maternal Mortality in the U.S. using highest scoring model

In [58]:
df1 = df.groupby(['year', 'race', 'hispanic_origin']).mean()['population_by_race']
df2 = df1.reset_index()
df2.head()

Unnamed: 0,year,race,hispanic_origin,population_by_race
0,2009,Black or African American,Not Hispanic or Latino,259501.4
1,2009,White,Hispanic or Latino,0.0
2,2009,White,Not Hispanic or Latino,1735035.0
3,2010,Asian or Pacific Islander,Not Hispanic or Latino,2720149.0
4,2010,Black or African American,Not Hispanic or Latino,0.0


In [57]:
white_pop_df = df2.loc[(df2['race'] == 'White') & (df2['hispanic_origin'] == 'Hispanic or Latino')]
white_pop_df.head()

Unnamed: 0,year,race,hispanic_origin,population_by_race
1,2009,White,Hispanic or Latino,0.0
5,2010,White,Hispanic or Latino,1493953.0
8,2011,White,Hispanic or Latino,0.0
11,2012,White,Hispanic or Latino,1568116.0
15,2013,White,Hispanic or Latino,1596190.0


In [56]:
from sklearn.linear_model import LinearRegression

# selecting and assigning the X and y values
X_w_his = white_pop_df['year'].values.reshape(-1, 1)
y_w_his = white_pop_df['population_by_race'].values.reshape(-1,1)

# create model, fit to data
reg_model = LinearRegression().fit(X_w_his, y_w_his)

# find r2
r2 = reg_model.score(X_w_his, y_w_his)
print(f"R2:{r2}")

# predictions list to hold variables
predictions = []

# for-loop to add predictions to list
for i in range(2020,2031): 
    y_values=reg_model.predict(np.array([i]).reshape(1, -1))
    predictions.append(y_values[0])
    
# list of years range
years = [i for i in range(2020, 2031)]

# new dataframe for mmr values with 10 year predictions
wh_mmr_predict_df = pd.DataFrame(list(zip(years, [round(item[0], 2) for item in predictions])),
                                columns=["year", "mmr_white_hispanic"])
wh_mmr_predict_df.head()

R2:0.7308048881732676


Unnamed: 0,year,mmr_white_hispanic
0,2020,4097771.97
1,2021,4449320.04
2,2022,4800868.11
3,2023,5152416.18
4,2024,5503964.25


In [59]:
white_pop2_df = df2.loc[(df2['race'] == 'White') & (df2['hispanic_origin'] == 'Not Hispanic or Latino')]
white_pop2_df.head()

Unnamed: 0,year,race,hispanic_origin,population_by_race
2,2009,White,Not Hispanic or Latino,1735035.0
6,2010,White,Not Hispanic or Latino,1142532.0
9,2011,White,Not Hispanic or Latino,695622.9
12,2012,White,Not Hispanic or Latino,662145.1
16,2013,White,Not Hispanic or Latino,1314986.0


In [60]:
# selecting and assigning the X and y values
X_w_non_his = white_pop2_df['year'].values.reshape(-1, 1)
y_w_non_his = white_pop2_df['population_by_race'].values.reshape(-1,1)

# create model, fit to data
reg_model = LinearRegression().fit(X_w_non_his, y_w_non_his)

# find r2
r2 = reg_model.score(X_w_non_his, y_w_non_his)
print(f"R2:{r2}")

# predictions list to hold variables
predictions = []

# for-loop to add predictions to list
for i in range(2020,2031): 
    y_values=reg_model.predict(np.array([i]).reshape(1, -1))
    predictions.append(y_values[0])
    
# list of years range
years = [i for i in range(2020, 2031)]

# new dataframe for mmr values with 10 year predictions
wh_non_mmr_predict_df = pd.DataFrame(list(zip(years, [round(item[0], 2) for item in predictions])),
                                columns=["year", "mmr_white_non_hispanic"])
wh_non_mmr_predict_df.head()

R2:0.7144848012915043


Unnamed: 0,year,mmr_white_non_hispanic
0,2020,3671200.05
1,2021,3950088.52
2,2022,4228976.98
3,2023,4507865.45
4,2024,4786753.92


In [68]:
asian_pop_df = df2.loc[(df2['race'] == 'Asian or Pacific Islander') & (df2['hispanic_origin'] == 'Not Hispanic or Latino')]
asian_pop_df.head()

Unnamed: 0,year,race,hispanic_origin,population_by_race
3,2010,Asian or Pacific Islander,Not Hispanic or Latino,2720149.0
13,2013,Asian or Pacific Islander,Not Hispanic or Latino,0.0
17,2014,Asian or Pacific Islander,Not Hispanic or Latino,0.0
24,2016,Asian or Pacific Islander,Not Hispanic or Latino,0.0
31,2018,Asian or Pacific Islander,Not Hispanic or Latino,3278357.0


In [69]:
# selecting and assigning the X and y values
X_a_non_his = white_pop2_df['year'].values.reshape(-1, 1)
y_a_non_his = white_pop2_df['population_by_race'].values.reshape(-1,1)

# create model, fit to data
reg_model = LinearRegression().fit(X_a_non_his, y_a_non_his)

# find r2
r2 = reg_model.score(X_a_non_his, y_a_non_his)
print(f"R2:{r2}")

# predictions list to hold variables
predictions = []

# for-loop to add predictions to list
for i in range(2020,2031): 
    y_values=reg_model.predict(np.array([i]).reshape(1, -1))
    predictions.append(y_values[0])
    
# list of years range
years = [i for i in range(2020, 2031)]

# new dataframe for mmr values with 10 year predictions
a_non_mmr_predict_df = pd.DataFrame(list(zip(years, [round(item[0], 2) for item in predictions])),
                                columns=["year", "mmr_asian_non_hispanic"])
a_non_mmr_predict_df.head()

R2:0.7144848012915043


Unnamed: 0,year,mmr_asian_non_hispanic
0,2020,3671200.05
1,2021,3950088.52
2,2022,4228976.98
3,2023,4507865.45
4,2024,4786753.92


In [70]:
black_non_pop_df = df2.loc[(df2['race'] == 'Black or African American') & (df2['hispanic_origin'] == 'Not Hispanic or Latino')]
black_non_pop_df.head()

Unnamed: 0,year,race,hispanic_origin,population_by_race
0,2009,Black or African American,Not Hispanic or Latino,259501.384615
4,2010,Black or African American,Not Hispanic or Latino,0.0
7,2011,Black or African American,Not Hispanic or Latino,313081.9
10,2012,Black or African American,Not Hispanic or Latino,59860.230769
14,2013,Black or African American,Not Hispanic or Latino,364664.545455


In [71]:
# selecting and assigning the X and y values
X_bk_non_his = white_pop2_df['year'].values.reshape(-1, 1)
y_bk_non_his = white_pop2_df['population_by_race'].values.reshape(-1,1)

# create model, fit to data
reg_model = LinearRegression().fit(X_bk_non_his, y_bk_non_his)

# find r2
r2 = reg_model.score(X_bk_non_his, y_bk_non_his)
print(f"R2:{r2}")

# predictions list to hold variables
predictions = []

# for-loop to add predictions to list
for i in range(2020,2031): 
    y_values=reg_model.predict(np.array([i]).reshape(1, -1))
    predictions.append(y_values[0])
    
# list of years range
years = [i for i in range(2020, 2031)]

# new dataframe for mmr values with 10 year predictions
bk_non_mmr_predict_df = pd.DataFrame(list(zip(years, [round(item[0], 2) for item in predictions])),
                                columns=["year", "mmr_black_non_hispanic"])
bk_non_mmr_predict_df.head()

R2:0.7144848012915043


Unnamed: 0,year,mmr_black_non_hispanic
0,2020,3671200.05
1,2021,3950088.52
2,2022,4228976.98
3,2023,4507865.45
4,2024,4786753.92


In [74]:
df_white_non = pd.DataFrame()

years = [year for year in range(2020, 2031)]

df_white_non['year'] = years
df_white_non['race_Asian or Pacific Islander'] = [0 for year in range(2020, 2031)]
df_white_non['race_Black or African American'] = [0 for year in range(2020, 2031)]
df_white_non['race_White'] = [1 for year in range(2020, 2031)]
df_white_non['hispanic_origin_Hispanic or Latino'] = [0 for year in range(2020, 2031)]
df_white_non['hispanic_origin_Not Hispanic or Latino'] = [1 for year in range(2020, 2031)]
df_white_non['population_by_race'] = wh_non_mmr_predict_df['mmr_white_non_hispanic']

df_white_non

Unnamed: 0,year,race_Asian or Pacific Islander,race_Black or African American,race_White,hispanic_origin_Hispanic or Latino,hispanic_origin_Not Hispanic or Latino,population_by_race
0,2020,0,0,1,0,1,3671200.05
1,2021,0,0,1,0,1,3950088.52
2,2022,0,0,1,0,1,4228976.98
3,2023,0,0,1,0,1,4507865.45
4,2024,0,0,1,0,1,4786753.92
5,2025,0,0,1,0,1,5065642.38
6,2026,0,0,1,0,1,5344530.85
7,2027,0,0,1,0,1,5623419.32
8,2028,0,0,1,0,1,5902307.79
9,2029,0,0,1,0,1,6181196.25


In [75]:
X = df_white_non[['race_Asian or Pacific Islander', 'race_Black or African American',
       'race_White', 'hispanic_origin_Hispanic or Latino',
       'hispanic_origin_Not Hispanic or Latino', 'population_by_race']]
wht_non_prediction = model.predict(X)
wht_non_prediction

array([[28.0707536 ],
       [27.71446675],
       [27.35817991],
       [27.00189305],
       [26.64560619],
       [26.28931935],
       [25.9330325 ],
       [25.57674564],
       [25.22045879],
       [24.86417195],
       [24.50788509]])

In [77]:
df_white_his = pd.DataFrame()

df_white_his['year'] = years
df_white_his['race_Asian or Pacific Islander'] = [0 for year in range(2020, 2031)]
df_white_his['race_Black or African American'] = [0 for year in range(2020, 2031)]
df_white_his['race_White'] = [1 for year in range(2020, 2031)]
df_white_his['hispanic_origin_Hispanic or Latino'] = [1 for year in range(2020, 2031)]
df_white_his['hispanic_origin_Not Hispanic or Latino'] = [0 for year in range(2020, 2031)]
df_white_his['population_by_race'] = wh_mmr_predict_df['mmr_white_hispanic']

df_white_his

Unnamed: 0,year,race_Asian or Pacific Islander,race_Black or African American,race_White,hispanic_origin_Hispanic or Latino,hispanic_origin_Not Hispanic or Latino,population_by_race
0,2020,0,0,1,1,0,4097771.97
1,2021,0,0,1,1,0,4449320.04
2,2022,0,0,1,1,0,4800868.11
3,2023,0,0,1,1,0,5152416.18
4,2024,0,0,1,1,0,5503964.25
5,2025,0,0,1,1,0,5855512.33
6,2026,0,0,1,1,0,6207060.4
7,2027,0,0,1,1,0,6558608.47
8,2028,0,0,1,1,0,6910156.54
9,2029,0,0,1,1,0,7261704.61


In [78]:
X = df_white_his[['race_Asian or Pacific Islander', 'race_Black or African American',
       'race_White', 'hispanic_origin_Hispanic or Latino',
       'hispanic_origin_Not Hispanic or Latino', 'population_by_race']]
wht_his_prediction = model.predict(X)
wht_his_prediction

array([[22.70934365],
       [22.26023238],
       [21.8111211 ],
       [21.36200983],
       [20.91289855],
       [20.46378726],
       [20.01467599],
       [19.56556471],
       [19.11645344],
       [18.66734217],
       [18.21823088]])

In [80]:
df_asian_non = pd.DataFrame()

df_asian_non['year'] = years
df_asian_non['race_Asian or Pacific Islander'] = [1 for year in range(2020, 2031)]
df_asian_non['race_Black or African American'] = [0 for year in range(2020, 2031)]
df_asian_non['race_White'] = [0 for year in range(2020, 2031)]
df_asian_non['hispanic_origin_Hispanic or Latino'] = [0 for year in range(2020, 2031)]
df_asian_non['hispanic_origin_Not Hispanic or Latino'] = [1 for year in range(2020, 2031)]
df_asian_non['population_by_race'] = a_non_mmr_predict_df['mmr_asian_non_hispanic']

df_asian_non

Unnamed: 0,year,race_Asian or Pacific Islander,race_Black or African American,race_White,hispanic_origin_Hispanic or Latino,hispanic_origin_Not Hispanic or Latino,population_by_race
0,2020,1,0,0,0,1,3671200.05
1,2021,1,0,0,0,1,3950088.52
2,2022,1,0,0,0,1,4228976.98
3,2023,1,0,0,0,1,4507865.45
4,2024,1,0,0,0,1,4786753.92
5,2025,1,0,0,0,1,5065642.38
6,2026,1,0,0,0,1,5344530.85
7,2027,1,0,0,0,1,5623419.32
8,2028,1,0,0,0,1,5902307.79
9,2029,1,0,0,0,1,6181196.25


In [81]:
X = df_asian_non[['race_Asian or Pacific Islander', 'race_Black or African American',
       'race_White', 'hispanic_origin_Hispanic or Latino',
       'hispanic_origin_Not Hispanic or Latino', 'population_by_race']]
asian_non_prediction = model.predict(X)
asian_non_prediction

array([[14.81326157],
       [14.45697472],
       [14.10068787],
       [13.74440102],
       [13.38811416],
       [13.03182732],
       [12.67554047],
       [12.31925361],
       [11.96296676],
       [11.60667991],
       [11.25039306]])

In [83]:
df_black_non = pd.DataFrame()

df_black_non['year'] = years
df_black_non['race_Asian or Pacific Islander'] = [0 for year in range(2020, 2031)]
df_black_non['race_Black or African American'] = [1 for year in range(2020, 2031)]
df_black_non['race_White'] = [0 for year in range(2020, 2031)]
df_black_non['hispanic_origin_Hispanic or Latino'] = [0 for year in range(2020, 2031)]
df_black_non['hispanic_origin_Not Hispanic or Latino'] = [1 for year in range(2020, 2031)]
df_black_non['population_by_race'] = bk_non_mmr_predict_df['mmr_black_non_hispanic']

df_black_non

Unnamed: 0,year,race_Asian or Pacific Islander,race_Black or African American,race_White,hispanic_origin_Hispanic or Latino,hispanic_origin_Not Hispanic or Latino,population_by_race
0,2020,0,1,0,0,1,3671200.05
1,2021,0,1,0,0,1,3950088.52
2,2022,0,1,0,0,1,4228976.98
3,2023,0,1,0,0,1,4507865.45
4,2024,0,1,0,0,1,4786753.92
5,2025,0,1,0,0,1,5065642.38
6,2026,0,1,0,0,1,5344530.85
7,2027,0,1,0,0,1,5623419.32
8,2028,0,1,0,0,1,5902307.79
9,2029,0,1,0,0,1,6181196.25


In [84]:
X = df_black_non[['race_Asian or Pacific Islander', 'race_Black or African American',
       'race_White', 'hispanic_origin_Hispanic or Latino',
       'hispanic_origin_Not Hispanic or Latino', 'population_by_race']]
black_non_prediction = model.predict(X)
black_non_prediction

array([[68.97878681],
       [68.62249995],
       [68.26621311],
       [67.90992625],
       [67.5536394 ],
       [67.19735256],
       [66.8410657 ],
       [66.48477885],
       [66.12849199],
       [65.77220515],
       [65.41591829]])

In [87]:
race_prediction_df = pd.DataFrame()

race_prediction_df['year'] = years
race_prediction_df['mmr_white_hispanic'] = [item[0] for item in wht_his_prediction]
race_prediction_df['mmr_white_non_hispanic'] = [item[0] for item in wht_non_prediction]
race_prediction_df['mmr_asian_non_hispanic'] = [item[0] for item in asian_non_prediction]
race_prediction_df['mmr_black_non_hispanic'] = [item[0] for item in black_non_prediction]

race_prediction_df

Unnamed: 0,year,mmr_white_hispanic,mmr_white_non_hispanic,mmr_asian_non_hispanic,mmr_black_non_hispanic
0,2020,22.709344,28.070754,14.813262,68.978787
1,2021,22.260232,27.714467,14.456975,68.6225
2,2022,21.811121,27.35818,14.100688,68.266213
3,2023,21.36201,27.001893,13.744401,67.909926
4,2024,20.912899,26.645606,13.388114,67.553639
5,2025,20.463787,26.289319,13.031827,67.197353
6,2026,20.014676,25.933032,12.67554,66.841066
7,2027,19.565565,25.576746,12.319254,66.484779
8,2028,19.116453,25.220459,11.962967,66.128492
9,2029,18.667342,24.864172,11.60668,65.772205


In [89]:
race_previous_df = df.groupby(['year', 'race', 'hispanic_origin']).mean()['mmr_by_race']
race_previous_df = race_previous_df.reset_index()
race_previous_df

Unnamed: 0,year,race,hispanic_origin,mmr_by_race
0,2009,Black or African American,Not Hispanic or Latino,58.173846
1,2009,White,Hispanic or Latino,24.1075
2,2009,White,Not Hispanic or Latino,27.388571
3,2010,Asian or Pacific Islander,Not Hispanic or Latino,16.82
4,2010,Black or African American,Not Hispanic or Latino,61.764444
5,2010,White,Hispanic or Latino,20.0
6,2010,White,Not Hispanic or Latino,24.042727
7,2011,Black or African American,Not Hispanic or Latino,72.118
8,2011,White,Hispanic or Latino,18.496667
9,2011,White,Not Hispanic or Latino,26.372353


In [139]:
mmr_white_his = race_previous_df.loc[(race_previous_df['race'] == 'White') & 
                                     (race_previous_df['hispanic_origin'] == 'Hispanic or Latino')].reset_index(drop=True)
mmr_white_non = race_previous_df.loc[(race_previous_df['race'] == 'White') & 
                                     (race_previous_df['hispanic_origin'] == 'Not Hispanic or Latino')].reset_index(drop=True)
mmr_asian_non = race_previous_df.loc[(race_previous_df['race'] == 'Asian or Pacific Islander') & 
                                     (race_previous_df['hispanic_origin'] == 'Not Hispanic or Latino')].reset_index(drop=True)
mmr_black_non = race_previous_df.loc[(race_previous_df['race'] == 'Black or African American') & 
                                     (race_previous_df['hispanic_origin'] == 'Not Hispanic or Latino')].reset_index(drop=True)

In [141]:
mmr_asian_non

Unnamed: 0,year,race,hispanic_origin,mmr_by_race
0,2010,Asian or Pacific Islander,Not Hispanic or Latino,16.82
1,2013,Asian or Pacific Islander,Not Hispanic or Latino,13.75
2,2014,Asian or Pacific Islander,Not Hispanic or Latino,13.83
3,2016,Asian or Pacific Islander,Not Hispanic or Latino,20.46
4,2018,Asian or Pacific Islander,Not Hispanic or Latino,19.24
5,2019,Asian or Pacific Islander,Not Hispanic or Latino,18.12


In [107]:
pre_years = [year for year in range(2009, 2020)]
pre_years

[2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]

In [154]:
race_mmr_df = pd.DataFrame()
race_mmr_df['year'] = pre_years

race_mmr_df

Unnamed: 0,year
0,2009
1,2010
2,2011
3,2012
4,2013
5,2014
6,2015
7,2016
8,2017
9,2018


In [155]:
merge_wht_race_mmr_df = race_mmr_df.merge(mmr_white_his[['year', 'mmr_by_race']], on='year', how='left')
merge_wht_race_mmr_df

Unnamed: 0,year,mmr_by_race
0,2009,24.1075
1,2010,20.0
2,2011,18.496667
3,2012,22.103333
4,2013,26.713333
5,2014,20.375
6,2015,19.556667
7,2016,29.9125
8,2017,23.6125
9,2018,20.5775


In [156]:
merge_wht_non_race_mmr_df = merge_wht_race_mmr_df.merge(mmr_white_non[['year', 'mmr_by_race']], on='year', how='left')
merge_wht_non_race_mmr_df

Unnamed: 0,year,mmr_by_race_x,mmr_by_race_y
0,2009,24.1075,27.388571
1,2010,20.0,24.042727
2,2011,18.496667,26.372353
3,2012,22.103333,28.588
4,2013,26.713333,32.7405
5,2014,20.375,30.829
6,2015,19.556667,31.2775
7,2016,29.9125,32.752083
8,2017,23.6125,32.214348
9,2018,20.5775,29.984762


In [157]:
merge_asian_race_mmr_df = merge_wht_non_race_mmr_df.merge(mmr_asian_non[['year', 'mmr_by_race']], on='year', how='left')
merge_asian_race_mmr_df

Unnamed: 0,year,mmr_by_race_x,mmr_by_race_y,mmr_by_race
0,2009,24.1075,27.388571,
1,2010,20.0,24.042727,16.82
2,2011,18.496667,26.372353,
3,2012,22.103333,28.588,
4,2013,26.713333,32.7405,13.75
5,2014,20.375,30.829,13.83
6,2015,19.556667,31.2775,
7,2016,29.9125,32.752083,20.46
8,2017,23.6125,32.214348,
9,2018,20.5775,29.984762,19.24


In [158]:
merge_black_race_mmr_df = merge_asian_race_mmr_df.merge(mmr_black_non[['year', 'mmr_by_race']], on='year', how='left')
merge_black_race_mmr_df

Unnamed: 0,year,mmr_by_race_x,mmr_by_race_y,mmr_by_race_x.1,mmr_by_race_y.1
0,2009,24.1075,27.388571,,58.173846
1,2010,20.0,24.042727,16.82,61.764444
2,2011,18.496667,26.372353,,72.118
3,2012,22.103333,28.588,,72.866154
4,2013,26.713333,32.7405,13.75,83.244545
5,2014,20.375,30.829,13.83,82.588462
6,2015,19.556667,31.2775,,78.995833
7,2016,29.9125,32.752083,20.46,81.009333
8,2017,23.6125,32.214348,,72.5
9,2018,20.5775,29.984762,19.24,64.248333


In [159]:
race_prediction_df.columns

Index(['year', 'mmr_white_hispanic', 'mmr_white_non_hispanic',
       'mmr_asian_non_hispanic', 'mmr_black_non_hispanic'],
      dtype='object')

In [160]:
merge_black_race_mmr_df.columns = ['year', 'mmr_white_hispanic', 'mmr_white_non_hispanic',
       'mmr_asian_non_hispanic', 'mmr_black_non_hispanic']
merge_black_race_mmr_df

Unnamed: 0,year,mmr_white_hispanic,mmr_white_non_hispanic,mmr_asian_non_hispanic,mmr_black_non_hispanic
0,2009,24.1075,27.388571,,58.173846
1,2010,20.0,24.042727,16.82,61.764444
2,2011,18.496667,26.372353,,72.118
3,2012,22.103333,28.588,,72.866154
4,2013,26.713333,32.7405,13.75,83.244545
5,2014,20.375,30.829,13.83,82.588462
6,2015,19.556667,31.2775,,78.995833
7,2016,29.9125,32.752083,20.46,81.009333
8,2017,23.6125,32.214348,,72.5
9,2018,20.5775,29.984762,19.24,64.248333


In [162]:
all_year_race_mmr = pd.concat([merge_black_race_mmr_df, race_prediction_df], ignore_index=True)
all_year_race_mmr

Unnamed: 0,year,mmr_white_hispanic,mmr_white_non_hispanic,mmr_asian_non_hispanic,mmr_black_non_hispanic
0,2009,24.1075,27.388571,,58.173846
1,2010,20.0,24.042727,16.82,61.764444
2,2011,18.496667,26.372353,,72.118
3,2012,22.103333,28.588,,72.866154
4,2013,26.713333,32.7405,13.75,83.244545
5,2014,20.375,30.829,13.83,82.588462
6,2015,19.556667,31.2775,,78.995833
7,2016,29.9125,32.752083,20.46,81.009333
8,2017,23.6125,32.214348,,72.5
9,2018,20.5775,29.984762,19.24,64.248333


In [164]:
all_year_race_mmr.to_csv('../Resources/all_race_2009_2030.csv', index=False)