In [2]:
# import dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
import sklearn as skl

In [4]:
#  Import and read the csv.
data_df = pd.read_csv("Resources/sample_data.csv")
data_df.head()

Unnamed: 0,Year,FIPS,C_S,month,cases,Pcincome,POPESTIMATE,MFratio,POPEST_MALE,POPEST_FEM,...,WA,BA,IA,AA,NA,TOM,NH,H,Cost,RUCC
0,2017,1001,"Autauga County,Alabama",1,0,27824,55448,0.951913,27041,28407,...,42606,10816,254,693,59,1020,53908,1540,186712,2
1,2017,1001,"Autauga County,Alabama",2,0,27824,55448,0.951913,27041,28407,...,42606,10816,254,693,59,1020,53908,1540,186577,2
2,2017,1001,"Autauga County,Alabama",3,0,27824,55448,0.951913,27041,28407,...,42606,10816,254,693,59,1020,53908,1540,186397,2
3,2017,1001,"Autauga County,Alabama",4,0,27824,55448,0.951913,27041,28407,...,42606,10816,254,693,59,1020,53908,1540,186839,2
4,2017,1001,"Autauga County,Alabama",5,0,27824,55448,0.951913,27041,28407,...,42606,10816,254,693,59,1020,53908,1540,187155,2


In [5]:
#Using drop() to delete rows before 2019
data_df.drop(data_df[data_df['Year'] < 2019].index, inplace = True)
data_df.head()

Unnamed: 0,Year,FIPS,C_S,month,cases,Pcincome,POPESTIMATE,MFratio,POPEST_MALE,POPEST_FEM,...,WA,BA,IA,AA,NA,TOM,NH,H,Cost,RUCC
24,2019,1001,"Autauga County,Alabama",1,0,29819,55769,0.94378,27078,28691,...,42490,11252,266,668,58,1035,54116,1653,200156,2
25,2019,1001,"Autauga County,Alabama",2,0,29819,55769,0.94378,27078,28691,...,42490,11252,266,668,58,1035,54116,1653,200999,2
26,2019,1001,"Autauga County,Alabama",3,0,29819,55769,0.94378,27078,28691,...,42490,11252,266,668,58,1035,54116,1653,201698,2
27,2019,1001,"Autauga County,Alabama",4,0,29819,55769,0.94378,27078,28691,...,42490,11252,266,668,58,1035,54116,1653,202308,2
28,2019,1001,"Autauga County,Alabama",5,0,29819,55769,0.94378,27078,28691,...,42490,11252,266,668,58,1035,54116,1653,202854,2


In [6]:
# data_df = data_df.drop(columns=["C_S"])
# getting poor results with only dropping country, state so I will drop other features
data_df = data_df.drop(columns=["C_S", "WA","BA","IA","AA","NA","TOM","NH","H"])
data_df.head()

Unnamed: 0,Year,FIPS,month,cases,Pcincome,POPESTIMATE,MFratio,POPEST_MALE,POPEST_FEM,MEDIAN_AGE_TOT,UNDER5_TOT,AGE513_TOT,AGE1544_TOT,AGE4564_TOT,AGE65PLUS_TOT,Cost,RUCC
24,2019,1001,1,0,29819,55769,0.94378,27078,28691,39.2,3251,6499,21487,14933,8783,200156,2
25,2019,1001,2,0,29819,55769,0.94378,27078,28691,39.2,3251,6499,21487,14933,8783,200999,2
26,2019,1001,3,0,29819,55769,0.94378,27078,28691,39.2,3251,6499,21487,14933,8783,201698,2
27,2019,1001,4,0,29819,55769,0.94378,27078,28691,39.2,3251,6499,21487,14933,8783,202308,2
28,2019,1001,5,0,29819,55769,0.94378,27078,28691,39.2,3251,6499,21487,14933,8783,202854,2


In [7]:
# Create the StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
scaler.fit(data_df)

# Scale the data
scaled_data = scaler.transform(data_df)

In [8]:
# Create a DataFrame with the scaled data
scaled_data = pd.DataFrame(scaled_data, columns=data_df.columns)
scaled_data.head()

Unnamed: 0,Year,FIPS,month,cases,Pcincome,POPESTIMATE,MFratio,POPEST_MALE,POPEST_FEM,MEDIAN_AGE_TOT,UNDER5_TOT,AGE513_TOT,AGE1544_TOT,AGE4564_TOT,AGE65PLUS_TOT,Cost,RUCC
0,-1.224745,-1.705574,-1.593255,-0.560116,-0.098628,-1.156716,-0.761345,-1.134408,-1.179301,-0.265989,-0.999694,-1.013378,-1.008686,-1.283194,-1.610403,0.52124,-0.390567
1,-1.224745,-1.705574,-1.303572,-0.560116,-0.098628,-1.156716,-0.761345,-1.134408,-1.179301,-0.265989,-0.999694,-1.013378,-1.008686,-1.283194,-1.610403,0.53923,-0.390567
2,-1.224745,-1.705574,-1.01389,-0.560116,-0.098628,-1.156716,-0.761345,-1.134408,-1.179301,-0.265989,-0.999694,-1.013378,-1.008686,-1.283194,-1.610403,0.554147,-0.390567
3,-1.224745,-1.705574,-0.724207,-0.560116,-0.098628,-1.156716,-0.761345,-1.134408,-1.179301,-0.265989,-0.999694,-1.013378,-1.008686,-1.283194,-1.610403,0.567165,-0.390567
4,-1.224745,-1.705574,-0.434524,-0.560116,-0.098628,-1.156716,-0.761345,-1.134408,-1.179301,-0.265989,-0.999694,-1.013378,-1.008686,-1.283194,-1.610403,0.578816,-0.390567


In [9]:
# THIS IS A TEST CELL
# I'm not getting any accuracy so I'm going to try removing features and seeing if the accuracy 

# Split our preprocessed data into our features and target arrays

# testing to see if the neural network can predict RUCC
# changing Cost to RUCC

y = scaled_data["Cost"].values
# X = scaled_data[["Year","FIPS","month","cases","POPESTIMATE","RUCC"]].values
# Trying just one feature
X = scaled_data["RUCC"].values.reshape(-1, 1)

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=22)

In [16]:
# Split our preprocessed data into our features and target arrays
y = scaled_data["Cost"].values
X = scaled_data.drop(["Cost"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=22)

  This is separate from the ipykernel package so we can avoid doing imports until


In [10]:
# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Create the Keras Sequential model
nn_model = tf.keras.models.Sequential()

In [12]:
len(data_df.axes[1])

17

In [13]:
# Add our first Dense layer, including the input layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="relu", input_dim=1)) #len(data_df.axes[1])-1))

In [14]:
# Add the output layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [15]:
# Check the structure of the Sequential model
nn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 2         
                                                                 
 dense_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [17]:
# Fit the model to the training data
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=250)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

Epoch 80/250
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250
Epoch 85/250
Epoch 86/250
Epoch 87/250
Epoch 88/250
Epoch 89/250
Epoch 90/250
Epoch 91/250
Epoch 92/250
Epoch 93/250
Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250
Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/25

Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250
Epoch 163/250
Epoch 164/250
Epoch 165/250
Epoch 166/250
Epoch 167/250
Epoch 168/250
Epoch 169/250
Epoch 170/250
Epoch 171/250
Epoch 172/250
Epoch 173/250
Epoch 174/250
Epoch 175/250
Epoch 176/250
Epoch 177/250
Epoch 178/250
Epoch 179/250
Epoch 180/250
Epoch 181/250
Epoch 182/250
Epoch 183/250
Epoch 184/250
Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 

Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 241/250
Epoch 242/250
Epoch 243/250
Epoch 244/250
Epoch 245/250
Epoch 246/250
Epoch 247/250
Epoch 248/250
Epoch 249/250
Epoch 250/250
