Imports and Reading in Data

In [None]:
# imports and reading in datasets
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import io
from google.colab import files
# Upload the dataset files through Colab. The cells below index `uploaded` by
# file name and decode each entry as UTF-8; they expect X_train.csv,
# Y5_train.csv, Y25_train.csv, Y50_train.csv, X_test.csv, Y5_test.csv,
# Y25_test.csv and Y50_test.csv to be present.
uploaded = files.upload()

In [None]:
# Parse the uploaded training CSVs: the feature table first, then the three
# target tables (Y5 / Y25 / Y50).
train_features_text = uploaded['X_train.csv'].decode('utf-8')
X_train = pd.read_csv(io.StringIO(train_features_text)).drop(columns=['country'])  # drop the string column
Y_train_5, Y_train_25, Y_train_50 = (
    pd.read_csv(io.StringIO(uploaded[csv_name].decode('utf-8')))
    for csv_name in ('Y5_train.csv', 'Y25_train.csv', 'Y50_train.csv')
)

# Min-max scale the features. After this step X_train is the scaler's output
# rather than the original DataFrame, and `min_max_scaler` holds the fitted
# training ranges.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(X_train)
X_train = min_max_scaler.transform(X_train)

In [None]:
# Load the test features and the three test target tables from the upload.
X_test = pd.read_csv(io.StringIO(uploaded['X_test.csv'].decode('utf-8')))
X_test = X_test.drop(columns=['country'])  # drop the string column
Y_test_5 = pd.read_csv(io.StringIO(uploaded['Y5_test.csv'].decode('utf-8')))
Y_test_25 = pd.read_csv(io.StringIO(uploaded['Y25_test.csv'].decode('utf-8')))
Y_test_50 = pd.read_csv(io.StringIO(uploaded['Y50_test.csv'].decode('utf-8')))

# Scale the test features with the scaler that was fitted on the training
# features (`min_max_scaler` from the training-data cell, which must have run
# first). Fitting a second MinMaxScaler on the test set would map train and
# test onto different ranges, so the model would be evaluated on inputs scaled
# differently from the ones it was trained on. Test values outside the
# training min/max can therefore fall outside [0, 1]; that is expected.
X_test = min_max_scaler.transform(X_test)

Building and Training the NN

In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Baseline regressor: two 32-unit ReLU hidden layers followed by a single
# linear output unit. The input width (58) has to match the number of feature
# columns the model is later fitted on.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(58,)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

In [None]:
# Optimise mean squared error with Adam; MSE is additionally reported as a metric.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mse'],
)

In [None]:
# Cast features and the Y5 targets to float32 arrays before handing them to Keras.
# NOTE(review): the validation arrays are built from X_test / Y_test_5, i.e. the
# test set doubles as the validation set here — confirm this is intended.
X_train_float, Y_train_float, X_val_float, Y_val_float = (
    np.asarray(table).astype(np.float32)
    for table in (X_train, Y_train_5, X_test, Y_test_5)
)

# Train the baseline model and keep the per-epoch history for plotting.
hist = model.fit(
    X_train_float,
    Y_train_float,
    validation_data=(X_val_float, Y_val_float),
    epochs=100,
    batch_size=32,
)

In [None]:
# Evaluate the baseline model on the test features against the Y5 test targets.
# The targets must come from Y_test_5: building them from X_test (as before)
# compares the predictions with the input features instead of the labels.
X_test_float = np.asarray(X_test).astype(np.float32)
Y_test_float = np.asarray(Y_test_5).astype(np.float32)
model.evaluate(X_test_float, Y_test_float)

 Visualizations

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Baseline model: training loss vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'])
ax.plot(hist.history['val_loss'])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Val'], loc='upper right')  # order matches the two plot calls above
plt.show()

Overfitted Model

In [None]:
# Much larger network (four 1000-unit ReLU layers, no regularisation); per the
# section heading it is meant to demonstrate overfitting.
model_2 = Sequential()
model_2.add(Dense(1000, activation='relu', input_shape=(58,)))
model_2.add(Dense(1000, activation='relu'))
model_2.add(Dense(1000, activation='relu'))
model_2.add(Dense(1000, activation='relu'))
model_2.add(Dense(1, activation='linear'))

model_2.compile(loss='mean_squared_error', optimizer='adam')

# Same data and training schedule as the baseline model.
hist_2 = model_2.fit(
    X_train_float,
    Y_train_float,
    validation_data=(X_val_float, Y_val_float),
    epochs=100,
    batch_size=32,
)

In [None]:
# Large unregularised model: training loss vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(hist_2.history['loss'])
ax.plot(hist_2.history['val_loss'])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Val'], loc='upper right')  # order matches the two plot calls above
plt.show()

Incorporating Regularization

In [None]:
from keras.layers import Dropout
from keras import regularizers

In [None]:
# Same four 1000-unit architecture as model_2, but every Dense layer carries an
# L2 kernel penalty (0.01) and each hidden layer is followed by 30% dropout.
model_3 = Sequential()
model_3.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(58,)))
model_3.add(Dropout(0.3))
model_3.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model_3.add(Dropout(0.3))
model_3.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model_3.add(Dropout(0.3))
model_3.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model_3.add(Dropout(0.3))
model_3.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))

In [None]:
# Compile and train the regularised model with the same loss, optimiser, data
# and schedule as the previous models so the loss curves are comparable.
model_3.compile(loss='mean_squared_error', optimizer='adam')
hist_3 = model_3.fit(
    X_train_float,
    Y_train_float,
    validation_data=(X_val_float, Y_val_float),
    epochs=100,
    batch_size=32,
)

In [None]:
# Regularised model: training loss vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(hist_3.history['loss'])
ax.plot(hist_3.history['val_loss'])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Val'], loc='upper right')  # order matches the two plot calls above
plt.show()

In [None]:
# Printing out prediction values on the test dataset

In [None]:
# Run the baseline model on the test features and print one prediction per line.
ynew = model.predict(X_test_float)
for prediction in ynew:
  print(prediction)