In [336]:
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn import model_selection

In [337]:
class nnDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        self.length = self.x.shape[0]
    def __getitem__(self,idx):
        return self.x[idx], self.y[idx]
    def __len__(self):
        return self.length
class Net(nn.Module):
    def __init__(self, nInputFeatures):
        super(Net, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(nInputFeatures,1024),
            nn.ReLU(),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,1),
            
        )
    def forward(self, x):
        output = self.linear_relu_stack(x)
        return output

In [338]:
data = pd.read_csv("4Features,1Predictor.csv")
for CR in data['CR'].unique():
    if(data['CR'][data['CR']==CR].count() < 2):
        print(CR, "Removed, less than two of this CR present")
        data = data[data['CR']!=CR]
    if(CR == 0):
        data = data[data['CR']!=CR]
x = data.drop(columns="CR")
y = data['CR']
xTrain, xTest, yTrain, yTest = model_selection.train_test_split(x, y,test_size=0.5,stratify=y)

26.0 Removed, less than two of this CR present


In [339]:
for i,column in enumerate(xTrain):
    colMin = xTrain[column].max()
    colMax = xTrain[column].min()
    if(colMin == colMax):
        xTrain[column] = xTrain[column]  * 0
        xTest[column] = xTest[column] * 0
    else:
        xTrain[column] = (xTrain[column] - colMin) / (colMax - colMin)
        xTest[column] = (xTest[column] - colMin) / (colMax - colMin)
nnTrainingSet = nnDataset(xTrain.values, yTrain.values)
nnTestingSet = nnDataset(xTest.values, yTest.values)
nnTrainLoader = DataLoader(nnTrainingSet, batch_size=16, shuffle=True)
nnTestLoader = DataLoader(nnTestingSet, batch_size=16, shuffle=False)

In [340]:
model = Net(nInputFeatures=xTrain.shape[1])
mse_loss = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001,weight_decay = 0.0001)

bestMSE = 10000000
bestWeights = None

Θ = 10000
timeSinceLastUpdate = 0
for θ in range(Θ):
    for batchIDX, (xBatch, yBatch) in enumerate(nnTrainLoader):
        output = model(xBatch)
        loss = mse_loss(output, yBatch.reshape(-1, 1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if(θ%100==0):
        print("On epoch", str(θ))
    timeSinceLastUpdate += 1
    if float(loss)<bestMSE:
        timeSinceLastUpdate = 0
        bestMSE = float(loss)
        bestWeights = copy.deepcopy(model.state_dict())
        print("Better MSE Loss {} achieved on epoch {}".format(loss,θ))
    if(timeSinceLastUpdate>=150 and bestMSE<0.005):
        print("No significant updates in 150 epochs and under loss threshold, breaking training")
        break


On epoch 0
Better MSE Loss 19.112308502197266 achieved on epoch 0
Better MSE Loss 13.852324485778809 achieved on epoch 3
Better MSE Loss 5.656300067901611 achieved on epoch 4
Better MSE Loss 3.196657419204712 achieved on epoch 7
Better MSE Loss 1.7579929828643799 achieved on epoch 9
Better MSE Loss 1.500333547592163 achieved on epoch 10
Better MSE Loss 0.3258572220802307 achieved on epoch 15
Better MSE Loss 0.08665739744901657 achieved on epoch 26
On epoch 100
Better MSE Loss 0.03310694918036461 achieved on epoch 186
On epoch 200
Better MSE Loss 0.0193644892424345 achieved on epoch 243
On epoch 300
On epoch 400
On epoch 500
On epoch 600
On epoch 700
On epoch 800
On epoch 900
On epoch 1000
On epoch 1100
On epoch 1200
On epoch 1300
On epoch 1400
On epoch 1500
On epoch 1600
On epoch 1700
On epoch 1800
On epoch 1900
On epoch 2000
On epoch 2100
On epoch 2200
On epoch 2300
On epoch 2400
On epoch 2500
Better MSE Loss 0.017847541719675064 achieved on epoch 2549
On epoch 2600
On epoch 2700
On e

In [341]:
model.load_state_dict(bestWeights)
total = 0
withinReason = 0
lessWithinReason = 0
outerBoundOfReason = 0
for batchIDX, (xTestBatch, yTestBatch) in enumerate(nnTestLoader):
    output = model(xTestBatch)
    for i, element in enumerate(output):
        element = element.detach().numpy()
        if(np.abs(yTestBatch[i]-element)<.5):
            veryClose += 1
        if(np.abs(yTestBatch[i]-element)<1):
            withinReason += 1
        if(np.abs(yTestBatch[i]-element)<2):
            lessWithinReason += 1
        if(np.abs(yTestBatch[i]-element)<3):
            outerBoundOfReason += 1
        total += 1
    loss = float(mse_loss(output, yTestBatch.reshape(-1, 1)))
mse = loss
rootMSE = np.sqrt(mse)
print("Mean Squared Error is: ", mse)
print("RMSE is: ", rootMSE)
print("Predicted CR values within +/- 1.00 of actual CR: ", str(withinReason/total))
print("Predicted CR values within +/- 2.00 of actual CR: ", str(lessWithinReason/total))
print("Predicted CR values within +/- 3.00 of actual CR: ", str(outerBoundOfReason/total))

Mean Squared Error is:  0.9510733485221863
RMSE is:  0.9752298952155775
Predicted CR values within +/- 1.00 of actual CR:  0.676948051948052
Predicted CR values within +/- 2.00 of actual CR:  0.8668831168831169
Predicted CR values within +/- 3.00 of actual CR:  0.9318181818181818


In [342]:
data = pd.read_csv("manyFeatures,1Predictor.csv")
for CR in data['CR'].unique():
    if(data['CR'][data['CR']==CR].count() < 2):
        print(CR, "Removed, less than two of this CR present")
        data = data[data['CR']!=CR]
    if(CR == 0):
        data = data[data['CR']!=CR]
x = data.drop(columns="CR")
y = data['CR']
xTrain, xTest, yTrain, yTest = model_selection.train_test_split(x, y,test_size=0.5,stratify=y)

26.0 Removed, less than two of this CR present


In [343]:
for i,column in enumerate(xTrain):
    colMin = xTrain[column].max()
    colMax = xTrain[column].min()
    if(colMin == colMax):
        xTrain[column] = xTrain[column]  * 0
        xTest[column] = xTest[column] * 0
    else:
        xTrain[column] = (xTrain[column] - colMin) / (colMax - colMin)
        xTest[column] = (xTest[column] - colMin) / (colMax - colMin)
nnTrainingSet = nnDataset(xTrain.values, yTrain.values)
nnTestingSet = nnDataset(xTest.values, yTest.values)
nnTrainLoader = DataLoader(nnTrainingSet, batch_size=16, shuffle=True)
nnTestLoader = DataLoader(nnTestingSet, batch_size=16, shuffle=False)

In [344]:
model = Net(nInputFeatures=xTrain.shape[1])
mse_loss = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001,weight_decay = 0.0001)

bestMSE = 10000000
bestWeights = None

Θ = 10000
timeSinceLastUpdate = 0
for θ in range(Θ):
    for batchIDX, (xBatch, yBatch) in enumerate(nnTrainLoader):
        output = model(xBatch)
        loss = mse_loss(output, yBatch.reshape(-1, 1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if(θ%100==0):
        print("On epoch", str(θ))
    timeSinceLastUpdate += 1
    if float(loss)<bestMSE:
        timeSinceLastUpdate = 0
        bestMSE = float(loss)
        bestWeights = copy.deepcopy(model.state_dict())
        print("Better MSE Loss {} achieved on epoch {}".format(loss,θ))
    if(timeSinceLastUpdate>=150 and bestMSE<0.005):
        print("No significant updates in 150 epochs and under loss threshold, breaking training")
        break


On epoch 0
Better MSE Loss 19.710920333862305 achieved on epoch 0
Better MSE Loss 7.7667694091796875 achieved on epoch 1
Better MSE Loss 7.532956600189209 achieved on epoch 4
Better MSE Loss 5.12923002243042 achieved on epoch 5
Better MSE Loss 4.258469104766846 achieved on epoch 7
Better MSE Loss 2.898838520050049 achieved on epoch 8
Better MSE Loss 0.9733448028564453 achieved on epoch 9
Better MSE Loss 0.7785572409629822 achieved on epoch 18
Better MSE Loss 0.39592117071151733 achieved on epoch 28
Better MSE Loss 0.1895531415939331 achieved on epoch 45
Better MSE Loss 0.17859749495983124 achieved on epoch 58
Better MSE Loss 0.1368197351694107 achieved on epoch 91
Better MSE Loss 0.10193615406751633 achieved on epoch 98
On epoch 100
Better MSE Loss 0.046958617866039276 achieved on epoch 106
Better MSE Loss 0.027489904314279556 achieved on epoch 107
On epoch 200
On epoch 300
Better MSE Loss 0.012501709163188934 achieved on epoch 322
On epoch 400
Better MSE Loss 0.009315232746303082 achi

In [345]:
model.load_state_dict(bestWeights)
total = 0
withinReason = 0
lessWithinReason = 0
outerBoundOfReason = 0
for batchIDX, (xTestBatch, yTestBatch) in enumerate(nnTestLoader):
    output = model(xTestBatch)
    for i, element in enumerate(output):
        element = element.detach().numpy()
        if(np.abs(yTestBatch[i]-element)<.5):
            veryClose += 1
        if(np.abs(yTestBatch[i]-element)<1):
            withinReason += 1
        if(np.abs(yTestBatch[i]-element)<2):
            lessWithinReason += 1
        if(np.abs(yTestBatch[i]-element)<3):
            outerBoundOfReason += 1
        total += 1
    loss = float(mse_loss(output, yTestBatch.reshape(-1, 1)))
mse = loss
rootMSE = np.sqrt(mse)
print("Mean Squared Error is: ", mse)
print("RMSE is: ", rootMSE)
print("Predicted CR values within +/- 1.00 of actual CR: ", str(withinReason/total))
print("Predicted CR values within +/- 2.00 of actual CR: ", str(lessWithinReason/total))
print("Predicted CR values within +/- 3.00 of actual CR: ", str(outerBoundOfReason/total))

Mean Squared Error is:  1.806218147277832
RMSE is:  1.3439561552661723
Predicted CR values within +/- 1.00 of actual CR:  0.685064935064935
Predicted CR values within +/- 2.00 of actual CR:  0.8717532467532467
Predicted CR values within +/- 3.00 of actual CR:  0.939935064935065
