In [171]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

def getData(filePath):
    data = np.genfromtxt(filePath, delimiter=',')
    x, y = np.array(data[:,0:-1], dtype=float), np.array(data[:,-1],dtype=int)
    y = y.reshape(1,len(y)).T
    return x,y

def splitInputOutput(data):
    x, y = np.array(data[:,0:-1], dtype=float), np.array(data[:,-1],dtype=int)
    y = y.reshape(1,len(y)).T
    return x,y

def sigmoid(x):
    return 1/(1+np.exp(-x))

def make_sigmoid_prime(x):
    return x*(1-x)

def trainNeuralNet(synapse0, synapse1, epochs):
    for j in range(epochs):
        l1 = sigmoid(np.dot(X,synapse0))
        l2 = sigmoid(np.dot(l1,synapse1))
        l2_delta = (y - l2)*make_sigmoid_prime(l2)
        l1_delta = l2_delta.dot(synapse1.T) * make_sigmoid_prime(l1)
        synapse1 += l1.T.dot(l2_delta) #adjust our synapses up or down as necessary
        synapse0 += X.T.dot(l1_delta)


Test our neural network trainer against a simple dataset: X will contain binary tuples and Y will be the XOR result of rows in X.

In [172]:
# trivial dataset
X, y = getData('data/prepared/trivial.csv')

# X = np.array([ [0,0],[0,1],[1,0],[1,1] ])
# y = np.array([[0,1,1,0]]).T # XOR(X)

np.random.seed(seed=42)
syn0 = 2*np.random.random((X.shape[1],X.shape[0])) - 1
syn1 = 2*np.random.random((y.shape[0],y.shape[1])) - 1
epochs = 1000

trainNeuralNet(syn0, syn1, epochs)

layer1_transform = sigmoid(np.dot(X,syn0))
result = sigmoid(np.dot(layer1_transform,syn1))

print("MSE: ",0.5*np.sum((y - result)**2))
print("Output of predicted y (2nd and 3rd rows should be 1): ",result)


MSE:  0.0114725639859
Output of predicted y (2nd and 3rd rows should be 1):  [[ 0.09335549]
 [ 0.95700552]
 [ 0.91362687]
 [ 0.0701501 ]]


Looks good.  Now lets load our accute inflamation dataset.

In [173]:
# X,y = getData('data/prepared/dataWithTemp.csv')

df = pd.read_csv('data/prepared/dataWithTemp.csv',sep=',',names=["Temp", "Nausea", "Lumbar", "Pushing","Micturition","Burning","BladderInflamation"]);
df["Temp"] = df.transform(lambda x: x - 37)

X,y = splitInputOutput(df.as_matrix())

kf = KFold(n_splits=5,random_state=None, shuffle=True)


for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    np.random.seed(seed=42)
    syn0 = 2*np.random.random((X_train.shape[1],X_train.shape[0])) - 1
    syn1 = 2*np.random.random((y_train.shape[0],y_train.shape[1])) - 1
    epochs = 10000

    trainNeuralNet(syn0, syn1, epochs)

    layer1_transform = sigmoid(np.dot(X_test,syn0))
    result = sigmoid(np.dot(layer1_transform,syn1))
    print("MSE: ",0.5*np.sum((y_test - result)**2))


MSE:  5.39454610192e-07
MSE:  5.16238558606e-06
MSE:  3.31410895478e-06
MSE:  3.60183408064e-07
MSE:  7.61713710947e-06
