In [1]:
import numpy as np
import pandas as pd
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from scipy.linalg import pinv2

In [2]:
def parse_line_by_line(filename: str) -> Tuple[dict, np.ndarray]:
    with open(filename) as f:
        data: List[str] = f.readlines()

    config = dict()
    array = []

    for line in data:
        if line.startswith("%"):
            v = line[1:].split("=")
            config[v[0]] = int(v[1].replace("\n", ""))
        else:
            array.append(list(map(float, line.split(" "))))
    return config, np.array(array)

In [3]:
config, array = parse_line_by_line("diabetes2.dt")
print(array.shape)

(768, 9)


In [4]:
train = pd.DataFrame(array[:576,:])
test = pd.DataFrame(array[576:,])

In [5]:
train.shape

(576, 9)

In [6]:
test.shape

(192, 9)

In [7]:
onehotencoder = OneHotEncoder(categories='auto')
scaler = StandardScaler()

X_train = scaler.fit_transform(train.values[:,:8])
y_train = onehotencoder.fit_transform(train.values[:,8:]).toarray()

X_test = scaler.fit_transform(test.values[:,:8])
y_test = onehotencoder.fit_transform(test.values[:,8:]).toarray()

In [8]:
X_test.shape

(192, 8)

In [9]:
input_size = X_train.shape[1]

In [10]:
hidden_size = 50

In [11]:
input_weights = np.random.normal(size=[input_size,hidden_size])
biases = np.random.normal(size=[hidden_size])

In [12]:
def relu(x):
    return np.maximum(x, 0, x)

In [13]:
def hidden_nodes(X):
    G = np.dot(X, input_weights)
    G = G + biases
    H = relu(G)
    return H

output_weights = np.dot(pinv2(hidden_nodes(X_train)), y_train)

In [14]:
def predict(X):
    out = hidden_nodes(X)
    out = np.dot(out, output_weights)
    return out

In [15]:
prediction = predict(X_test)
correct = 0
total = X_test.shape[0]


for i in range(total):
    predicted = np.argmax(prediction[i])
    actual = np.argmax(y_test[i])
    correct += 1 if predicted == actual else 0
accuracy = correct/total
print('Accuracy for ', hidden_size, ' hidden nodes: ', accuracy)

Accuracy for  50  hidden nodes:  0.7395833333333334
