In [13]:
import pandas as pd
import numpy as np
import csv
colNames = ['class', 'ra', 'dec', 'u', 'g', 'r', 'i', 'z', 'nuv_mag']
dataSet = pd.read_csv('../GALEX_data-extended-feats.csv', usecols = colNames)
layers = [8, 24, 3]
dataSet = pd.DataFrame(dataSet)

In [14]:
def sig(z):
    return (1)/(1 + np.exp(-z))

def sig_prime(z):
    return sig(z)*(1-sig(z))

def tanH(z):
    return np.tanh(z)

def tanH_prime(z):
    return 1 - ((tanH(z))**2)


In [15]:
# (np.sqrt(2/(layers[0] + layers[1])))
weightsHidden = np.array(np.random.randn(layers[1], layers[0]))*(np.sqrt(2/(layers[1] + layers[0])))
# (np.sqrt(2/(layers[1] + layers[2])))
biasHidden = np.zeros(layers[1])
weightsOut = np.array(np.random.randn(layers[2], layers[1]))*(np.sqrt(2/(layers[1] + layers[2])))
biasOut = np.zeros(layers[2])
testDf = dataSet.sample(frac = 0.2, random_state = 3)

In [16]:
def expec(inp):
    if inp == 0:
        return np.array([1, 0, 0])
    elif inp == 1:
        return np.array([0, 1, 0])
    else:
        return np.array([0, 0, 1])

def feedFor(trainDf, row, weightsHidden, biasHidden, weightsOut, biasOut):
    x = np.array(trainDf.iloc[row, 1:])
    tempDot = np.array(np.dot(weightsHidden, x.T)) # 20 x 1
    hiddenLI = np.add(tempDot, biasHidden) # 1 x 20
    hiddenAct = tanH(hiddenLI) # 1 x 20
    tempDot2 = np.dot(weightsOut, hiddenAct)
    outputLI = np.add(tempDot2, biasOut.T)
    output = tanH(outputLI)
#     print(output)
    return x, output, hiddenAct

def backProp(trainDf, layers, x, row, output, hiddenAct, weightsHidden, biasHidden, weightsOut, biasOut, eta):
    actClass = int(trainDf.iloc[row, 0])
    expected = expec(actClass)
    error = expected - output
    slopeOutput = tanH_prime(np.array(output))
    slopeHL = tanH_prime(np.array(hiddenAct))
    deltaO = error*slopeOutput
    errHL = np.dot(weightsOut.T, deltaO)
    deltaHL = errHL*slopeHL
    
    deltaO = np.reshape(deltaO, (3, 1))
    hiddenAct = np.reshape(hiddenAct, (layers[1], 1))
    tempDot = np.dot(deltaO, hiddenAct.T)*(eta) # 3 x 20
    weightsOut += tempDot
    
    deltaHL = np.reshape(deltaHL, (layers[1], 1))
    x = np.reshape(x, (8, 1))
    tempDot2 = np.dot(deltaHL, x.T)*(eta)
    weightsHidden += tempDot2
    biasHidden += np.sum(deltaHL, axis = 0)*(eta)
    biasOut += np.sum(deltaO, axis = 0)*(eta)


def runNN(layers, trainDf, weightsHidden, biasHidden, weightsOut, biasOut, eta):
    for row in range(len(trainDf)):
        x, output, hiddenAct = feedFor(trainDf, row, weightsHidden, biasHidden, weightsOut, biasOut)
        backProp(trainDf, layers, x, row, output, hiddenAct, weightsHidden, biasHidden, weightsOut, biasOut, eta)

def accu():
    correct = 0
    for row in range(len(testDf)):
        actClass = int(testDf.iloc[row, 0])
        x = np.array(testDf.iloc[row, 1:])
        tempDot = np.array(np.dot(weightsHidden, x.T)) # 20 x 1
        hiddenLI = np.add(tempDot, biasHidden) # 1 x 20
        hiddenAct = tanH(hiddenLI) # 1 x 20
        tempDot2 = np.dot(weightsOut, hiddenAct)
        outputLI = np.add(tempDot2, biasOut.T)
        output = tanH(outputLI)
        expected = expec(actClass)
        if np.argmax(expected) == np.argmax(output):
            #print("Instance classified as: ", np.argmax(output))
            correct += 1
        else:
            pass
            #print("Instance misclassified as: ", np.argmax(output))
        #print(output)
    return correct/len(testDf)

filename = "accuracy_0.001.csv"
fields = ['Training Data', 'Accuracy']
with open(filename, 'w') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields) 
    eta = 0.001
    lenDf = 0
    for i in range(100):
        trainDf = dataSet.sample(frac = 0.8)
        lenDf += len(trainDf)
        runNN(layers, trainDf, weightsHidden, biasHidden, weightsOut, biasOut, eta)
        acc = accu()
        newRow = [lenDf, acc]
        csvwriter.writerow(newRow)
        print(i, acc)



0 0.6573468173706127
1 0.6573468173706127
2 0.6668649613325401
3 0.7727543129089828
4 0.6627007733491969
5 0.7043426531826293
6 0.819155264723379
7 0.6674598453301607
8 0.8054729327781083
9 0.7566924449732302
10 0.8072575847709696
11 0.8447352766210589
12 0.8227245687091017
13 0.6989886972040452
14 0.8453301606186794
15 0.792385484830458
16 0.7834622248661511
17 0.6972040452111838
18 0.8340273646638905
19 0.8399762046400951
20 0.7787031528851874
21 0.8465199286139203
22 0.6591314693634741
23 0.7798929208804283
24 0.7917906008328376
25 0.834622248661511
26 0.8465199286139203
27 0.842355740630577
28 0.8274836406900654
29 0.8477096966091612
30 0.8316478286734087
31 0.6585365853658537
32 0.7287328970850684
33 0.7900059488399762
34 0.8465199286139203
35 0.8483045806067817
36 0.8364069006543724
37 0.8471148126115408
38 0.8245092207019631
39 0.784651992861392
40 0.8447352766210589
41 0.8352171326591314
42 0.7906008328375966
43 0.8387864366448543
44 0.8453301606186794
45 0.8494943486020226
46 