In [1]:
import pandas as pd
import numpy as np
import csv
colNames = ['class', 'ra'
            , 'dec', 'u', 'g', 'r', 'i', 'z', 'nuv_mag']

dataSet = pd.read_csv('../GALEX_data-extended-feats.csv', usecols = colNames)
layers = [8, 32, 3]
dataSet = pd.DataFrame(dataSet)

In [2]:
def sig(z):
    return (1)/(1 + np.exp(-z))

def sig_prime(z):
    return sig(z)*(1-sig(z))

def tanH(z):
    return np.tanh(z)

def tanH_prime(z):
    return 1 - ((tanH(z))**2)

In [3]:
weightsHidden = np.array(np.random.randn(layers[1], layers[0]))*(np.sqrt(2/(layers[1] + layers[0])))
biasHidden = np.zeros(layers[1])
weightsOut = np.array(np.random.randn(layers[2], layers[1]))*(np.sqrt(2/(layers[1] + layers[2])))
biasOut = np.zeros(layers[2])
testDf = dataSet.sample(frac = 0.2, random_state = 3)

In [4]:
def expec(inp):
    if inp == 0:
        return np.array([1, 0, 0])
    elif inp == 1:
        return np.array([0, 1, 0])
    else:
        return np.array([0, 0, 1])

def feedFor(trainDf, row, weightsHidden, biasHidden, weightsOut, biasOut):
    x = np.array(trainDf.iloc[row, 1:])
    tempDot = np.array(np.dot(weightsHidden, x.T)) # 20 x 1
    hiddenLI = np.add(tempDot, biasHidden) # 1 x 20
    hiddenAct = tanH(hiddenLI) # 1 x 20
    tempDot2 = np.dot(weightsOut, hiddenAct)
    outputLI = np.add(tempDot2, biasOut.T)
    output = tanH(outputLI)
#     print(output)
    return x, output, hiddenAct

def backProp(trainDf, layers, x, row, output, hiddenAct, weightsHidden, biasHidden, weightsOut, biasOut, eta):
    actClass = int(trainDf.iloc[row, 0])
    expected = expec(actClass)
    error = expected - output
    slopeOutput = tanH_prime(np.array(output))
    slopeHL = tanH_prime(np.array(hiddenAct))
    deltaO = error*slopeOutput
    errHL = np.dot(weightsOut.T, deltaO)
    deltaHL = errHL*slopeHL
    
    deltaO = np.reshape(deltaO, (3, 1))
    hiddenAct = np.reshape(hiddenAct, (layers[1], 1))
    tempDot = np.dot(deltaO, hiddenAct.T)*(eta) # 3 x 20
    weightsOut += tempDot
    
    deltaHL = np.reshape(deltaHL, (layers[1], 1))
    x = np.reshape(x, (8, 1))
    tempDot2 = np.dot(deltaHL, x.T)*(eta)
    weightsHidden += tempDot2
    biasHidden += np.sum(deltaHL, axis = 0)*(eta)
    biasOut += np.sum(deltaO, axis = 0)*(eta)


def runNN(layers, trainDf, weightsHidden, biasHidden, weightsOut, biasOut, eta):
    for row in range(len(trainDf)):
        x, output, hiddenAct = feedFor(trainDf, row, weightsHidden, biasHidden, weightsOut, biasOut)
        backProp(trainDf, layers, x, row, output, hiddenAct, weightsHidden, biasHidden, weightsOut, biasOut, eta)

def accu():
    correct = 0
    for row in range(len(testDf)):
        actClass = int(testDf.iloc[row, 0])
        x = np.array(testDf.iloc[row, 1:])
        tempDot = np.array(np.dot(weightsHidden, x.T)) # 20 x 1
        hiddenLI = np.add(tempDot, biasHidden) # 1 x 20
        hiddenAct = tanH(hiddenLI) # 1 x 20
        tempDot2 = np.dot(weightsOut, hiddenAct)
        outputLI = np.add(tempDot2, biasOut.T)
        output = tanH(outputLI)
        expected = expec(actClass)
        if np.argmax(expected) == np.argmax(output):
            #print("Instance classified as: ", np.argmax(output))
            correct += 1
        else:
            pass
            #print("Instance misclassified as: ", np.argmax(output))
        #print(output)
    return correct/len(testDf)

filename = "accuracy_0.01_32.csv"
fields = ['Training Data', 'Accuracy']
with open(filename, 'w') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields) 
    eta = 0.01
    lenDf = 0
    for i in range(100):
        trainDf = dataSet.sample(frac = 0.8)
        lenDf += len(trainDf)
        runNN(layers, trainDf, weightsHidden, biasHidden, weightsOut, biasOut, eta)
        acc = accu()
        newRow = [lenDf, acc]
        csvwriter.writerow(newRow)
        print(i, acc)



  


0 0.6573468173706127
1 0.6573468173706127
2 0.6573468173706127
3 0.6686496133254015
4 0.669244497323022
5 0.6573468173706127
6 0.5080309339678762
7 0.60261748958953
8 0.669244497323022
9 0.6591314693634741
10 0.6103509815585961
11 0.6793575252825699
12 0.5074360499702558
13 0.5883402736466389
14 0.6627007733491969
15 0.4277215942891136
16 0.6995835812016656
17 0.5490779298036883
18 0.6983938132064248
19 0.5687091017251636
20 0.702558001189768
21 0.5431290898274836
22 0.6591314693634741
23 0.7150505651397977
24 0.37775133848899467
25 0.27483640690065436
26 0.709101725163593
27 0.5443188578227246
28 0.7085068411659726
29 0.17906008328375966
30 0.7001784651992862
31 0.31469363474122547
32 0.7096966091612136
33 0.6787626412849495
34 0.5687091017251636
35 0.4681737061273052
36 0.6966091612135633
37 0.635930993456276
38 0.7120761451516954
39 0.6811421772754312
40 0.7108863771564545
41 0.6650803093396788
42 0.5395597858417609
43 0.27900059488399764
44 0.43723973825104107
45 0.5800118976799524

In [16]:
np.save("wH", weightsHidden)
np.save("bH", biasHidden)
np.save("wO", weightsOut)
np.save("bO", biasOut)