## Testing the implementation of the algorithm. ##

In [3]:
# Data Source - https://www.kaggle.com/lucidlenn/sloan-digital-sky-survey

# Classification of stars, galaxies and quasars.

import pandas as pd
import numpy as np
import scipy.optimize

#load the data
data = pd.read_csv("CosmosObject.csv")
data.head()

Unnamed: 0,objid,ra,dec,u,g,r,i,z,run,rerun,camcol,field,specobjid,redshift,plate,mjd,fiberid,class
0,1.24e+18,183.531326,0.089693,19.47406,17.0424,15.94699,15.50342,15.22531,752,301,4,267,3.72e+18,-9e-06,3306,54922,491,1
1,1.24e+18,183.598371,0.135285,18.6628,17.21449,16.67637,16.48922,16.3915,752,301,4,267,3.64e+17,-5.5e-05,323,51615,541,1
2,1.24e+18,183.680207,0.126185,19.38298,18.19169,17.47428,17.08732,16.80125,752,301,4,268,3.23e+17,0.123111,287,52023,513,2
3,1.24e+18,183.870529,0.049911,17.76536,16.60272,16.16116,15.98233,15.90438,752,301,4,269,3.72e+18,-0.000111,3306,54922,510,1
4,1.24e+18,183.883288,0.102557,17.55025,16.26342,16.43869,16.55492,16.61326,752,301,4,269,3.72e+18,0.00059,3306,54922,512,1


In [4]:
# Some feature selection

data.drop("objid", axis=1, inplace=True)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values.reshape(-1, 1)

In [5]:
# scale the features for better results
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train.shape, y_train.shape

((8000, 16), (8000, 1))

In [7]:
# import simple neural network
import nn

# try with default parameters and 1 hidden layer with 28 units
neural_net = nn.NeuralNetwork(28)
neural_net.fit(X_train, y_train)

Training Neural Network...
Minimizing Cost Function... this may take some time...
Training Done...


In [8]:
# predicting on train set to see how well it did 
preds = neural_net.predict(X_train)
np.mean(preds == y_train)

0.9015

In [9]:
# predicting on test set
test_preds = neural_net.predict(X_test)
np.mean(test_preds == y_test)

0.8795

### Testing with multiple hidden layers and some random parameters ###

In [10]:
neural_net_multiple = nn.NeuralNetwork(28, 28, 28, alpha=0.85, epsilon=0.3, maxiter=200)
neural_net_multiple.fit(X_train, y_train)

Training Neural Network...
Minimizing Cost Function... this may take some time...
Training Done...


In [11]:
# predicting on train set to see how well it did
# The performance might decrease because the training set is small 
multiple_preds = neural_net_multiple.predict(X_train)
np.mean(multiple_preds == y_train)

0.91075

In [12]:
# Nice, the performance actually increased
# predicting on test set
multiple_test_preds = neural_net_multiple.predict(X_test)
np.mean(multiple_test_preds == y_test)

0.894