In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.special

In [3]:
# Read the raw table and discard the columns that are not passed on to the model.
df = pd.read_csv('nasa.csv').drop(
    columns=[
        'Neo Reference ID',
        'Name',
        'Est Dia in KM(min)',
        'Est Dia in KM(max)',
        'Est Dia in Feet(min)',
        'Est Dia in Feet(max)',
        'Est Dia in Miles(min)',
        'Est Dia in Miles(max)',
        'Equinox',
        'Relative Velocity km per hr',
        'Miss Dist.(Astronomical)',
        'Miss Dist.(kilometers)',
        'Miss Dist.(miles)',
        'Orbiting Body',
        'Orbit ID',
        'Orbit Determination Date',
        'Close Approach Date',
    ]
)

In [4]:
# Min-max scale every column into [0.00001, 0.99999]. The target column is scaled
# too, so its values stay strictly inside (0, 1), the range a sigmoid output can approach.
df = df.astype('float')
col_min = df.min()
# A constant column has zero range; divide by 1 instead so it maps to 0.00001
# rather than turning into NaN through 0/0.
col_range = (df.max() - col_min).replace(0, 1)
df = (df - col_min) / col_range * .99998 + .00001
# Shuffle the rows with a fixed seed so the train/validation split made from this
# frame (and every number derived from it) is the same on each run.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [5]:
# First 70% of the rows go to training, the remainder to validation.
tr_s = int(0.7 * len(df))
df_train, df_valid = df.iloc[:tr_s], df.iloc[tr_s:]

In [6]:
class Net:
    """Fully connected network with one hidden layer and sigmoid activations.

    The model is two weight matrices and no bias terms: ``wih`` has shape
    (hidden_nodes, input_nodes) and ``who`` has shape (output_nodes, hidden_nodes).
    ``train`` performs one step of online gradient descent on the squared error
    for a single sample; ``query`` only runs the forward pass.
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate, seed=None):
        """Create the network and draw its initial weights.

        Args:
            input_nodes: number of input features.
            hidden_nodes: number of units in the hidden layer.
            output_nodes: number of output units.
            learning_rate: step size applied to every weight update.
            seed: optional seed for the initial weights. ``None`` (the default)
                draws from numpy's global random state, as before.
        """
        self.inodes = input_nodes
        self.hnodes = hidden_nodes
        self.onodes = output_nodes
        self.lr = learning_rate

        # Both the numpy.random module and a Generator expose normal(loc, scale, size).
        rng = np.random if seed is None else np.random.default_rng(seed)

        # Standard deviation is 1/sqrt(fan-in): the number of links coming INTO a
        # node of the layer being fed (inputs for wih, hidden units for who).
        self.wih = rng.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = rng.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # Logistic sigmoid, applied element-wise.
        self.activation = scipy.special.expit

    def _forward(self, inputs_list):
        """Run the forward pass.

        Returns:
            (inputs, hidden_outputs, final_outputs), each a 2-D column array.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        hidden_outputs = self.activation(np.dot(self.wih, inputs))
        final_outputs = self.activation(np.dot(self.who, hidden_outputs))
        return inputs, hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Update both weight matrices in place from one (inputs, targets) sample.

        Args:
            inputs_list: sequence of ``input_nodes`` feature values.
            targets_list: scalar or sequence of ``output_nodes`` target values.
        """
        inputs, hidden_outputs, final_outputs = self._forward(inputs_list)
        targets = np.array(targets_list, ndmin=2).T

        output_errors = targets - final_outputs
        # Back-propagate through who BEFORE it is modified below.
        hidden_errors = np.dot(self.who.T, output_errors)

        # out * (1 - out) is the derivative of the sigmoid at that output.
        output_delta = output_errors * final_outputs * (1 - final_outputs)
        hidden_delta = hidden_errors * hidden_outputs * (1 - hidden_outputs)

        self.who += self.lr * np.dot(output_delta, hidden_outputs.T)
        self.wih += self.lr * np.dot(hidden_delta, inputs.T)

    def query(self, inputs_list):
        """Return the network output for one sample as an (output_nodes, 1) array."""
        return self._forward(inputs_list)[2]

In [6]:
# Show the training split as this cell's output.
df_train

Unnamed: 0,Absolute Magnitude,Est Dia in M(min),Est Dia in M(max),Epoch Date Close Approach,Relative Velocity km per sec,Miles per hour,Miss Dist.(lunar),Orbit Uncertainity,Minimum Orbit Intersection,Jupiter Tisserand Invariant,...,Inclination,Asc Node Longitude,Orbital Period,Perihelion Distance,Perihelion Arg,Aphelion Dist,Perihelion Time,Mean Anomaly,Mean Motion,Hazardous
0,0.680035,0.001364,0.001364,0.143172,0.255560,0.255560,0.536013,0.777772,0.006279,0.684870,...,0.012065,0.191878,0.026368,0.267366,0.651918,0.057741,0.905629,0.951385,0.609845,0.00001
1,0.336202,0.039031,0.039031,0.917179,0.367049,0.367049,0.796172,0.000010,0.812645,0.475766,...,0.381719,0.261971,0.061285,0.418655,0.541187,0.098434,0.921708,0.634025,0.393308,0.00001
2,0.422160,0.017007,0.017007,0.616458,0.221767,0.221767,0.446246,0.111119,0.150386,0.311177,...,0.189851,0.064485,0.128631,0.578255,0.752153,0.179505,0.915741,0.852276,0.222813,0.00001
3,0.555873,0.004644,0.004644,0.452214,0.493063,0.493063,0.684993,0.777772,0.242951,0.675498,...,0.166795,0.516955,0.027898,0.330062,0.969288,0.051360,0.900786,0.099164,0.595963,0.00001
4,0.288447,0.061892,0.061892,0.344527,0.229677,0.229677,0.153349,0.000010,0.069434,0.611508,...,0.062337,0.866774,0.037648,0.386211,0.229401,0.061409,0.911707,0.795616,0.519689,0.99999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3275,0.383956,0.024607,0.024607,0.919199,0.280259,0.280259,0.927112,0.000010,0.400342,0.264465,...,0.340062,0.352431,0.159531,0.793734,0.679845,0.190720,0.875811,0.303284,0.182314,0.00001
3276,0.536771,0.005595,0.005595,0.252368,0.515987,0.515987,0.964692,0.111119,0.099379,0.552203,...,0.308457,0.791248,0.045428,0.340252,0.854980,0.082434,0.920639,0.595323,0.470732,0.00001
3277,0.407834,0.019534,0.019534,0.347430,0.102401,0.102401,0.582877,0.000010,0.610278,0.346467,...,0.160456,0.679323,0.118698,0.920848,0.245630,0.113983,0.872884,0.418628,0.239096,0.00001
3278,0.613178,0.002649,0.002649,0.786515,0.201041,0.201041,0.103685,0.777772,0.056629,0.138681,...,0.086565,0.319600,0.329243,0.757756,0.966995,0.403736,0.838685,0.382928,0.079379,0.00001


In [14]:
# The input width is taken from the data (every column except the 'Hazardous'
# target) so it always matches the feature rows passed to train/query.
# The remaining arguments are 20 hidden nodes, 1 output node, learning rate 0.5.
n = Net(df_train.shape[1] - 1, 20, 1, 0.5)

In [48]:
# Training schedule: 100 passes over the first 400 rows of the training split.
N_EPOCHS = 100
N_TRAIN_ROWS = 400

# Build the feature matrix and target vector once instead of re-dropping the
# target column on every one of the N_EPOCHS * N_TRAIN_ROWS iterations.
# Rows are selected by position, so this does not depend on the index labels.
train_inputs = df_train.drop(['Hazardous'], axis=1).iloc[:N_TRAIN_ROWS].to_numpy()
train_targets = df_train['Hazardous'].iloc[:N_TRAIN_ROWS].to_numpy()

for epoch in range(N_EPOCHS):
    for features, target in zip(train_inputs, train_targets):
        n.train(features, target)

In [37]:
# Network output for the validation row at position 3, with the target removed from its inputs.
n.query(df_valid.iloc[3].drop('Hazardous'))

array([[0.00365738]])

In [25]:
# Target value of the validation row at position 3, for comparison with the query result.
df_valid.iloc[3]['Hazardous']

1e-05

In [49]:
# Count validation rows whose target is above 0.5 AND whose network output is
# at least 0.5, i.e. positives that the network also flags as positive.
cnt = 0
# iterrows() is the supported way to walk the rows; iterating the bare .iloc
# indexer only works through Python's __getitem__/IndexError fallback.
for _, sample in df_valid.iterrows():
    ans = n.query(sample.drop('Hazardous'))[0][0]
    target = sample['Hazardous']
    if ans >= 0.5 and target > 0.5:
        cnt += 1
print(cnt)

172


In [50]:
# Share of the validation rows with target above 0.5 that were counted in cnt.
cnt / len(df_valid.loc[df_valid['Hazardous'] > 0.5])

0.8113207547169812

In [1]:
# Number of non-null entries in the 'Hazardous' column. Needs `df` from the data-loading cell.
# NOTE(review): the recorded NameError suggests this ran on a fresh kernel before that cell - confirm and re-run in order.
df['Hazardous'].count()

NameError: name 'df' is not defined