In [50]:
# For neural networks.
import keras as kr
# For data frames.
import pandas as pd
# For numerical arrays.
import numpy as np
# For preprocessing data.
import sklearn.preprocessing as pre
# For splitting data sets.
import sklearn.model_selection as mod
# For whitening.
import sklearn.decomposition as dec

In [51]:
df = pd.read_csv('data/dog_data.csv')
df

Unnamed: 0,pure,age,weight,height,lifespan
0,True,3,7.94,49.6,15
1,True,7,5.05,63.5,17
2,True,6,16.15,68.2,11
3,False,3,15.50,64.7,12
4,False,3,12.17,59.7,14
...,...,...,...,...,...
95,False,4,20.76,52.2,10
96,False,4,17.40,47.5,11
97,False,6,15.60,51.4,12
98,False,3,8.37,54.1,16


Pure is a categorical data point, with True or False option. So we need to transform this into qualitative data before applying neural network. With more than two categorical variables, I can choose one values to encode as either 1 or 0, and I can do that here too. 

In [52]:
df['ispure'] = (df['pure']==True).astype(np.int)
df['isnotpure']= (df['pure']==False).astype(np.int)

In [53]:
df

Unnamed: 0,pure,age,weight,height,lifespan,ispure,isnotpure
0,True,3,7.94,49.6,15,1,0
1,True,7,5.05,63.5,17,1,0
2,True,6,16.15,68.2,11,1,0
3,False,3,15.50,64.7,12,0,1
4,False,3,12.17,59.7,14,0,1
...,...,...,...,...,...,...,...
95,False,4,20.76,52.2,10,0,1
96,False,4,17.40,47.5,11,0,1
97,False,6,15.60,51.4,12,0,1
98,False,3,8.37,54.1,16,0,1


In [54]:
pureenc = pre.LabelBinarizer()
pureenc.fit(df['pure'])
df['pureenc'] = pureenc.transform(df['pure'])

In [85]:
x = df[['ispure', 'isnotpure','age', 'weight', 'height']]
y = df['lifespan'] #Outputs for training my neural network

In [86]:
x #Inputs for training my neural network

Unnamed: 0,ispure,isnotpure,age,weight,height
0,1,0,3,7.94,49.6
1,1,0,7,5.05,63.5
2,1,0,6,16.15,68.2
3,0,1,3,15.50,64.7
4,0,1,3,12.17,59.7
...,...,...,...,...,...
95,0,1,4,20.76,52.2
96,0,1,4,17.40,47.5
97,0,1,6,15.60,51.4
98,0,1,3,8.37,54.1


In [87]:
m = kr.models.Sequential()

In [98]:
# dim is 5 because I'm inputting 5 variables
m.add(kr.layers.Dense(10, input_dim=5, activation="linear"))

m.add(kr.layers.Dense(10, activation="linear"))
m.add(kr.layers.Dense(1, activation="linear"))
m.compile(loss='mean_squared_error', optimizer = 'adam')

In [99]:
m.fit(x, y, epochs = 10, batch_size = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1dac2fd2d30>

In [100]:
predict = m.predict(x) # an array

In [101]:
actual_output = y.as_matrix()

  """Entry point for launching an IPython kernel.


In [102]:
#Calculating the Mean Squared Error
np.sqrt(np.sum((predict.T - actual_output)**2))

19.52973912554437

In [103]:
actual_output

array([15, 17, 11, 12, 14, 12, 15, 11, 13, 10, 12, 17, 10, 14, 12, 11, 16,
       12, 11, 10, 12, 11, 15, 11, 11, 14, 15, 11, 17, 10, 10, 11, 11, 19,
        8, 13, 16, 14, 10, 12, 12, 14, 10, 13, 14, 11, 15, 10, 15, 12, 13,
       12, 10, 11, 10, 11, 10, 14, 10, 11, 11, 11, 13, 19, 14, 14,  8, 12,
       11, 11, 11, 16, 13, 12, 12, 18, 12, 16, 11,  8,  8, 12, 18,  9, 16,
       13,  9, 16,  9, 11, 16, 13,  8, 14, 15, 10, 11, 12, 16, 14],
      dtype=int64)

In [105]:
#Predicted outputs
np.round(m.predict(x).T)

array([[14., 14., 13., 13., 13., 12., 14., 12., 13., 12., 13., 14., 12.,
        13., 12., 13., 14., 13., 12., 12., 12., 12., 14., 12., 12., 13.,
        13., 13., 13., 12., 12., 12., 12., 14., 12., 12., 14., 13., 12.,
        12., 12., 13., 12., 14., 13., 13., 13., 12., 13., 12., 13., 12.,
        12., 12., 12., 13., 12., 13., 12., 12., 12., 13., 13., 14., 13.,
        14.,  9., 13., 13., 12., 12., 14., 12., 12., 12., 14., 12., 13.,
        12., 12., 12., 13., 14., 12., 14., 14., 12., 14., 12., 12., 14.,
        14., 12., 13., 13., 12., 12., 13., 14., 14.]], dtype=float32)