In [None]:
# random forest classifier model
import pandas as pd
pd.options.display.max_columns = None
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import cufflinks as cf

# random forest classifier plots
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd

# neural network model
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

# neural network plots
import matplotlib.pyplot as plt
import pydot
import pydotplus
import graphviz
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from ann_visualizer.visualize import ann_viz

# Random Forest Classifier model

In [None]:
# Load the data set; `data` keeps the CSV contents exactly as read from disk.
data = pd.read_csv('dataSet.csv')

# Transform the string labels into numbers on a new frame, so the raw frame
# is left untouched and re-running this cell always gives the same result:
#     - male:   0
#     - female: 1
df = data.replace(['male', 'female'], [0, 1])

# Make the label dtype explicit instead of relying on pandas' implicit
# downcasting inside `replace`; later cells compare the label against 0 / 1.
df['label'] = df['label'].astype(int)

In [None]:
# prepare training values:
#     - x: what we know (every column except the label)
#     - y: what we want to know (the label column)
x = df.drop('label', axis=1)
y = df['label']

# One fixed seed for both the split and the forest, so the printed score is
# reproducible across kernel restarts instead of changing on every run.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=RANDOM_STATE
)

# create random forest classifier
rfc = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)

# train rfc on the training part only
rfc.fit(x_train, y_train)

# print the score obtained on the held-out test part, as a percentage
score = rfc.score(x_test, y_test)
print('{}%'.format(round(score*100, 2)))

In [None]:
# Classify one hand-entered sample with the trained forest.
# The 20 values are assumed to follow the training column order — TODO confirm.
sample = [
    0.2022728, 0.04060666, 0.2129694, 0.1821243, 0.227241,
    0.04511674, 3.040879, 17.07277, 0.8827420, 0.2635666,
    0.1200658, 0.2022728, 0.1497998, 0.04319295, 0.2791139,
    0.3374789, 0, 1.593457, 1.593457, 0.11383929,
]

# predict() expects a 2-D input, hence the extra list around the sample
prediction = rfc.predict([sample])

# class 0 is reported as "M", anything else as "F"
if prediction[0] == 0:
    print("M")
else:
    print("F")

# Random Forest Classifier plots

### Plot of meanfun (should be improved)

In [None]:
# Compare the `meanfun` feature between the two classes.
# (other features noted for plotting: IQR, Q25, sp.ent)
#
# The labels were replaced by the numbers 0 (male) / 1 (female) when the data
# was loaded, so the filter has to compare against numbers: comparing against
# the strings '0' / '1' matches no row and produces an empty plot.
male = df.query("label == 0")
female = df.query("label == 1")

# Drop the original row positions so both series share a 0..n-1 index and are
# drawn over the same range; with their original (disjoint) indices pandas
# would pad each column with NaN wherever the other class has rows.
# Dedicated names are used so the training `x` / `y` are not overwritten.
meanfun_by_gender = pd.DataFrame({
    'male': male['meanfun'].reset_index(drop=True),
    'female': female['meanfun'].reset_index(drop=True),
})
meanfun_by_gender.iplot(colorscale='rdylbu')

### Epic Cloud 3D Plot

In [None]:
# 3D scatter of meanfun / IQR / Q25 with one point cloud per class.
# Reuses the label-encoded `df` built when the data was loaded (0 = male,
# 1 = female) instead of reading and re-encoding the CSV a second time.
label_names = {0: 'male', 1: 'female'}
colors = ['rgb(139,229,128)', 'rgb(255,184,122)']

# One trace per distinct label. `unique()` is evaluated once, and the colour
# index wraps around so an unexpected extra label cannot raise IndexError.
traces = []
for i, label in enumerate(df['label'].unique()):
    rows = df[df['label'] == label]
    traces.append(dict(
        # readable legend entry; falls back to the raw value for unknown labels
        name=label_names.get(label, str(label)),
        x=rows['meanfun'],
        y=rows['IQR'],
        z=rows['Q25'],
        type='scatter3d',
        mode='markers',
        marker=dict(size=2, color=colors[i % len(colors)], line=dict(width=0)),
    ))

layout = dict(
    width=800,
    height=550,
    autosize=False,
    title='Clouds',
    # name the axes so the figure can be read on its own
    scene=dict(
        xaxis=dict(title='meanfun'),
        yaxis=dict(title='IQR'),
        zaxis=dict(title='Q25'),
    ),
)

fig = dict(data=traces, layout=layout)

py.iplot(fig, validate=False)

# Neural network model

In [None]:
# create training values: every column except the label is an input feature,
# the label is encoded as male -> 0, female -> 1
df = pd.read_csv("dataSet.csv").replace(['male', 'female'], [0, 1])
x = df.drop('label', axis=1).values
c = df['label'].values.astype(int)

# Hold out part of the data for validation. Fitting on all rows and then
# validating on rows taken from that same data reports leaked, over-optimistic
# metrics. Splitting here also removes the dependency on the x_test / y_test
# variables produced by the random forest cell.
x_train_nn, x_val_nn, c_train_nn, c_val_nn = train_test_split(
    x, c, test_size=0.33, random_state=42
)

# create model: three hidden layers of 32 softsign units and a single sigmoid
# output for the binary label; the input width follows the data instead of a
# hard-coded column count
model = Sequential()
model.add(Dense(32, activation='softsign', input_dim=x.shape[1]))
model.add(Dense(32, activation='softsign'))
model.add(Dense(32, activation='softsign'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# train and save training info so then we are able to plot cool stuff
history = model.fit(
    x_train_nn, c_train_nn,
    epochs=100,
    batch_size=10,
    validation_data=(x_val_nn, c_val_nn),
)

# final loss / accuracy on the held-out rows
loss, acc = model.evaluate(x_val_nn, c_val_nn, batch_size=128)

In [None]:
# predict from values: one hand-entered sample with 20 feature values
xnew = np.array([[0.1984445, 0.06684052, 0.2157356, 0.1375283, 0.264536, 0.1270077, 3.38914, 20.50335, 0.8929154, 0.3376926, 0.1200362, 0.1984445, 0.1396227, 0.04349112, 0.2791139, 0.4190832, 0, 5.081836, 5.081836, 0.07727807]])

# The model ends in a single sigmoid unit, so the class is the predicted
# probability thresholded at 0.5. This is what `predict_classes` computed for
# such a model, and it keeps working on Keras versions where
# `predict_classes` no longer exists.
ynew = (model.predict(xnew) > 0.5).astype('int32')
print("X=%s, Predicted=%s" % (xnew[0], ynew[0]))

# Neural network plots

### Show layers

In [None]:
# Render the layer schema in three steps: model -> dot graph -> SVG markup,
# then display the markup inline as the cell's output.
layer_graph = model_to_dot(model, show_shapes=True, show_layer_names=False)
svg_markup = layer_graph.create(prog='dot', format='svg')
SVG(svg_markup)

### Plot training/validation accuracy and loss values

In [None]:
def plot_history_metric(history, metric_keys, title, ylabel):
    """Plot one recorded training metric next to its validation counterpart.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict maps
            metric names to one value per epoch.
        metric_keys: candidate names for the metric, tried in order. Needed
            because accuracy is recorded as ``'acc'`` or ``'accuracy'``
            depending on the Keras version.
        title: figure title.
        ylabel: label of the y axis.

    Raises:
        KeyError: if none of ``metric_keys`` was recorded during training.
    """
    recorded = history.history
    key = next((k for k in metric_keys if k in recorded), None)
    if key is None:
        raise KeyError(
            'none of {} found in training history (available: {})'.format(
                list(metric_keys), sorted(recorded)
            )
        )

    plt.plot(recorded[key])
    # validation values are stored under the same name prefixed with 'val_'
    plt.plot(recorded['val_' + key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()


# accuracy values
plot_history_metric(history, ('acc', 'accuracy'), 'Model accuracy', 'Accuracy')

# loss values
plot_history_metric(history, ('loss',), 'Model loss', 'Loss')

### Plot connections

In [None]:
# Draw the connection diagram of the trained network with ann_visualizer.
# Presumably the graph is written to "network.gv" and opened in a viewer
# because of view=True — confirm against the ann_visualizer documentation.
ann_viz(
    model,
    view=True,
    filename="network.gv",
    title="Neural Network Connections",
)