# **Decision trees**

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import random
from sklearn.tree import export_graphviz
from IPython.display import SVG
!pip install graphviz
from graphviz import Source
from sklearn.tree import export_text

import matplotlib.pyplot as plt

In [None]:
# Load the banknote dataset from a CSV expected in the current working
# directory, then show the frame as the cell output.
bankNote = pd.read_csv('BankNote_Authentication.csv')
bankNote

In [None]:
# The four predictor columns used by every model in this notebook.
feature_cols = ['variance', 'skewness', 'curtosis', 'entropy']

# Give the target column a self-describing name ('class' -> 'genuine').
bankNote = bankNote.rename(columns={'class': 'genuine'})

# Per-column count of missing values (shown as the cell output).
bankNote.isna().sum()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Feature matrix and target vector for the tree.
X = bankNote[feature_cols]
y = bankNote.genuine

# Depth-3 tree; random_state is fixed so the fitted tree is repeatable.
treeclf = DecisionTreeClassifier(max_depth=3, random_state=1)
treeclf.fit(X, y)

# Export the fitted tree as Graphviz DOT text, then render it inline as SVG.
dot_source = tree.export_graphviz(
    treeclf,
    out_file=None,
    feature_names=feature_cols,
    class_names=['0', '1'],
    filled=True,
)
graph = Source(dot_source)

svg = SVG(graph.pipe(format='svg'))
display(svg)

In [None]:
# Plain-text rendering of the fitted tree's split rules, labelled with the
# feature names.
decision_rules = export_text(treeclf, feature_names=feature_cols)
print(decision_rules)

In [None]:
# Tabulate the fitted tree's feature_importances_ next to the feature names.
pd.DataFrame({'feature':feature_cols, 'importance':treeclf.feature_importances_})

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Deeper (depth-5) tree on the same X / y; random_state is fixed so the
# fitted tree is repeatable. This rebinds `treeclf`.
treeclf = DecisionTreeClassifier(max_depth=5, random_state=1)
treeclf.fit(X, y)

# Export the fitted tree as Graphviz DOT text, then render it inline as SVG.
dot_source = tree.export_graphviz(
    treeclf,
    out_file=None,
    feature_names=feature_cols,
    class_names=['0', '1'],
    filled=True,
)
graph = Source(dot_source)

svg = SVG(graph.pipe(format='svg'))
display(svg)

In [None]:
# Plain-text split rules of the tree currently bound to `treeclf`
# (the depth-5 tree when cells are run top to bottom).
decision_rules = export_text(treeclf, feature_names=feature_cols)
print(decision_rules)

In [None]:
# Feature importances of the tree currently bound to `treeclf`
# (the depth-5 tree when cells are run top to bottom).
pd.DataFrame({'feature':feature_cols, 'importance':treeclf.feature_importances_})

# **Neural networks**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Re-select the predictors and target for the neural-network section.
# Requires `bankNote` to already have its target column named 'genuine'.
feature_cols = ['variance','skewness','curtosis','entropy']

X = bankNote[feature_cols]
y = bankNote.genuine

In [None]:
# Hold out a test split (default proportions). random_state is pinned so the
# split, and therefore every metric reported below, is the same on each
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Only the last bare expression of a cell is rendered, so the column index
# is displayed explicitly; the distinct target values are the cell output.
display(X.columns)
y.unique()

In [None]:
# Learn the scaling parameters from the training split only, so nothing
# about the test split leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transform to both splits (this rebinds X_train / X_test
# to the scaled arrays).
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

X_train

In [None]:
# One hidden layer of 3 neurons. The trailing comma matters: `(3)` is just the
# integer 3, whereas `(3,)` is the one-element tuple the parameter describes.
# random_state pins the stochastic parts of training so results are repeatable.
mlp = MLPClassifier(hidden_layer_sizes=(3,), max_iter=500, random_state=1)

In [None]:
# Train the network on the training split.
mlp.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out test split.
predictions = mlp.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted labels on the test split.
print(confusion_matrix(y_test,predictions))

In [None]:
# Text summary of per-class classification metrics on the test split.
print(classification_report(y_test,predictions))

In [None]:
from sklearn.metrics import accuracy_score

# Score the network's test-split predictions against the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print('Test accuracy:', accuracy)

In [None]:
# Describe the fitted network's architecture from its learned parameters.
# mlp.coefs_[l] is the weight matrix between layer l and layer l + 1, indexed
# [source node][target node]; mlp.intercepts_[l] holds one bias per target node.

# The output-node count is read from the last weight matrix rather than from
# the number of distinct labels, so it always agrees with the layer shapes
# printed below.
n_output_nodes = len(mlp.coefs_[-1][0])
print('This dataset has {} input nodes and {} output node(s)'.format(len(X.columns), n_output_nodes))
print('There are {} 2D arrays of coefficients, one for each layer'.format(len(mlp.coefs_)))
# This header has no value to interpolate, so it carries no `{}` placeholder.
print('The layers have the following number of coefficients:')
for layer_idx, layer_weights in enumerate(mlp.coefs_):
    m = len(layer_weights)
    n = len(layer_weights[0])
    print('  {}: {}x{} ({} nodes feeding into a layer of {} nodes)'.format(layer_idx, m, n, m, n))
# Print the actual coefficients
# print(mlp.coefs_)

print()
print('There are {} 1D arrays of intercepts, one for each layer'.format(len(mlp.intercepts_)))
print('Each layer has {} intercepts, one for each node'.format([len(layer_biases) for layer_biases in mlp.intercepts_]))


In [None]:
import networkx as nx
import colorsys

# Retrain the single-hidden-layer network with a larger iteration budget.
# `(3,)` is a real one-element tuple (`(3)` is just the integer 3), and
# random_state pins the stochastic parts of training so the confusion matrix
# and the drawing below are repeatable.
mlp = MLPClassifier(hidden_layer_sizes=(3,), max_iter=5000, random_state=1)
mlp.fit(X_train, y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test, predictions))

def show_ann(mlp):
    """Draw a fitted multi-layer perceptron as a layered directed graph.

    Every neuron becomes a node named ``Layer<l>_<i>``. Layer 0 (the inputs)
    sits on the top row and each following layer one row lower. Each weight
    ``mlp.coefs_[l][k][i]`` becomes an edge from node ``k`` of layer ``l`` to
    node ``i`` of layer ``l + 1``.

    Edge width is twice the absolute weight. Negative weights are drawn with
    HSV hue 0 (red), non-negative weights with hue 0.65 (blue).

    Parameters
    ----------
    mlp : object with a ``coefs_`` attribute
        ``coefs_`` must be a non-empty sequence of 2-D weight matrices indexed
        ``[source node][target node]``. Networks with no hidden layer (a
        single weight matrix) are supported.

    Returns
    -------
    None
        The graph is rendered through ``nx.draw``.
    """
    weight_matrices = mlp.coefs_
    # n weight matrices connect n + 1 layers, so the input row has y == n.
    top_row = len(weight_matrices)

    G = nx.DiGraph()
    pos = {}

    # Input nodes: one per row of the first weight matrix.
    for i in range(len(weight_matrices[0])):
        pos['Layer0_{}'.format(i)] = (i, top_row)

    # Hidden and output layers are handled uniformly: matrix l connects
    # layer l (its rows) to layer l + 1 (its columns).
    for prev_layer, matrix in enumerate(weight_matrices):
        cur_layer = prev_layer + 1
        row = top_row - cur_layer
        for i in range(len(matrix[0])):
            target = 'Layer{}_{}'.format(cur_layer, i)
            pos[target] = (i, row)
            for k in range(len(matrix)):
                G.add_edge('Layer{}_{}'.format(prev_layer, k), target, weight=matrix[k][i])

    edges = G.edges()
    colors = [colorsys.hsv_to_rgb(0 if G[u][v]['weight'] < 0 else 0.65,
                                  1,
                                  1) for u, v in edges]
    weights = [abs(G[u][v]['weight']) * 2 for u, v in edges]

    nx.draw(G, pos, node_color='y', node_size=450, width=weights, edge_color=colors)
    
# Draw the retrained single-hidden-layer network.
show_ann(mlp)

In [None]:
# Two hidden layers of 4 neurons each. random_state pins the stochastic parts
# of training so the reported metrics are repeatable.
mlp1 = MLPClassifier(hidden_layer_sizes=(4, 4), max_iter=5000, random_state=1)

In [None]:

# Train the (4, 4) network on the training split.
mlp1.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out test split with the (4, 4) network.
predictions1 = mlp1.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted labels for the (4, 4) network.
print(confusion_matrix(y_test,predictions1))

In [None]:
# Text summary of per-class classification metrics for the (4, 4) network.
print(classification_report(y_test,predictions1))

In [None]:
# Describe the fitted (4, 4) network's architecture from its learned parameters.
# mlp1.coefs_[l] is the weight matrix between layer l and layer l + 1, indexed
# [source node][target node]; mlp1.intercepts_[l] holds one bias per target node.

# The output-node count is read from the last weight matrix rather than from
# the number of distinct labels, so it always agrees with the layer shapes
# printed below.
n_output_nodes = len(mlp1.coefs_[-1][0])
print('This dataset has {} input nodes and {} output node(s)'.format(len(X.columns), n_output_nodes))
print('There are {} 2D arrays of coefficients, one for each layer'.format(len(mlp1.coefs_)))
# This header has no value to interpolate, so it carries no `{}` placeholder.
print('The layers have the following number of coefficients:')
for layer_idx, layer_weights in enumerate(mlp1.coefs_):
    m = len(layer_weights)
    n = len(layer_weights[0])
    print('  {}: {}x{} ({} nodes feeding into a layer of {} nodes)'.format(layer_idx, m, n, m, n))
# Print the actual coefficients
# print(mlp1.coefs_)

print()
print('There are {} 1D arrays of intercepts, one for each layer'.format(len(mlp1.intercepts_)))
print('Each layer has {} intercepts, one for each node'.format([len(layer_biases) for layer_biases in mlp1.intercepts_]))


In [None]:
import networkx as nx
import colorsys

# Rebuild and retrain the (4, 4) network before drawing it. random_state pins
# the stochastic parts of training so the confusion matrix and the drawing
# below are repeatable.
mlp1 = MLPClassifier(hidden_layer_sizes=(4, 4), max_iter=5000, random_state=1)
mlp1.fit(X_train, y_train)
predictions1 = mlp1.predict(X_test)
print(confusion_matrix(y_test, predictions1))

def show_ann(mlp):
    """Draw a fitted multi-layer perceptron as a layered directed graph.

    The network is read exclusively from the ``mlp`` argument (never from a
    notebook-level variable), so the function draws whichever model it is
    given.

    Every neuron becomes a node named ``Layer<l>_<i>``. Layer 0 (the inputs)
    sits on the top row and each following layer one row lower. Each weight
    ``mlp.coefs_[l][k][i]`` becomes an edge from node ``k`` of layer ``l`` to
    node ``i`` of layer ``l + 1``.

    Edge width is twice the absolute weight. Negative weights are drawn with
    HSV hue 0 (red), non-negative weights with hue 0.65 (blue).

    Parameters
    ----------
    mlp : object with a ``coefs_`` attribute
        ``coefs_`` must be a non-empty sequence of 2-D weight matrices indexed
        ``[source node][target node]``.

    Returns
    -------
    None
        The graph is rendered through ``nx.draw``.
    """
    weight_matrices = mlp.coefs_
    # n weight matrices connect n + 1 layers, so the input row has y == n.
    top_row = len(weight_matrices)

    G = nx.DiGraph()
    pos = {}

    # Input nodes: one per row of the first weight matrix.
    for i in range(len(weight_matrices[0])):
        pos['Layer0_{}'.format(i)] = (i, top_row)

    # Hidden and output layers are handled uniformly: matrix l connects
    # layer l (its rows) to layer l + 1 (its columns).
    for prev_layer, matrix in enumerate(weight_matrices):
        cur_layer = prev_layer + 1
        row = top_row - cur_layer
        for i in range(len(matrix[0])):
            target = 'Layer{}_{}'.format(cur_layer, i)
            pos[target] = (i, row)
            for k in range(len(matrix)):
                G.add_edge('Layer{}_{}'.format(prev_layer, k), target, weight=matrix[k][i])

    edges = G.edges()
    colors = [colorsys.hsv_to_rgb(0 if G[u][v]['weight'] < 0 else 0.65,
                                  1,
                                  1) for u, v in edges]
    weights = [abs(G[u][v]['weight']) * 2 for u, v in edges]

    nx.draw(G, pos, node_color='y', node_size=450, width=weights, edge_color=colors)
    
# Draw the retrained (4, 4) network.
show_ann(mlp1)

In [None]:
# Hidden layers of 8 and 4 neurons. random_state pins the stochastic parts of
# training so the reported metrics are repeatable.
mlp2 = MLPClassifier(hidden_layer_sizes=(8, 4), max_iter=500, random_state=1)

In [None]:
# Train the (8, 4) network on the training split.
mlp2.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out test split with the (8, 4) network.
predictions2 = mlp2.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted labels for the (8, 4) network.
print(confusion_matrix(y_test,predictions2))

In [None]:
# Text summary of per-class classification metrics for the (8, 4) network.
print(classification_report(y_test,predictions2))

In [None]:
# Describe the fitted (8, 4) network's architecture from its learned parameters.
# mlp2.coefs_[l] is the weight matrix between layer l and layer l + 1, indexed
# [source node][target node]; mlp2.intercepts_[l] holds one bias per target node.

# The output-node count is read from the last weight matrix rather than from
# the number of distinct labels, so it always agrees with the layer shapes
# printed below.
n_output_nodes = len(mlp2.coefs_[-1][0])
print('This dataset has {} input nodes and {} output node(s)'.format(len(X.columns), n_output_nodes))
print('There are {} 2D arrays of coefficients, one for each layer'.format(len(mlp2.coefs_)))
# This header has no value to interpolate, so it carries no `{}` placeholder.
print('The layers have the following number of coefficients:')
for layer_idx, layer_weights in enumerate(mlp2.coefs_):
    m = len(layer_weights)
    n = len(layer_weights[0])
    print('  {}: {}x{} ({} nodes feeding into a layer of {} nodes)'.format(layer_idx, m, n, m, n))
# Print the actual coefficients
# print(mlp2.coefs_)

print()
print('There are {} 1D arrays of intercepts, one for each layer'.format(len(mlp2.intercepts_)))
print('Each layer has {} intercepts, one for each node'.format([len(layer_biases) for layer_biases in mlp2.intercepts_]))

In [None]:
import networkx as nx
import colorsys

# Rebuild and retrain the (8, 4) network with a larger iteration budget before
# drawing it. random_state pins the stochastic parts of training so the
# confusion matrix and the drawing below are repeatable.
mlp2 = MLPClassifier(hidden_layer_sizes=(8, 4), max_iter=5000, random_state=1)
mlp2.fit(X_train, y_train)
predictions2 = mlp2.predict(X_test)
print(confusion_matrix(y_test, predictions2))

def show_ann(mlp):
    """Draw a fitted multi-layer perceptron as a layered directed graph.

    The network is read exclusively from the ``mlp`` argument (never from a
    notebook-level variable), so the function draws whichever model it is
    given.

    Every neuron becomes a node named ``Layer<l>_<i>``. Layer 0 (the inputs)
    sits on the top row and each following layer one row lower. Each weight
    ``mlp.coefs_[l][k][i]`` becomes an edge from node ``k`` of layer ``l`` to
    node ``i`` of layer ``l + 1``.

    Edge width is twice the absolute weight. Negative weights are drawn with
    HSV hue 0 (red), non-negative weights with hue 0.65 (blue).

    Parameters
    ----------
    mlp : object with a ``coefs_`` attribute
        ``coefs_`` must be a non-empty sequence of 2-D weight matrices indexed
        ``[source node][target node]``.

    Returns
    -------
    None
        The graph is rendered through ``nx.draw``.
    """
    weight_matrices = mlp.coefs_
    # n weight matrices connect n + 1 layers, so the input row has y == n.
    top_row = len(weight_matrices)

    G = nx.DiGraph()
    pos = {}

    # Input nodes: one per row of the first weight matrix.
    for i in range(len(weight_matrices[0])):
        pos['Layer0_{}'.format(i)] = (i, top_row)

    # Hidden and output layers are handled uniformly: matrix l connects
    # layer l (its rows) to layer l + 1 (its columns).
    for prev_layer, matrix in enumerate(weight_matrices):
        cur_layer = prev_layer + 1
        row = top_row - cur_layer
        for i in range(len(matrix[0])):
            target = 'Layer{}_{}'.format(cur_layer, i)
            pos[target] = (i, row)
            for k in range(len(matrix)):
                G.add_edge('Layer{}_{}'.format(prev_layer, k), target, weight=matrix[k][i])

    edges = G.edges()
    colors = [colorsys.hsv_to_rgb(0 if G[u][v]['weight'] < 0 else 0.65,
                                  1,
                                  1) for u, v in edges]
    weights = [abs(G[u][v]['weight']) * 2 for u, v in edges]

    nx.draw(G, pos, node_color='y', node_size=450, width=weights, edge_color=colors)
    
# Draw the retrained (8, 4) network.
show_ann(mlp2)