In [1]:
import numpy as np
import pandas as pd
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn

from torch.optim import SGD
from torch.nn.functional import cross_entropy


import warnings
warnings.filterwarnings('ignore')

from sklearn.svm import SVC
from typing import List, Tuple
import matplotlib.pyplot as plt
from copy import deepcopy, copy

from sklearn.linear_model import LogisticRegression

from sklearn import tree
from sklearn.metrics import accuracy_score

In [2]:
def split_data(data_x: np.ndarray, data_y: np.ndarray):
    
    ordering = np.arange(data_x.shape[0])
    np.random.shuffle(ordering)
    data_x = data_x[ordering]
    data_y = data_y[ordering]
    
    valid_start = int(len(data_x) * 0.7)
    test_start = int(len(data_x) * 0.9)
    
    train_set = (data_x[:valid_start], data_y[:valid_start])
    valid_set = (data_x[valid_start:test_start], data_y[valid_start:test_start])
    test_set = (data_x[test_start:], data_y[test_start:])
    
    return train_set, valid_set, test_set

In [3]:
data = pd.read_csv('data.csv')

y = np.array(data['target'])
x = np.array(data.drop(['target'], axis=1))
train_set, valid_set, test_set = split_data(x,y)

x_train = train_set[0]
y_train = train_set[1]

x_val = valid_set[0]
y_val = valid_set[1]

x_test = test_set[0]
y_test = test_set[1]

print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

(2055, 136)
(587, 136)
(294, 136)


## Neural network model

In [4]:
input_dim = x_test.shape[1]
output_dim = 3 

class TheModelClass(nn.Module):
    def __init__(self, input_dim, hidden_dim1,hidden_dim2, output_dim):
        super(TheModelClass, self).__init__()
        
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)        
        self.fc3 = nn.Linear(hidden_dim2, output_dim)
        self.dropout = nn.Dropout(0.9)
    
    def forward(self, x):   ## softmax, tanh, relu, sigmoid
        x = torch.tanh(self.fc1(x)) 
        x = self.dropout(x)
        x = torch.tanh(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

# Bagging using decision tree, SVN and NN:

In [5]:
def create_bootstrap_data():
    bootstrap_ids = np.random.randint(0, len(x_train), size=len(x_train))
    return x_train[bootstrap_ids,:], y_train[bootstrap_ids]

In [6]:
def vote(test_data):
    
    output_DTC1 = model_DTC1.predict(test_data)
    output_DTC2 = model_DTC2.predict(test_data)
    output_DTC3 = model_DTC3.predict(test_data)
    output_DTC4 = model_DTC4.predict(test_data)
    output_DTC5 = model_DTC5.predict(test_data)
    
    output_SVN = model_SVC.predict(test_data)
    output_LR = model_LR.predict(test_data)
    
    x_test = torch.from_numpy(test_data).float()
    with torch.no_grad():
        output_NN = model(x_test)  
        output_NN = np.array(torch.argmax(output_NN, 1))
    
    
    output = [output_DTC1,output_DTC2,output_DTC3,output_DTC4,output_DTC5]
    for i in range(5):
        output.append(output_NN)
        output.append(output_SVN)
    output = np.array(output)
    
    predicted = []
    for i in range(len(test_data)):
        classified = output[:, i]
        counts = np.bincount(classified)
        predicted.append(np.argmax(counts))
    return predicted

In [7]:
# Decision Tree
bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_DTC1 = tree.DecisionTreeClassifier(max_depth=12, criterion='entropy', splitter='best')
model_DTC1.fit(bootstrap_set, bootstrap_labels)

bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_DTC2 = tree.DecisionTreeClassifier(max_depth=12, criterion='entropy', splitter='best')
model_DTC2.fit(bootstrap_set, bootstrap_labels)

bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_DTC3 = tree.DecisionTreeClassifier(max_depth=12, criterion='entropy', splitter='best')
model_DTC3.fit(bootstrap_set, bootstrap_labels)
    
bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_DTC4 = tree.DecisionTreeClassifier(max_depth=12, criterion='entropy', splitter='best')
model_DTC4.fit(bootstrap_set, bootstrap_labels)

bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_DTC5 = tree.DecisionTreeClassifier(max_depth=12, criterion='entropy', splitter='best')
model_DTC5.fit(bootstrap_set, bootstrap_labels)


# Support vector machines
bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_SVC = SVC(kernel ='rbf', C = 10, gamma =  0.1, coef0 = -10)
model_SVC.fit(x_train, y_train)
    
#Logistic regression
bootstrap_set, bootstrap_labels = create_bootstrap_data()
model_LR = LogisticRegression(solver = 'newton-cg',C=10)
model_LR.fit(bootstrap_set, bootstrap_labels)  
   
##Neural network model
model = TheModelClass(input_dim, 40,40,output_dim)
model.load_state_dict(torch.load("neural_network_model.h4"))

<All keys matched successfully>

In [8]:
output = vote(x_train)
target = y_train
accuracy = sum(output == target)/ len(target)
print("Accuracy on validation set: {:.2f} %".format(accuracy*100))

output = vote(x_val)
target = y_val
accuracy = sum(output == target)/ len(target)
print("Accuracy on validation set: {:.2f} %".format(accuracy*100))

output = vote(x_test)
target = y_test
accuracy = sum(output == target)/ len(target)
print("Accuracy on test set: {:.2f} %".format(accuracy*100))

Accuracy on validation set: 99.12 %
Accuracy on validation set: 88.25 %
Accuracy on test set: 93.54 %
