In [91]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.metrics import confusion_matrix, accuracy_score
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader 
import copy

import xgboost as xgb
import lightgbm as lgbm
import catboost as cbt
from sklearn_rvm import EMRVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from datetime import datetime

# Load Dataset

In [220]:
# random_seed
# rand_seed = 1024 
# rand_seed = 2048 
rand_seed = 4096

# Select Dataset

# dataset_name = 'diabetes'
# dataset_name = 'breast_cancer'
dataset_name = 'digits'
# dataset_name = 'iris'

if dataset_name == 'diabetes':
    df = pd.read_csv('diabetes.csv')
    X = df.iloc[:,:-1].values   
    y = df.iloc[:,-1].values    
elif dataset_name == 'breast_cancer':
    breast_cancer = datasets.load_breast_cancer()
    X = breast_cancer.data
    y = breast_cancer.target

elif dataset_name == 'digits':
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
elif dataset_name == 'iris':
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
else:
    pass



In [221]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [222]:
print("X shape")
print(X.shape)
print("Y shape")
print(y.shape)

X shape
(150, 4)
Y shape
(150,)


In [223]:
# split the dataset
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=rand_seed, train_size=0.8)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, random_state=rand_seed, train_size=0.9)

In [224]:
# scaling by minmax scaler
scaler = MinMaxScaler()

scaler = scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
x_valid = scaler.transform(x_valid)

# SVM - Linear

In [225]:
param_grid = [
    {'C': [1.0, 2.0, 3.0, 10., 30., 100., 300.]},
]

elapsed_time_linear_svm = []


svm_classifier = SVC(kernel='linear')

# svm_classifier = svm_classifier.fit(x_train, y_train)
# y_pred = svm_classifier.predict(x_test)

start_time = datetime.now()
grid_search = GridSearchCV(svm_classifier, param_grid, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_svm_classifier = grid_search.fit(x_train, y_train)
elapsed_time_linear_svm.append((datetime.now()-start_time).total_seconds())

start_time = datetime.now()
y_pred = best_svm_classifier.predict(x_test)
elapsed_time_linear_svm.append((datetime.now()-start_time).total_seconds())

acc_svm_linear = accuracy_score(y_test, y_pred)


Fitting 7 folds for each of 7 candidates, totalling 49 fits
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=1.0; total time=   0.0s
[CV] END ..............................................C=2.0; total time=   0.0s
[CV] END ..............................................C=2.0; total time=   0.0s
[CV] END ..............................................C=2.0; total time=   0.0s
[CV] END ..............................................C=2.0; total time=   0.0s
[CV] END ........................................

In [226]:

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print('Best Prameters ', grid_search.best_params_)
print('Accuracy ', acc_svm_linear)
print('Elapsed Time(train, test) ', elapsed_time_linear_svm)

Confusion Matrix
 [[ 9  0  0]
 [ 0 13  1]
 [ 0  0  7]]
Best Prameters  {'C': 10.0}
Accuracy  0.9666666666666667
Elapsed Time(train, test)  [0.086868, 0.0]


# SVM - Kernel

In [227]:
param_grid = [
    # {'kernel': ['linear'], 'C': [1.0, 2.0, 3.0, 10., 30., 100.]},
    {'kernel': ['rbf'], 'C': [1.0, 2.0, 3.0, 5.0, 10., 30., 100.],
    'gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0]},
]

elapsed_time_kernel_svm = []

svm_classifier = SVC(kernel='rbf')
# svm_classifier = svm_classifier.fit(x_train, y_train)

start_time = datetime.now()
grid_search = GridSearchCV(svm_classifier, param_grid, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_svm_classifier = grid_search.fit(x_train, y_train)
elapsed_time_kernel_svm.append((datetime.now()-start_time).total_seconds())


# y_pred = svm_classifier.predict(x_test)
start_time = datetime.now()
y_pred = best_svm_classifier.predict(x_test)
elapsed_time_kernel_svm.append((datetime.now()-start_time).total_seconds())
acc_svm_kernel = accuracy_score(y_test, y_pred)



Fitting 7 folds for each of 42 candidates, totalling 294 fits
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ......................C=1.0, gamma=0.0

In [228]:

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print('Best Prameters ', grid_search.best_params_)
print('Accuracy ', acc_svm_kernel)
print('Elapsed Time(train, test) ', elapsed_time_kernel_svm)

Confusion Matrix
 [[ 9  0  0]
 [ 0 13  1]
 [ 0  0  7]]
Best Prameters  {'C': 3.0, 'gamma': 1.0, 'kernel': 'rbf'}
Accuracy  0.9666666666666667
Elapsed Time(train, test)  [0.588009, 0.001]


# Basic Deep Neural Network

In [229]:
print('Unique : ', np.unique(y_train))

num_output = np.unique(y_train, axis=0)

num_output = num_output.shape[0]

y_train_onehot = np.eye(num_output)[y_train]
y_test_onehot = np.eye(num_output)[y_test]
y_valid_onehot = np.eye(num_output)[y_valid]
print('Num Output ', num_output)

Unique :  [0 1 2]
Num Output  3


In [230]:
class TrainData(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)



class TestData(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)
    

train_data = TrainData(torch.FloatTensor(x_train), 
                       torch.FloatTensor(y_train_onehot))

test_data = TestData(torch.FloatTensor(x_test))
valid_data = TrainData(torch.FloatTensor(x_valid), torch.FloatTensor(y_valid_onehot))

In [231]:
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=1)
valid_loader = DataLoader(dataset=valid_data, batch_size=2)

In [232]:
x_train.shape

(108, 4)

In [233]:
EPOCHS = 100 
BATCH_SIZE = 128 
LEARNING_RATE = 0.01

NUM_INPUT = x_train.shape[1]
NUM_OUTPUT = num_output
NUM_1ST_HIDDEN = 32
NUM_2ND_HIDDEN = 16
NUM_1ST_DROPOUT = 0.2
NUM_2ND_DROPOUT = 0.1

In [234]:
class BasicClassification(nn.Module):
    def __init__(self) -> None:
        super(BasicClassification, self).__init__()

        self.layer_1 = nn.Linear(NUM_INPUT, NUM_1ST_HIDDEN)
        self.layer_2 = nn.Linear(NUM_1ST_HIDDEN, NUM_2ND_HIDDEN)
        self.layer_out = nn.Linear(NUM_2ND_HIDDEN, NUM_OUTPUT)

        # self.actvation = nn.ReLU()
        self.actvation_1 = nn.SELU()
        self.actvation_2 = nn.SELU()
        self.dropout_1 = nn.Dropout(p=NUM_1ST_DROPOUT)
        self.dropout_2 = nn.Dropout(p=NUM_2ND_DROPOUT)
        self.batchnorm_1 = nn.BatchNorm1d(NUM_1ST_HIDDEN)
        self.batchnorm_2 = nn.BatchNorm1d(NUM_2ND_HIDDEN)
    
    def forward(self, inputs):
        x = self.actvation_1(self.layer_1(inputs))
        x = self.batchnorm_1(x)
        x = self.dropout_1(x)
        x = self.actvation_2(self.layer_2(x))
        x = self.batchnorm_2(x)
        x = self.dropout_2(x)
        x = self.layer_out(x)

        return x


In [235]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [236]:
model = BasicClassification()
model.to(device)

print(model)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

BasicClassification(
  (layer_1): Linear(in_features=4, out_features=32, bias=True)
  (layer_2): Linear(in_features=32, out_features=16, bias=True)
  (layer_out): Linear(in_features=16, out_features=3, bias=True)
  (actvation_1): SELU()
  (actvation_2): SELU()
  (dropout_1): Dropout(p=0.2, inplace=False)
  (dropout_2): Dropout(p=0.1, inplace=False)
  (batchnorm_1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm_2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [237]:
def calc_accuracy(y_pred, y_test):
    # y_pred_tag = torch.round(torch.sigmoid(y_pred))
    probs = torch.softmax(y_pred, dim=1)
    winners = probs.argmax(dim=1)
    winners_y_test = y_test.argmax(dim=1)

    # correct_results_sum = (y_pred_tag == y_test).sum().float()
    correct_results_sum = (winners == winners_y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100, decimals=2)
    
    return acc

## Train ANN

In [238]:
best_model  = None
best_loss_on_valid = 999999999
num_train_data = len(train_loader)
num_eval_data = len(valid_loader)


elapsed_time_basic_ann = []

start_time = datetime.now()


for epoch in range(1, EPOCHS+1):
    epoch_loss = 0
    epoch_acc = 0

    eval_epoch_loss = 0
    eval_epoch_acc = 0

    model.train()
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()

        y_pred = model(x_batch)

        loss = criterion(y_pred, y_batch)
        acc = calc_accuracy(y_pred, y_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
    
    if epoch % 10 == 0:
        
        with torch.no_grad():
            model.eval()
            for x, y in valid_loader:
                x = x.to(device)
                y = y.to(device)

                output = model(x)

                eval_loss = criterion(output, y)
                eval_acc = calc_accuracy(output, y)

                eval_epoch_loss += eval_loss.item()
                eval_epoch_acc += eval_acc.item()
        
        if best_loss_on_valid >= (eval_epoch_loss/num_eval_data):
            best_loss_on_valid = (eval_epoch_loss/num_eval_data)
            best_model = copy.deepcopy(model)
            print("Best Model is copied - Best Loss : ", best_loss_on_valid)
        


        print(f"Epoch {epoch+0:03}: : Loss: T_{epoch_loss/num_train_data:.3f} V_{eval_epoch_loss/num_eval_data:.3f} | Acc: T_{epoch_acc/num_train_data:.3f}) V_{eval_epoch_acc/num_eval_data:.3f}")


elapsed_time_basic_ann.append((datetime.now()-start_time).total_seconds())

Best Model is copied - Best Loss :  0.24718873658760762
Epoch 010: : Loss: T_0.147 V_0.247 | Acc: T_95.239) V_100.000
Epoch 020: : Loss: T_0.193 V_0.274 | Acc: T_89.286) V_83.333
Best Model is copied - Best Loss :  0.11082583795844887
Epoch 030: : Loss: T_0.160 V_0.111 | Acc: T_95.536) V_100.000
Epoch 040: : Loss: T_0.200 V_0.299 | Acc: T_91.369) V_83.333
Epoch 050: : Loss: T_0.177 V_0.154 | Acc: T_93.750) V_91.667
Epoch 060: : Loss: T_0.110 V_0.196 | Acc: T_97.321) V_83.333
Epoch 070: : Loss: T_0.115 V_0.241 | Acc: T_95.536) V_83.333
Epoch 080: : Loss: T_0.253 V_0.134 | Acc: T_89.286) V_91.667
Best Model is copied - Best Loss :  0.08999127121205674
Epoch 090: : Loss: T_0.088 V_0.090 | Acc: T_97.321) V_100.000
Epoch 100: : Loss: T_0.081 V_0.197 | Acc: T_97.321) V_83.333


In [239]:

# with torch.no_grad():
# model.eval()
best_model.eval()
data = torch.from_numpy(x_test).float().to(device)
answer = torch.from_numpy(y_test_onehot).float().to(device)


# data = torch.from_numpy(x_train).float().to(device)
# answer = torch.from_numpy(y_train_onehot).float().to(device)


# data = torch.from_numpy(x_valid).float().to(device)
# answer = torch.from_numpy(y_valid_onehot).float().to(device)

start_time = datetime.now()
output = best_model(data)
acc_basic_ann = calc_accuracy(output, answer)
elapsed_time_basic_ann.append((datetime.now()-start_time).total_seconds())

print('Accuracy ', acc_basic_ann)
print('elapsed time ', elapsed_time_basic_ann)

Accuracy  tensor(100., device='cuda:0')
elapsed time  [7.391372, 0.002998]


# TabNet

In [252]:
tabnet_classifier = TabNetClassifier()

elapsed_time_tabnet = []

start_time = datetime.now()
tabnet_classifier.fit(
  X_train=x_train, y_train=y_train,
  eval_set = [(x_valid, y_valid)],
  patience=1000,
  max_epochs=100,
  batch_size=32
)
elapsed_time_tabnet.append((datetime.now()-start_time).total_seconds())



epoch 0  | loss: 1.2028  | val_0_accuracy: 0.5     |  0:00:00s
epoch 1  | loss: 0.56858 | val_0_accuracy: 0.5     |  0:00:00s
epoch 2  | loss: 0.36268 | val_0_accuracy: 0.5     |  0:00:00s
epoch 3  | loss: 0.34121 | val_0_accuracy: 0.5     |  0:00:01s
epoch 4  | loss: 0.25712 | val_0_accuracy: 0.66667 |  0:00:01s
epoch 5  | loss: 0.19336 | val_0_accuracy: 0.66667 |  0:00:01s
epoch 6  | loss: 0.094   | val_0_accuracy: 0.5     |  0:00:01s
epoch 7  | loss: 0.16285 | val_0_accuracy: 0.5     |  0:00:02s
epoch 8  | loss: 0.11231 | val_0_accuracy: 0.5     |  0:00:02s
epoch 9  | loss: 0.12046 | val_0_accuracy: 0.41667 |  0:00:02s
epoch 10 | loss: 0.09339 | val_0_accuracy: 0.41667 |  0:00:02s
epoch 11 | loss: 0.09355 | val_0_accuracy: 0.5     |  0:00:03s
epoch 12 | loss: 0.06057 | val_0_accuracy: 0.58333 |  0:00:03s
epoch 13 | loss: 0.15789 | val_0_accuracy: 0.83333 |  0:00:03s
epoch 14 | loss: 0.1253  | val_0_accuracy: 0.91667 |  0:00:04s
epoch 15 | loss: 0.06379 | val_0_accuracy: 0.91667 |  0



In [253]:
start_time = datetime.now()
y_pred = tabnet_classifier.predict(x_test)
elapsed_time_tabnet.append((datetime.now()-start_time).total_seconds())


acc_tabnet = accuracy_score(y_test, y_pred)
print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))

print('Accuracy ', acc_tabnet)
print('elapsed time ', elapsed_time_tabnet)

Confusion Matrix
 [[ 4  5  0]
 [ 0 12  2]
 [ 0  0  7]]
Accuracy  0.7666666666666667
elapsed time  [80.806523, 0.022994]


# XGBoost

In [242]:
xgb_classifier = xgb.XGBClassifier()

xgb_parameters ={'max_depth' : [1, 2, 3,4,5,6, 7] , 'n_estimators': [4, 8, 12,24,32, 64]}
#  'learning_rate' : [0.01,0.02,0.03],
#  'gamma': [0.5, 1, 2], 'random_state':[99]}
elapsed_time_xgboost = []

start_time = datetime.now()
xgb_grid_search = GridSearchCV(xgb_classifier, xgb_parameters, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_xgb_classifier = xgb_grid_search.fit(x_train, y_train)
elapsed_time_xgboost.append((datetime.now()-start_time).total_seconds())

Fitting 7 folds for each of 42 candidates, totalling 294 fits
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n

In [243]:
# y_pred = xgb_classifier.predict(x_test)
start_time = datetime.now()
y_pred = best_xgb_classifier.predict(x_test)
elapsed_time_xgboost.append((datetime.now()-start_time).total_seconds())


acc_xgboost = accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print("best parameters ", xgb_grid_search.best_params_)
print('Accuracy ', acc_xgboost)
print('elapsed time ', elapsed_time_xgboost)

Confusion Matrix
 [[ 9  0  0]
 [ 0 13  1]
 [ 0  0  7]]
best parameters  {'max_depth': 1, 'n_estimators': 12}
Accuracy  0.9666666666666667
elapsed time  [8.34208, 0.003003]


# LightGBM

In [244]:
lgbm_classifier = lgbm.LGBMClassifier()

# lgbm_classifier=lgbm_classifier.fit(x_train, y_train)
# y_pred = lgbm_classifier.predict(x_test)


lgbm_parameters ={'max_depth' : [1, 2, 3,4,5,6, 7] , 'n_estimators': [4, 8, 12,24,32, 64]}
#  'learning_rate' : [0.01,0.02,0.03],
#  'gamma': [0.5, 1, 2], 'random_state':[99]}
elapsed_time_lgbm= []

start_time = datetime.now()
lgbm_grid_search = GridSearchCV(lgbm_classifier, lgbm_parameters, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_lgbm_classifier = lgbm_grid_search.fit(x_train, y_train)
elapsed_time_lgbm.append((datetime.now()-start_time).total_seconds())


start_time = datetime.now()
y_pred = best_lgbm_classifier.predict(x_test)
elapsed_time_lgbm.append((datetime.now()-start_time).total_seconds())


acc_lgbm = accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print("best parameters ", lgbm_grid_search.best_params_)
print('Accuracy ', acc_lgbm)
print('elapsed time ', elapsed_time_lgbm)


Fitting 7 folds for each of 42 candidates, totalling 294 fits
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=4; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n_estimators=8; total time=   0.0s
[CV] END ........................max_depth=1, n

# CatBoost

In [245]:
# cbt_classifier=cbt_classifier.fit(x_train, y_train)
cbt_parameters = {
'depth' : [1, 2, 3,4,5,6, 7] , 'n_estimators': [4, 8, 12,24,32, 64]
    # 'depth'         : [3,4,5,6],

    #             'learning_rate' : [0.01,0.02,0.03],
    #             'n_estimators': [12,24,32], 
}

# y_pred = cbt_classifier.predict(x_test)
cbt_classifier = cbt.CatBoostClassifier()

elapsed_time_cbt = []

start_time = datetime.now()
cbt_grid_search = GridSearchCV(cbt_classifier, cbt_parameters, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_cbt_classifier = cbt_grid_search.fit(x_train, y_train)
elapsed_time_cbt.append((datetime.now()-start_time).total_seconds())

start_time = datetime.now()
y_pred = best_cbt_classifier.predict(x_test)
elapsed_time_cbt.append((datetime.now()-start_time).total_seconds())

cbt_grid_search.best_params_

acc_cbt= accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print("best parameters ", cbt_grid_search.best_params_)
print('Accuracy ', acc_cbt)
print('elapsed time ', elapsed_time_cbt)



Fitting 7 folds for each of 42 candidates, totalling 294 fits
Learning rate set to 0.5
0:	learn: 0.8083789	total: 459us	remaining: 1.38ms
1:	learn: 0.5670271	total: 886us	remaining: 886us
2:	learn: 0.4748203	total: 1.23ms	remaining: 409us
3:	learn: 0.3846956	total: 1.54ms	remaining: 0us
[CV] END ............................depth=1, n_estimators=4; total time=   0.0s
Learning rate set to 0.5
0:	learn: 0.8134038	total: 390us	remaining: 1.17ms
1:	learn: 0.5525684	total: 749us	remaining: 749us
2:	learn: 0.4627084	total: 1.08ms	remaining: 360us
3:	learn: 0.3749818	total: 1.41ms	remaining: 0us
[CV] END ............................depth=1, n_estimators=4; total time=   0.0s
Learning rate set to 0.5
0:	learn: 0.8291352	total: 435us	remaining: 1.3ms
1:	learn: 0.5656094	total: 768us	remaining: 768us
2:	learn: 0.4798161	total: 1.09ms	remaining: 364us
3:	learn: 0.4018519	total: 1.4ms	remaining: 0us
[CV] END ............................depth=1, n_estimators=4; total time=   0.0s
Learning rate set t

# Random Forest

In [246]:
rf_param_grid = { 
    'max_depth' : [1, 2, 3, 4, 5,6, 7],
    'n_estimators': [10, 20, 30, 40, 50, 100],
    # 'max_features': ['auto', 'sqrt'],
    # 'criterion' :['gini', 'entropy']
}

rf_classifier = RandomForestClassifier()

elapsed_time_rf = []

start_time = datetime.now()
rf_grid_search = GridSearchCV(rf_classifier, rf_param_grid, cv=7, scoring="neg_mean_squared_error", verbose=2)
best_rf_classifier = rf_grid_search.fit(x_train, y_train)
elapsed_time_rf.append((datetime.now()-start_time).total_seconds())



start_time = datetime.now()
y_pred = best_rf_classifier.predict(x_test)
elapsed_time_rf.append((datetime.now()-start_time).total_seconds())


best_rf_classifier.best_params_

acc_rf = accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print("best parameters ", rf_grid_search.best_params_)
print('Accuracy ', acc_rf)
print('elapsed time ', elapsed_time_rf)


Fitting 7 folds for each of 42 candidates, totalling 294 fits
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=10; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=20; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=20; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=20; total time=   0.0s
[CV] END .......................max_depth=1, n_estimators=20; total time=   0.0s
[CV] END .......................max_depth=1, n_

# Linear RVM(Relevance Vector Machine)

In [247]:
em_rvc = EMRVC(kernel="linear")


elapsed_time_linear_rvm = []

start_time = datetime.now()
em_rvc = em_rvc.fit(x_train, y_train)
elapsed_time_linear_rvm.append((datetime.now()-start_time).total_seconds())


start_time = datetime.now()
y_pred = em_rvc.predict(x_test)
elapsed_time_linear_rvm.append((datetime.now()-start_time).total_seconds())

acc_linear_rvm = accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
# print("best parameters ", rf_grid_search.best_params_)
print('Accuracy ', acc_linear_rvm)
print('elapsed time ', elapsed_time_linear_rvm)

# print(clf.predict(x_test))
# print(clf.predict_proba(X))
# print(clf.score(X, y))

Confusion Matrix
 [[ 9  0  0]
 [ 0 13  1]
 [ 0  0  7]]
Accuracy  0.9666666666666667
elapsed time  [45.461231, 0.002]


# Kernel RVM(Relevance Vector Machine)

In [248]:
em_rvc = EMRVC(kernel="rbf")
elapsed_time_kernel_rvm = []

start_time = datetime.now()
em_rvc = em_rvc.fit(x_train, y_train)
elapsed_time_kernel_rvm.append((datetime.now()-start_time).total_seconds())


start_time = datetime.now()
y_pred = em_rvc.predict(x_test)
elapsed_time_kernel_rvm.append((datetime.now()-start_time).total_seconds())


acc_kernel_rvm = accuracy_score(y_test, y_pred)

print('Confusion Matrix\n', confusion_matrix(y_test, y_pred))
print('Accuracy ', acc_kernel_rvm)
print('elapsed time ', elapsed_time_kernel_rvm)




Confusion Matrix
 [[ 9  0  0]
 [ 0 13  1]
 [ 0  0  7]]
Accuracy  0.9666666666666667
elapsed time  [30.162868, 0.003254]
