# AML ASSIGNMENT 2
- Advanced Machine Learning, Innopolis University 
- Professor: Witold Pedrycz
- Teaching Assistant: Gcinizwe Dlamini

Task Description: [here](https://hackmd.io/@gFZmdMTOQxGFHEFqqU8pMQ/Hk7A_FIRO#Labelling-logic)

Dataset:[Task 1](https://drive.google.com/file/d/1iVl4Q4Bq3Fbwv60lLthfBc6eYxYLrdnU/view?usp=sharing) & [Task 2](https://cloudstor.aarnet.edu.au/plus/s/2DhnLGDdEECo4ys?path=%2FUNSW-NB15%20-%20CSV%20Files%2Fa%20part%20of%20training%20and%20testing%20set)

In [1]:
import pandas as pd
import numpy as np

In [2]:
identity_data = pd.read_csv("data_identity.csv")
transaction_data = pd.read_csv("data_transaction.csv")

In [3]:
obj_transaction_data = transaction_data.select_dtypes(["object"]).columns
obj_identity_data = identity_data.select_dtypes(["object"]).columns

print(f'Transaction data categorical features: {transaction_data.select_dtypes(["object"]).columns}\n')
print(f'Identity data categorical features: {identity_data.select_dtypes(["object"]).columns}\n')

Transaction data categorical features: Index(['ProductCD', 'card4', 'card6', 'P_emaildomain', 'R_emaildomain', 'M1',
       'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9'],
      dtype='object')

Identity data categorical features: Index(['id_12', 'id_15', 'id_16', 'id_23', 'id_27', 'id_28', 'id_29', 'id_30',
       'id_31', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38',
       'DeviceType', 'DeviceInfo'],
      dtype='object')



In [4]:
from sklearn.preprocessing import LabelEncoder

# Encoding categorical features using LabelEncoder

le = LabelEncoder()
identity_data[obj_identity_data] = identity_data[obj_identity_data].apply(le.fit_transform)
transaction_data[obj_transaction_data] = transaction_data[obj_transaction_data].apply(le.fit_transform)

In [5]:
print(transaction_data.select_dtypes(["object"]).columns)
print(identity_data.select_dtypes(["object"]).columns)

Index([], dtype='object')
Index([], dtype='object')


In [7]:
transaction_notnan_df = transaction_data.notna()
identity_notnan_df = identity_data.notna()

In [8]:
full_data = transaction_data.join(identity_data.set_index("TransactionID"), on="TransactionID")

In [9]:
from sklearn.preprocessing import RobustScaler

# Data normalizing using RobustScaler

transformer = RobustScaler().fit_transform(full_data.fillna(0).values)

In [11]:
data_notnan_df = full_data.notna()
data_n = data_notnan_df.astype("int").values
data = np.concatenate([transformer, data_n], axis=1)

: 

: 

In [11]:
robust_df = pd.DataFrame(transformer, columns=full_data.columns)
X = robust_df.drop(columns=['isFraud','TransactionID'])
y = robust_df['isFraud']

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [14]:
batch_size = 128

# convert to tensors
X_train_ten = torch.Tensor(X_train.values)
X_test_ten = torch.Tensor(X_test.values)
y_train_ten = torch.Tensor(y_train.values)
y_test_ten = torch.Tensor(y_test.values)

# create DataLoaders
train_loader = DataLoader(TensorDataset(X_train_ten, y_train_ten), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_ten, y_test_ten), batch_size=batch_size, shuffle=True)

In [31]:
for x,_ in train_loader:
  print(x.shape)
  break

torch.Size([128, 432])


In [23]:
## modified undercomplete AE from the lab
class under_autoencoder(nn.Module):
    def __init__(self, input_size, latent_dim):
      super(under_autoencoder, self).__init__()
      # Step 1 : Define the encoder 
      # Step 2 : Define the decoder
      # Step 3 : Initialize the weights (optional)
      self.encoder = nn.Sequential(
          nn.Linear(input_size, input_size//2),
          nn.ReLU(True),
          nn.Linear(input_size//2, input_size//3),
          nn.Linear(input_size//3, input_size//4),
          nn.Tanh(),
          nn.Linear(input_size//4, latent_dim)
      )
      # the output size of the decoder should be twice smaller than the input size to the encoder
      self.decoder = nn.Sequential(
          nn.Linear(latent_dim, input_size//4),
          nn.ReLU(True),
          nn.Linear(input_size//4, input_size//3),
          nn.Tanh(),
          nn.Linear(input_size//3, input_size//2)
      )
      self.encoder.apply(self.__init_weights)
      self.decoder.apply(self.__init_weights)
        
    def forward(self, x):
      # Step 1: Pass the input through encoder to get latent representation
      # Step 2: Take latent representation and pass through decoder
      x = self.encoder(x)
      x = self.decoder(x)
      return x
        
    
    def encode(self,input):
      #Step 1: Pass the input through the encoder to get latent representation
      return self.encoder(input)
    
    def decode(self, input):
      return self.decoder(input)
    
    def __init_weights(self,m):
      #Init the weights (optional)
      if type(m) == nn.Linear:
          torch.nn.init.xavier_uniform_(m.weight)
          m.bias.data.fill_(0.01)

In [27]:
learning_rate = 1e-4
eps = 1e-9

AE = under_autoencoder(864,200).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.RMSprop(AE.parameters(), lr=learning_rate, eps=eps)

In [29]:
num_epochs = 20

best_model = AE.state_dict()
best_loss = 1e300
best_epoch = 0

for epoch in range(num_epochs):
  epoch_loss = 0.0
  for X, _ in train_loader:
    X = X.to(device)

    optimizer.zero_grad()
    # forward
    output = AE(X)
    loss = criterion(output * X[:, 432:], X[:, :432]) # the features of X already modified by missing values indicator; only need to modify the output

    # backward
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()

  # log
  print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, epoch_loss / len(train_loader)))
  
  # test the model
  with torch.no_grad():
    tot_loss = 0
    for X, _ in test_loader:
      X = X.to(device)
      
      output = AE(X)
      tot_loss += criterion(output * X[:, 432:], X[:, :432]).item()
      
    tot_loss = tot_loss / len(test_loader)
    
    print("Test loss:", tot_loss)
      
    if tot_loss < best_loss:
      best_loss = tot_loss
      best_model = AE.state_dict()
      best_epoch = epoch
    else:
      AE.load_state_dict(best_model)
      
print(f"The best test loss is {best_loss}, from the epoch number {best_epoch + 1}")

RuntimeError: ignored

In [17]:
num_epochs = 20

best_model = AE.state_dict()
best_loss = 1e300
best_epoch = 0
for epoch in range(num_epochs):
  epoch_loss = 0.0
  for X in train_loader:
    X = X[0].to(device)

    optimizer.zero_grad()
    # forward
    output = AE(X)
    loss = criterion(output, X)

    # backward
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()

  # log
  print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.item()))

RuntimeError: ignored

# Task 2


In [288]:
import pandas as pd
import numpy as np

In [289]:
# Dataset loading 
train = pd.read_csv('/content/drive/MyDrive/Assignment2_AML/Task 2/datasets/train.csv')
test = pd.read_csv('/content/drive/MyDrive/Assignment2_AML/Task 2/datasets/test.csv')

In [290]:
test.shape

(175341, 45)

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [292]:
train.head()

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,1.1e-05,udp,-,INT,2,0,496,0,90909.0902,...,1,2,0,0,0,1,2,0,Normal,0
1,2,8e-06,udp,-,INT,2,0,1762,0,125000.0003,...,1,2,0,0,0,1,2,0,Normal,0
2,3,5e-06,udp,-,INT,2,0,1068,0,200000.0051,...,1,3,0,0,0,1,3,0,Normal,0
3,4,6e-06,udp,-,INT,2,0,900,0,166666.6608,...,1,3,0,0,0,2,3,0,Normal,0
4,5,1e-05,udp,-,INT,2,0,2126,0,100000.0025,...,1,3,0,0,0,2,3,0,Normal,0


### Data Preprocessing

In [293]:
# Concatinate train and test datasets for data preprocessing

full_data = pd.concat([train, test], ignore_index=True)

In [294]:
# Divide data into x - features, y - labels

X = full_data.drop('label', axis=1)
y = full_data['label']

In [295]:
X.head()

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat
0,1,1.1e-05,udp,-,INT,2,0,496,0,90909.0902,...,1,1,2,0,0,0,1,2,0,Normal
1,2,8e-06,udp,-,INT,2,0,1762,0,125000.0003,...,1,1,2,0,0,0,1,2,0,Normal
2,3,5e-06,udp,-,INT,2,0,1068,0,200000.0051,...,1,1,3,0,0,0,1,3,0,Normal
3,4,6e-06,udp,-,INT,2,0,900,0,166666.6608,...,2,1,3,0,0,0,2,3,0,Normal
4,5,1e-05,udp,-,INT,2,0,2126,0,100000.0025,...,2,1,3,0,0,0,2,3,0,Normal


In [296]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 257673 entries, 0 to 257672
Data columns (total 44 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   id                 257673 non-null  int64  
 1   dur                257673 non-null  float64
 2   proto              257673 non-null  object 
 3   service            257673 non-null  object 
 4   state              257673 non-null  object 
 5   spkts              257673 non-null  int64  
 6   dpkts              257673 non-null  int64  
 7   sbytes             257673 non-null  int64  
 8   dbytes             257673 non-null  int64  
 9   rate               257673 non-null  float64
 10  sttl               257673 non-null  int64  
 11  dttl               257673 non-null  int64  
 12  sload              257673 non-null  float64
 13  dload              257673 non-null  float64
 14  sloss              257673 non-null  int64  
 15  dloss              257673 non-null  int64  
 16  si

In [297]:
# Getting object type features

X.select_dtypes(exclude=['int64','float64']).columns 

Index(['proto', 'service', 'state', 'attack_cat'], dtype='object')

In [298]:
max(X['spkts'].value_counts())

122182

In [299]:
# Checking frequency of values in each column
# Getting all feture values with frequency higher than 95

drop_list = []
for column in X.columns:
    percentage = (max(X[column].value_counts())*100) / X.shape[0]
    if percentage > 95:
        print(f"{column}: has {percentage}% unique value")
        drop_list.append(column)

is_ftp_login: has 98.74065191153129% unique value
ct_ftp_cmd: has 98.73987573397291% unique value
is_sm_ips_ports: has 98.5726094701424% unique value


In [300]:
# Drop features -> is_ftp_login, -> ct_ftp_cmd, -> is_sm_ips_ports

X = X.drop(columns = drop_list)

In [301]:
# full_data.shape

In [302]:
# Checking for NaN values
X.isnull().any().any()

False

In [303]:
# Getting categorical features

object_cols = X.select_dtypes(exclude=['int64','float64']).columns
object_cols

Index(['proto', 'service', 'state', 'attack_cat'], dtype='object')

In [304]:
from sklearn.preprocessing import LabelEncoder

# Encoding categorical features using LabelEncoder

le = LabelEncoder()
X[object_cols] = X[object_cols].apply(le.fit_transform)

In [305]:
# Checking for categorical features

X.select_dtypes(exclude=['int64','float64']).columns

Index([], dtype='object')

In [306]:
# Checking for categorical features

(X.dtypes == 'object').any()

False

In [307]:
from sklearn.preprocessing import RobustScaler

# Data normalizing using RobustScaler

transformer = RobustScaler().fit_transform(X)

In [308]:
robust_df = pd.DataFrame(transformer, columns = X.columns)

In [309]:
# robust_df.head()

In [310]:
X = robust_df.drop(columns = ['dwin','id'],axis=1)

In [311]:
# Splitting dataset 

x_train = X[:len(train)]
y_train = y[:len(train)]

x_test = X[len(train):]
y_test = y[len(train):]


### Conditional GAN

In [312]:
import matplotlib.pyplot as plt
import torch.utils.data as data_utils
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [313]:
batch_size = 64

# Converting data to tensor
X_train = torch.Tensor(x_train.to_numpy())
X_test = torch.Tensor(x_test.to_numpy())

Y_train = torch.tensor(y_train.to_numpy(dtype=int), dtype=torch.int64)
Y_test = torch.tensor(y_test.to_numpy(dtype=int), dtype=torch.int64)

In [314]:
# Data loading 
train_loader = DataLoader(TensorDataset(X_train, Y_train), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=batch_size, shuffle=True)

In [315]:
# Conditional Generative Adversarial Network (cGAN) model
# Generator class
class Generator(nn.Module):
    
  def __init__(self, z_size=100, h_dim=128):
    super(Generator, self).__init__()
    
    self.g_model = nn.Sequential(
      nn.Linear(z_size, h_dim),
      nn.Linear(h_dim, h_dim*2),
      nn.ReLU(True),
      nn.Linear(h_dim*2, h_dim*4),
      nn.ReLU(True),
      nn.Linear(h_dim*4, 39)
    )
    
    # self.generator.apply(self.__init_weights)
    self.__init_weights(self.g_model)

  def forward(self, z, y):
    y = F.one_hot(y, num_classes=10)
    
    z = torch.cat((z, y), 1)
    x = self.g_model(z)
    
    return x
  
  def __init_weights(self,m):
    #Init the weights (optional)
    if type(m) == nn.Linear:
      torch.nn.init.xavier_uniform_(m.weight)
      m.bias.data.fill_(0.01)
      
# Discriminator class
class Discriminator(nn.Module):

  def __init__(self, h_dim=128):
    super(Discriminator, self).__init__()
    
    self.d_model = nn.Sequential(
      nn.Linear(49, h_dim*4),
      nn.Linear(h_dim*4, h_dim*2),
      nn.LeakyReLU(inplace=True),
      nn.Linear(h_dim*2, h_dim),
      nn.LeakyReLU(inplace=True),
      nn.Linear(h_dim, 1),
      nn.Sigmoid()
    )

    self.__init_weights(self.d_model)
    # self.discriminator.apply(self.__init_weights)

  def forward(self, x, y):

    y = F.one_hot(y, num_classes=10)
    x = torch.cat((x, y), 1)
    x = self.d_model(x)
    
    return x
  
  def __init_weights(self,m):
 
    if type(m) == nn.Linear:
      torch.nn.init.xavier_uniform_(m.weight)
      m.bias.data.fill_(0.01)

In [316]:
def real_loss(d_out):
  
    batch_size = d_out.size(0)
    labels = torch.FloatTensor(batch_size).uniform_(0.9, 1).to(device)
    
    loss_fn = nn.BCELoss()
    
    loss = loss_fn(d_out.squeeze(), labels)
    return loss

def fake_loss(d_out):
    
    batch_size = d_out.size(0)
    labels = torch.FloatTensor(batch_size).uniform_(0, 0.1).to(device)
    
    loss_fn = nn.BCELoss()
    
    loss = loss_fn(d_out.squeeze(), labels)
    return loss

In [317]:
learning_rate = 1e-4
eps = 1e-9

G = Generator().to(device)
D = Discriminator().to(device)

g_optimizer = torch.optim.RMSprop(G.parameters(), lr=learning_rate, eps=eps)
d_optimizer = torch.optim.RMSprop(D.parameters(), lr=learning_rate, eps=eps)

In [319]:
num_epochs = 100
z_size = 90

losses = []

# epoch training loop
for epoch in range(num_epochs):

    for _, (X, y) in enumerate(train_loader):

        batch_size = X.size(0)
        z = np.random.uniform(-1, 1, size=(batch_size, z_size))
        z = torch.from_numpy(z).float().to(device)
        
        # Discriminator on real and fake images
        d_optimizer.zero_grad()
        
        X = X.to(device)
        y = y.to(device)
        
        d_real = D(X, y)
        d_loss = real_loss(d_real)
        
        g_fake = G(z, y)
        
        d_fake = D(g_fake, y)
        d_loss = d_loss + fake_loss(d_fake)
        
        d_loss.backward()
        d_optimizer.step()

        # Generator on real and fake images
        g_optimizer.zero_grad()
        
        g_fake = G(z, y)
        
        g_loss = real_loss(D(g_fake, y))
        
        g_loss.backward()
        g_optimizer.step()
            
    print('epoch [{}/{}], d_loss: {:6.4f} | g_loss: {:6.4f}'.format(epoch + 1, num_epochs, d_loss.item(), g_loss.item()))
    losses.append((d_loss.item(), g_loss.item()))

epoch [1/100], d_loss: 0.9196 | g_loss: 1.3055
epoch [2/100], d_loss: 0.8940 | g_loss: 1.3002
epoch [3/100], d_loss: 1.4531 | g_loss: 1.6703
epoch [4/100], d_loss: 0.9184 | g_loss: 1.3136
epoch [5/100], d_loss: 0.8509 | g_loss: 1.7736
epoch [6/100], d_loss: 1.0645 | g_loss: 1.7184
epoch [7/100], d_loss: 1.0939 | g_loss: 1.3125
epoch [8/100], d_loss: 1.0098 | g_loss: 1.4511
epoch [9/100], d_loss: 0.7683 | g_loss: 1.7462
epoch [10/100], d_loss: 1.3289 | g_loss: 1.1777
epoch [11/100], d_loss: 0.9603 | g_loss: 1.5268
epoch [12/100], d_loss: 0.9676 | g_loss: 1.8301
epoch [13/100], d_loss: 1.1452 | g_loss: 1.9160
epoch [14/100], d_loss: 0.7565 | g_loss: 1.8610
epoch [15/100], d_loss: 0.9324 | g_loss: 1.6378
epoch [16/100], d_loss: 1.3895 | g_loss: 2.0963
epoch [17/100], d_loss: 0.8784 | g_loss: 1.7823
epoch [18/100], d_loss: 1.0552 | g_loss: 1.7201
epoch [19/100], d_loss: 0.7755 | g_loss: 1.7582
epoch [20/100], d_loss: 0.7384 | g_loss: 2.0835
epoch [21/100], d_loss: 1.0354 | g_loss: 1.9678
e

In [334]:
losses = np.asarray(losses).T

array([[0.91961777, 0.89403558, 1.45306993, 0.91836882, 0.85088539,
        1.06447673, 1.09390175, 1.00982428, 0.7682991 , 1.32887363,
        0.96032488, 0.96761698, 1.14524543, 0.75653231, 0.93237972,
        1.38946748, 0.87840688, 1.05523026, 0.77554238, 0.73843646,
        1.03541207, 0.95033801, 0.85016167, 0.97827101, 1.17779744,
        0.90834665, 0.93691641, 0.89995015, 1.02743292, 0.74191725,
        0.88575792, 0.68214643, 0.94558758, 0.84564155, 0.81247371,
        0.84573877, 0.82907581, 1.2830112 , 1.00352192, 0.95833874,
        0.71411723, 1.10066366, 0.88098454, 0.78203034, 0.95191896,
        1.02465141, 0.73862112, 1.119344  , 1.42577398, 1.54393733,
        0.70163846, 1.0517602 , 0.71950454, 0.74195486, 0.90648121,
        1.30059826, 0.72531605, 0.75700593, 0.80796003, 1.28977561,
        0.84689653, 0.751755  , 0.74139398, 0.69864553, 0.74069077,
        0.71486527, 0.81568271, 0.98009199, 0.73567581, 0.73682582,
        0.98417956, 0.8375994 , 0.87252533, 0.79

In [266]:
# !pip3 install interpret

### Classification

In [336]:
from sklearn.ensemble import RandomForestClassifier
from interpret.glassbox import ExplainableBoostingClassifier
from sklearn.metrics import f1_score, classification_report


In [324]:
# Build NN Classifier

class nn_classifier(nn.Module):
    
  def __init__(self, i_size=39, h_dim=15, n_classes=10):
    super(nn_classifier, self).__init__()
    
    self.classifier_model = nn.Sequential(
      nn.Linear(i_size, h_dim),
      nn.ReLU(True),
      nn.Linear(h_dim, n_classes)
    )
    
    self.__init_weights(self.classifier_model)

  def forward(self, x):

    out = self.classifier_model(x)
    return out
  
  def __init_weights(self,m):

    if type(m) == nn.Linear:
      torch.nn.init.xavier_uniform_(m.weight)
      m.bias.data.fill_(0.01)

In [325]:
# Train NN Classifier
def nn_classifier_train(model, train_loader, num_epochs = 20):
    learning_rate = 1e-4
    eps = 1e-9
    
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, eps=eps)
    criterion = nn.CrossEntropyLoss()
    
    model.train()
    
    for epoch in range(num_epochs):
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)
            
            optimizer.zero_grad()
            # forward
            output = model(X)
            loss = criterion(output, y)
            
            # backward
            loss.backward()
            optimizer.step()
        
        # log
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.item()))
        
    return model

In [326]:
rfc = RandomForestClassifier(max_depth=50, n_jobs=-1)
ebc = ExplainableBoostingClassifier(max_bins=2, n_jobs=-1, interactions=0, max_leaves=50)
nnc = nn_classifier().to(device)

In [327]:
arr_x_train = x_train.values
arr_y_train = y_train.values

arr_x_test = x_test.values
arr_y_test = y_test.values

In [329]:
nn_classifier_train(nnc, train_loader)

outputs = nnc(X_test.to(device))
nnc_y_pred = torch.max(outputs.data, 1)[1].cpu().detach().numpy()

# RandomForestClassifier
rfc.fit(arr_x_train,arr_y_train)
rfc_y_pred = rfc.predict(arr_x_test)

# ExplainableBoostingClassifier
ebc.fit(arr_x_train,arr_y_train)
ebc_y_pred = rfc.predict(arr_x_test)

# Scores
rfc_score = f1_score(arr_y_test, rfc_y_pred, average="weighted")
ebc_score =  f1_score(arr_y_test, ebc_y_pred, average="weighted")
nnc_score = f1_score(arr_y_test, nnc_y_pred, average="weighted")

print('-'*20)
print(f'F1 Score for RandomForestClassifier: {rfc_score}')
print(f'F1 Score for ExplainableBoostingClassifier: {ebc_score}')
print(f'Score for Neural Nets Classifier: {nnc_score}')

epoch [1/20], loss:0.0510
epoch [2/20], loss:0.0617
epoch [3/20], loss:0.1137
epoch [4/20], loss:0.1032
epoch [5/20], loss:0.0464
epoch [6/20], loss:0.0168
epoch [7/20], loss:0.0227
epoch [8/20], loss:0.0646
epoch [9/20], loss:0.0111
epoch [10/20], loss:0.0412
epoch [11/20], loss:0.2808
epoch [12/20], loss:0.0317
epoch [13/20], loss:0.0786
epoch [14/20], loss:0.0145
epoch [15/20], loss:0.0663
epoch [16/20], loss:0.0235
epoch [17/20], loss:0.1158
epoch [18/20], loss:0.0734
epoch [19/20], loss:0.0134
epoch [20/20], loss:0.0662
--------------------
F1 Score for RandomForestClassifier: 0.9998859397889113
F1 Score for ExplainableBoostingClassifier: 0.9998859397889113
Score for Neural Nets Classifier: 0.9832025165485135


In [340]:
print('-'*60)
print('RandomForestClassifier classification report\n')
print(classification_report(arr_y_test, rfc_y_pred))
print('-'*60)
print('ExplainableBoostingClassifier classification report\n')
print(classification_report(arr_y_test, ebc_y_pred))


------------------------------------------------------------
RandomForestClassifier classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56000
           1       1.00      1.00      1.00    119341

    accuracy                           1.00    175341
   macro avg       1.00      1.00      1.00    175341
weighted avg       1.00      1.00      1.00    175341

------------------------------------------------------------
ExplainableBoostingClassifier classification report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56000
           1       1.00      1.00      1.00    119341

    accuracy                           1.00    175341
   macro avg       1.00      1.00      1.00    175341
weighted avg       1.00      1.00      1.00    175341

