Import all the modules needed to train and evaluate the models

In [None]:
import hydra
import hydra.conf
import wandb
import numpy as np
import os
from omegaconf import DictConfig
import utils
from model import LinearRegression, Perceptron, LogisticRegression, MLP

Load the data and the hyperparameter config, then split the data into training and testing sets

In [None]:
@hydra.main(version_base="1.3", config_path="./conf", config_name="config_proj_midterm")
def main(cfg: DictConfig):
    """Load the dataset named in the config and build the train/test splits.

    Reads ``cfg.dataset``, hands it to ``utils.load_and_process_data``, splits
    everything except the first column of ``X`` with ``utils.split_data``,
    starts a wandb run when the config asks for one, and finally slices the
    train/test feature matrices from column index 3 onward.
    """
    # Preprocess dataset
    data_file = cfg.dataset
    file_found = os.path.exists(data_file)
    print("If path is existed:", file_found)

    X, y = utils.load_and_process_data(data_file, features_to_remove=None)
    # Column 0 of X is left out of the split.
    features = X[:, 1:]
    X_train, X_test, y_train, y_test = utils.split_data(
        features,
        y,
        test_size=0.3,
        val_size=0.2,
        random_state=42,
    )

    # wandb is started only when it is switched on AND the run is named "Project_midterm".
    wandb_requested = cfg.wandb_on_off and cfg.name == "Project_midterm"
    if wandb_requested:
        wandb.init(project="Project_midterm")

    # Feature subset: every column from index 3 onward.
    X_train_f, X_test_f = X_train[:, 3:], X_test[:, 3:]

Linear regression model implementation

In [None]:
# Linear Regression: fit on the X_train_f feature slice, then score on X_test_f.
print("Linear Regression training")
model_L = LinearRegression(
    n_feature=X_train_f.shape[1],
    epoch=cfg.epoch,
    lr=cfg.lr_linear_regression,  # this model has its own learning-rate entry in the config
    batch_size=cfg.batch_size,
    gd=cfg.gd,
)
model_L.fit(X_train_f, y_train)
metrics_L = model_L._evaluate(X_test_f, y_test)
print(f"Linear Regression evaluation: {metrics_L}")


Perceptron model implementation 

In [None]:
# Perceptron
def _zeros_to_minus_one(labels):
    """Return a copy of ``labels`` in which every entry equal to 0 is replaced by -1."""
    relabelled = labels.copy()
    relabelled[relabelled == 0] = -1
    return relabelled


# Labels equal to 0 are remapped to -1 for the Perceptron targets; the originals are untouched.
y_train_P = _zeros_to_minus_one(y_train)
y_test_P = _zeros_to_minus_one(y_test)

# This cell fits on X_train / X_test (not the X_*_f slices) and uses a fixed epoch count of 1000.
model_P = Perceptron(
    n_feature=X_train.shape[1],
    epoch=1000,
    lr=cfg.lr,
    tol=cfg.tol,
    wandb=cfg.wandb_on_off,
    gd=cfg.gd,
)
model_P.fit(X_train, y_train_P)
metrics_P = model_P._evaluate(X_test, y_test_P)
print(f"Perceptron evaluation: {metrics_P}")

Logistic regression model implementation

In [None]:
# Logistic Regression: fit on the X_train_f feature slice, then score on X_test_f.
print("Logistic Regression training")
model_LR = LogisticRegression(
    n_feature=X_train_f.shape[1],
    epoch=cfg.epoch,
    lr=cfg.lr,
    tol=cfg.tol,
    wandb=cfg.wandb_on_off,
    gd=cfg.gd,
)
model_LR.fit(X_train_f, y_train)
metrics_LR = model_LR._evaluate(X_test_f, y_test)
print(f"Logistic Regression evaluation: {metrics_LR}")

MLP model implementation

In [None]:
# MLP
print("MLP training")

# Layer sizes: input width, then the configured hidden layers, then a single output unit.
input_size = X_train_f.shape[1]
layers_list = [input_size] + cfg.hidden_layers + [1]

model_MLP = MLP(layers_list)
model_MLP.train(
    X_train_f,
    y_train,
    cfg.epoch,
    cfg.lr,
    cfg.batch_size,
    cfg.gd,
)
metrics_MLP = model_MLP.evaluate(X_test_f, y_test)
print(f"MLP evaluation with two correlated features: {metrics_MLP}")

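Results of the four models (Linear Regression, Logistic Regression and MLP are trained on the feature subset `X_train_f`; the Perceptron is trained on all features in `X_train`)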

In [15]:
# Hard-coded metric text (presumably copied from an earlier run); nothing is recomputed here.
print(
    "Linear Regression evaluation:{'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}\n"
    "Perceptron evaluation: {'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}\n"
    "Logistic Regression evaluation: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092} \n"
    "MLP evaluation: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.9338183500000001, 'f1': 0.9495677504357161}"
)

Linear Regression evaluation:{'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}
Perceptron evaluation: {'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}
Logistic Regression evaluation: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092} 
MLP evaluation: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.9338183500000001, 'f1': 0.9495677504357161}


Train the MLP model on different feature subsets

In [None]:
# MLP: train one model per feature subset and evaluate each model on the
# matching columns of the test set.
print("MLP training")

# Model 0: the feature slice from column 3 onward (X_train_f / X_test_f).
input_size = X_train_f.shape[1]
layers_list = [input_size] + cfg.hidden_layers + [1]
model_MLP = MLP(layers_list)
model_MLP.train(X_train_f, y_train, cfg.epoch, cfg.lr, cfg.batch_size, cfg.gd)
metrics_MLP = model_MLP.evaluate(X_test_f, y_test)

# Model 1: the first three columns only.
input_size1 = X_train[:,0:3].shape[1]
layers_list1 = [input_size1] + cfg.hidden_layers + [1]
model_MLP1 = MLP(layers_list1)
model_MLP1.train(X_train[:,0:3], y_train, cfg.epoch, cfg.lr, cfg.batch_size, cfg.gd)
# Fix: score model_MLP1 itself, on the same three columns it was trained on.
# Previously this re-evaluated model_MLP on X_test_f, duplicating metrics_MLP.
metrics_MLP1 = model_MLP1.evaluate(X_test[:,0:3], y_test)

# Model 2: every column of X_train.
input_size2 = X_train.shape[1]
layers_list2 = [input_size2] + cfg.hidden_layers + [1]
model_MLP2 = MLP(layers_list2)
model_MLP2.train(X_train, y_train, cfg.epoch, cfg.lr, cfg.batch_size, cfg.gd)
# Fix: score model_MLP2 itself, on the full test feature matrix.
# Previously this re-evaluated model_MLP on X_test_f, duplicating metrics_MLP.
metrics_MLP2 = model_MLP2.evaluate(X_test, y_test)

print(f"MLP evaluation with two correlated features: {metrics_MLP}")
print(f"MLP evaluation with another three features: {metrics_MLP1}")
print(f"MLP evaluation with all features: {metrics_MLP2}")

Results of the MLP model trained on the different feature subsets

In [1]:
# Hard-coded metric text (presumably copied from an earlier run); nothing is recomputed here.
print(
    "MLP evaluation with two correlated features: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.933818350001, 'f1': 0.9495677504357161}\n"
    "MLP evaluation with another three features: {'accuracy': 0.898, 'recall': 0.06557377049180328, 'precision': 0.015810276679841896, 'f1': 0.02547770700636942}\n"
    "MLP evaluation with all features: {'accuracy': 0.961, 'recall': 0.010752688172043012, 'precision': 0.038461538461538464, 'f1': 0.01680672268907563}"
)

MLP evaluation with two correlated features: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.933818350001, 'f1': 0.9495677504357161}
MLP evaluation with another three features: {'accuracy': 0.898, 'recall': 0.06557377049180328, 'precision': 0.015810276679841896, 'f1': 0.02547770700636942}
MLP evaluation with all features: {'accuracy': 0.961, 'recall': 0.010752688172043012, 'precision': 0.038461538461538464, 'f1': 0.01680672268907563}
