Import everything needed to train and evaluate the models

In [None]:
import hydra
import hydra.conf
import wandb
import numpy as np
import os
from omegaconf import DictConfig
import utils
from model import LinearRegression, Perceptron, LogisticRegression, MLP

Load the dataset and the hyperparameter config, then split the data into training and testing sets

In [None]:
@hydra.main(version_base="1.3", config_path="./conf", config_name="config_proj_midterm")
def main(cfg: DictConfig):
    """Load and split the dataset named in the config, optionally starting a W&B run.

    Args:
        cfg: Hydra/OmegaConf configuration. This function reads ``cfg.dataset``,
            ``cfg.wandb_on_off`` and ``cfg.name``.

    NOTE(review): the split arrays are locals of this function and nothing is
    returned, while other cells in this notebook use ``X_train``/``cfg`` at the
    top level -- confirm how those cells are meant to obtain them.
    """
    # Preprocess dataset
    data_file = cfg.dataset
    file_found = os.path.exists(data_file)
    print("If path is existed:", file_found)

    X, y = utils.load_and_process_data(data_file, features_to_remove=None)

    # The first column of X is excluded from the features that get split.
    feature_matrix = X[:, 1:]
    X_train, X_test, y_train, y_test = utils.split_data(
        feature_matrix,
        y,
        test_size=0.3,
        val_size=0.2,
        random_state=42,
    )

    # Start a W&B run only when logging is enabled and the configured name matches.
    if cfg.wandb_on_off:
        if cfg.name == "Project_midterm":
            wandb.init(project="Project_midterm")

Linear regression model implementation

In [None]:
# Linear Regression: fit on the training split, then score on the test split.
model_L = LinearRegression(
    n_feature=X_train.shape[1],  # one weight per feature column
    epoch=cfg.epoch,
    lr=cfg.lr,
    gd=cfg.gd,
)
model_L.fit(X_train, y_train)
metrics_L = model_L._evaluate(X_test, y_test)
print(f"Linear Regression evaluation: {metrics_L}")

Perceptron model implementation 

In [None]:
# Perceptron
# Work on copies so y_train / y_test stay untouched; in the copies every
# label equal to 0 is rewritten to -1.
y_train_P, y_test_P = y_train.copy(), y_test.copy()
for signed_labels in (y_train_P, y_test_P):
    signed_labels[signed_labels == 0] = -1

# NOTE(review): epoch is fixed at 1000 here, whereas the other models in this
# notebook take cfg.epoch -- confirm this is intentional.
model_P = Perceptron(
    n_feature=X_train.shape[1],
    epoch=1000,
    lr=cfg.lr,
    tol=cfg.tol,
    wandb=cfg.wandb_on_off,
    gd=cfg.gd,
)
model_P.fit(X_train, y_train_P)
metrics_P = model_P._evaluate(X_test, y_test_P)
print(f"Perceptron evaluation: {metrics_P}")

Logistic regression model implementation

In [None]:
# Logistic Regression: hyperparameters come from the config; the feature count
# is taken from the training matrix.
logistic_kwargs = dict(
    n_feature=X_train.shape[1],
    epoch=cfg.epoch,
    lr=cfg.lr,
    tol=cfg.tol,
    wandb=cfg.wandb_on_off,
    gd=cfg.gd,
)
model_LR = LogisticRegression(**logistic_kwargs)
model_LR.fit(X_train, y_train)
metrics_LR = model_LR._evaluate(X_test, y_test)
print(f"Logistic Regression evaluation: {metrics_LR}")

MLP model implementation

In [None]:
# MLP
# The input width is one less than the number of columns in X.
# NOTE(review): the full X (all columns) is passed to cross_validate below --
# presumably the helper drops a column itself; verify against utils.cross_validate.
input_size = X.shape[1] - 1
# Presumably [input, hidden, output] layer sizes -- TODO confirm against MLP.
layers_list = [input_size, 10, 1]
model_MLP = MLP(layers_list)
metrics_MLP = utils.cross_validate(
    model_MLP,
    X,
    y,
    cfg.k,
    cfg.epoch,
    cfg.lr,
    cfg.batch_size,
    cfg.gd,
)
print(f"MLP evaluation: {metrics_MLP}")

Results of the four models trained on all the features

In [12]:
# Hard-coded metric lines, one per model; nothing is recomputed here
# (presumably copied from an earlier run -- TODO confirm).
print(
    "\n".join(
        [
            "Linear Regression evaluation:{'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}",
            "Perceptron evaluation: {'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}",
            "Logistic Regression evaluation: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092} ",
            "MLP evaluation: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.9338183500000001, 'f1': 0.9495677504357161}",
        ]
    )
)

Linear Regression evaluation:{'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}
Perceptron evaluation: {'accuracy': 0.031, 'recall': 0.031, 'precision': 0.000961, 'f1': 0.0018642095053346267}
Logistic Regression evaluation: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092} 
MLP evaluation: {'accuracy': 0.9661, 'recall': 0.9661, 'precision': 0.9338183500000001, 'f1': 0.9495677504357161}


Using different features to train the logistic regression model

In [None]:
# Training and testing sets without the two temperature features (columns 2 onward)
X_train_LR1, X_test_LR1 = X_train[:, 2:], X_test[:, 2:]
# Training and testing sets with only the two temperature features (columns 0 and 1)
X_train_LR2, X_test_LR2 = X_train[:, :2], X_test[:, :2]

# Hyperparameters shared by all three models; only the feature count differs.
shared_hparams = dict(
    epoch=cfg.epoch,
    lr=cfg.lr,
    tol=cfg.tol,
    wandb=cfg.wandb_on_off,
    gd=cfg.gd,
)

# Build every model first, then fit, then evaluate (same order as before).
model_LR = LogisticRegression(n_feature=X_train.shape[1], **shared_hparams)
model_LR1 = LogisticRegression(n_feature=X_train_LR1.shape[1], **shared_hparams)
model_LR2 = LogisticRegression(n_feature=X_train_LR2.shape[1], **shared_hparams)

model_LR.fit(X_train, y_train)
model_LR1.fit(X_train_LR1, y_train)
model_LR2.fit(X_train_LR2, y_train)

# Score each model on the matching slice of the test set for comparison.
metrics_LR = model_LR._evaluate(X_test, y_test)
metrics_LR1 = model_LR1._evaluate(X_test_LR1, y_test)
metrics_LR2 = model_LR2._evaluate(X_test_LR2, y_test)

print(f"Logistic Regression evaluation with all features: {metrics_LR}")
print(f"Logistic Regression evaluation without two temperature features: {metrics_LR1}")
print(f"Logistic Regression evaluation with only two temperature features: {metrics_LR2}")

Results of the logistic regression model for each feature subset

In [14]:
# Hard-coded metric lines, one per feature subset; nothing is recomputed here
# (presumably copied from an earlier run -- TODO confirm).
print(
    "\n".join(
        [
            "Logistic Regression evaluation with all features: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092}",
            "Logistic Regression evaluation without two temperature features: {'accuracy': 0.8946666666666667, 'recall': 0.8946666666666667, 'precision': 0.9553094565487982, 'f1': 0.9209330197885173}",
            "Logistic Regression evaluation with only two temperature features: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092}",
        ]
    )
)

Logistic Regression evaluation with all features: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092}
Logistic Regression evaluation without two temperature features: {'accuracy': 0.8946666666666667, 'recall': 0.8946666666666667, 'precision': 0.9553094565487982, 'f1': 0.9209330197885173}
Logistic Regression evaluation with only two temperature features: {'accuracy': 0.969, 'recall': 0.969, 'precision': 0.9389609999999999, 'f1': 0.9537440325038092}
