#
---
# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from lightgbm import LGBMClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, fbeta_score, precision_score
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier
import torch
import torch.nn as nn
import torch.optim as optim
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F

#
---
# Importing Dataset

In [None]:
# Load the pre-split train/test arrays from the NumPy archive.
# np.load on an .npz returns a lazy NpzFile that keeps the underlying file
# open, so read it inside a `with` block to guarantee the handle is released.
with np.load('Data/data.npz') as archive:
    # Materialize every stored array so `data[...]` lookups keep working
    # after the archive has been closed.
    data = {name: archive[name] for name in archive.files}

trainx = data['arr1']
trainy = data['arr2']
testx = data['arr3']
testy = data['arr4']

# Fail fast if a feature array and its label array are misaligned.
assert len(trainx) == len(trainy), "trainx/trainy length mismatch"
assert len(testx) == len(testy), "testx/testy length mismatch"

#
---
# Machine Learning Models

#### ◉ Logistic Regression (Python)

In [None]:
%%time
# Define the Logistic Regression model
model = LogisticRegression(solver='saga', max_iter=100000)

# Define the parameter grid for Grid Search
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]
}

# Create Grid Search with cross-validation
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

# Fit the model to find the best parameters
grid_search.fit(trainx, trainy)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Use the best parameters to create the final model
best_model = LogisticRegression(**best_params, solver='saga', max_iter=100000)
best_model.fit(trainx, trainy)

# Make predictions on the training set
trainy_pred = best_model.predict(trainx)

# Evaluate the model on the training set
accuracy_train = accuracy_score(trainy, trainy_pred)
print("Training Accuracy:", accuracy_train)

# Make predictions on the test set
testy_pred = best_model.predict(testx)

# Evaluate the model on the test set
accuracy = accuracy_score(testy, testy_pred)
report = classification_report(testy, testy_pred)
auc = roc_auc_score(testy, testy_pred)
f2 = fbeta_score(testy, testy_pred, beta=2)
precision_per_class = precision_score(testy, testy_pred, average=None)

# Additional Metrics
print("Accuracy:", accuracy)
print("AUC:", auc)
print("F2-score:", f2)
print("Precision per class:", precision_per_class)
print("Classification Report:\n", report)

# Confusion Matrix Visualization
plt.rcParams['figure.figsize'] = (4, 4)
sns.heatmap(confusion_matrix(testy, testy_pred), annot=True, fmt='d', linewidths=.5, cmap="YlGnBu")
plt.title('Confusion Matrix')
plt.show()

# True Positive, True Negative, False Positive, False Negative
tp = confusion_matrix(testy, testy_pred)[1][1]
tn = confusion_matrix(testy, testy_pred)[0][0]
fp = confusion_matrix(testy, testy_pred)[0][1]
fn = confusion_matrix(testy, testy_pred)[1][0]

print('True Positive Cases: {}'.format(tp))
print('True Negative Cases: {}'.format(tn))
print('False Positive Cases: {}'.format(fp))
print('False Negative Cases: {}'.format(fn))

---
#### ◉ Logistic Regression (Cython + Parallelized GridSearchCV Process)

In [None]:
%%cython -a
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, fbeta_score, precision_score, confusion_matrix
import matplotlib.pyplot as plt
from joblib import parallel_backend
import seaborn as sns
cimport numpy as np
cimport cython

# NOTE: boundscheck/wraparound only influence indexing of typed buffers;
# this function performs no such indexing, so they are kept purely for
# parity with the original cell.
@cython.boundscheck(False)
@cython.wraparound(False)
def logistic_regression_cython(np.ndarray[np.float64_t, ndim=2] trainx, np.ndarray[np.int64_t, ndim=1] trainy, np.ndarray[np.float64_t, ndim=2] testx, np.ndarray[np.int64_t, ndim=1] testy):
    """Grid-search, fit and evaluate a Logistic Regression classifier.

    Runs a 5-fold GridSearchCV over the penalty type and C inside a
    joblib 'multiprocessing' backend (all cores), then prints training
    accuracy, test accuracy, AUC, F2-score, per-class precision and the
    classification report, shows the test confusion matrix as a heatmap
    and prints the TP/TN/FP/FN counts.

    Parameters
    ----------
    trainx, testx : 2-D float64 arrays
        Feature matrices for the training and test sets.
    trainy, testy : 1-D int64 arrays
        Labels for the training and test sets. The metrics used below
        (binary F-beta, 2x2 confusion-matrix indexing) assume two classes.

    Returns
    -------
    None
        All results are printed or plotted.

    Notes
    -----
    The typed buffer arguments are expected to make Cython reject arrays of
    any other dtype or rank -- TODO confirm the stored arrays match.
    """
    # Base estimator: 'saga' supports both l1 and l2 penalties.
    model = LogisticRegression(solver='saga', max_iter=100000)

    # Hyper-parameter grid explored by the search.
    param_grid = {
        'penalty': ['l1', 'l2'],
        'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]
    }

    # Distribute the cross-validation fits over all available cores.
    with parallel_backend('multiprocessing', n_jobs=-1):
        grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
        grid_search.fit(trainx, trainy)

    best_params = grid_search.best_params_
    print("Best Parameters:", best_params)

    # GridSearchCV refits the best configuration on the full training set by
    # default (refit=True), so reuse that estimator instead of fitting again.
    best_model = grid_search.best_estimator_

    # Training-set performance.
    trainy_pred = best_model.predict(trainx)
    accuracy_train = accuracy_score(trainy, trainy_pred)
    print("Training Accuracy:", accuracy_train)

    # Test-set predictions: hard labels for threshold-based metrics and
    # positive-class probabilities for the ranking-based AUC.
    testy_pred = best_model.predict(testx)
    testy_proba = best_model.predict_proba(testx)[:, 1]

    accuracy = accuracy_score(testy, testy_pred)
    report = classification_report(testy, testy_pred)
    # AUC must be computed from continuous scores, not hard 0/1 predictions.
    auc = roc_auc_score(testy, testy_proba)
    f2 = fbeta_score(testy, testy_pred, beta=2)
    precision_per_class = precision_score(testy, testy_pred, average=None)

    # Additional Metrics
    print("Accuracy:", accuracy)
    print("AUC:", auc)
    print("F2-score:", f2)
    print("Precision per class:", precision_per_class)
    print("Classification Report:\n", report)

    # Compute the confusion matrix once and reuse it below.
    cm = confusion_matrix(testy, testy_pred)

    # Confusion Matrix Visualization
    plt.rcParams['figure.figsize'] = (4, 4)
    sns.heatmap(cm, annot=True, fmt='d', linewidths=.5, cmap="YlGnBu")
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.show()

    # True Positive, True Negative, False Positive, False Negative
    # (rows are true labels, columns are predicted labels).
    tp = cm[1, 1]
    tn = cm[0, 0]
    fp = cm[0, 1]
    fn = cm[1, 0]

    print('True Positive Cases: {}'.format(tp))
    print('True Negative Cases: {}'.format(tn))
    print('False Positive Cases: {}'.format(fp))
    print('False Negative Cases: {}'.format(fn))

In [None]:
%%time
logistic_regression_cython(trainx, trainy, testx, testy)