In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [2]:
# Set the random seeds for reproducibility.
# Python's `random` module and NumPy keep separate global generators, and
# scikit-learn falls back to NumPy's global generator whenever an estimator's
# `random_state` is left unset, so both are seeded here.
random.seed(42)
np.random.seed(42)

In [3]:
# Read the source spreadsheet into a DataFrame.
# `file_path` is kept as a separate name so the data location can be changed
# in one place.
file_path = "data"
df = pd.read_excel(io=file_path)

In [4]:
# Drop unwanted columns
df = df.drop(['Patient', 'Gender', 'Age', 'Event', 'Scale factor', 'SNR'], axis='columns')

# Normalize the selected features
features_to_normalize = df.columns.difference(['NP-SLE'])
scaler = MinMaxScaler()
df[features_to_normalize] = scaler.fit_transform(df[features_to_normalize])


In [5]:
# Separate features and target variable
X = df.drop(['NP-SLE'], axis=1)
y = df['NP-SLE']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Hyperparameter tuning: search over the inverse regularization strength C
# (20 log-spaced values from 1e-4 to 1e4) and the penalty type.
param_grid = {
    'C': np.logspace(-4, 4, 20),
    'penalty': ['l1', 'l2']
}

# Create Logistic Regression classifier.
# random_state is pinned because the liblinear solver uses it internally;
# leaving it unset can make results vary between runs.
# NOTE(review): the recorded run shows three target classes; newer
# scikit-learn releases reportedly deprecate liblinear for multiclass
# problems - confirm against the installed version.
logistic_regression = LogisticRegression(solver='liblinear', random_state=42)

# Initialize GridSearchCV (5-fold cross-validation, scored by accuracy)
grid_search = GridSearchCV(logistic_regression, param_grid, cv=5, verbose=0, scoring='accuracy')

# Fit the GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Retrieve the best model.
# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# refitted on the whole training set with the best parameters; a second
# explicit fit is unnecessary.
best_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0 reports 0 (instead of warning) for classes that are never
# predicted.
class_report = classification_report(y_test, y_pred, zero_division=0)

# Print results
print("Accuracy on Test Set:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Best Hyperparameters: {'C': 0.08858667904100823, 'penalty': 'l2'}
Accuracy on Test Set: 0.6
Confusion Matrix:
 [[0 4 0]
 [0 2 0]
 [0 0 4]]
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.33      1.00      0.50         2
           2       1.00      1.00      1.00         4

    accuracy                           0.60        10
   macro avg       0.44      0.67      0.50        10
weighted avg       0.47      0.60      0.50        10

