In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import joblib

# Loading preprocessed data.
# Feature files are kept as DataFrames; target files are flattened to 1-D
# arrays (presumably one label column each — confirm against the
# preprocessing step that wrote them).
X_train, X_test = (
    pd.read_csv(csv_path) for csv_path in ('X_train.csv', 'X_test.csv')
)
y_train, y_test = (
    pd.read_csv(csv_path).values.ravel()
    for csv_path in ('y_train.csv', 'y_test.csv')
)

# Training Logistic Regression: one-vs-rest scheme, fixed seed for
# reproducibility, and a raised iteration cap of 1000.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the pinned scikit-learn version before upgrading.
model = LogisticRegression(
    multi_class='ovr',
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator itself

# Saving the model (fitted estimator is pickled via joblib)
joblib.dump(model, 'logistic_regression_model.pkl')

# Making predictions: hard class labels for the test split
y_pred = model.predict(X_test)

# Calculating metrics
# Precision, recall and F1 are support-weighted averages over the classes.
# zero_division=0 spells out the value scikit-learn already substitutes when a
# class has no predicted (or no true) samples, and silences the
# UndefinedMetricWarning that the default setting ("warn") emits. The reported
# numbers are unchanged.
# NOTE(review): that warning firing means at least one class was never
# predicted on the test split — worth checking class balance / feature scaling.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}
metrics = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred, **weighted_kwargs),
    'Recall': recall_score(y_test, y_pred, **weighted_kwargs),
    'F1-Score': f1_score(y_test, y_pred, **weighted_kwargs)
}
print("Logistic Regression Metrics:\n", metrics)

# Saving metrics: one "<name>: <value>" line per metric, four decimals
with open('logistic_regression_metrics.txt', 'w') as f:
    for metric, value in metrics.items():
        f.write(f"{metric}: {value:.4f}\n")

# Plotting confusion matrix
# Row/column order is pinned to the classes the model was fitted on. Without
# `labels`, confusion_matrix uses only the labels that occur in y_test/y_pred,
# so a class absent from both would silently shrink the matrix and the
# positional tick indices would no longer identify the classes.
cm_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

# Explicit figure/axes so the plot does not depend on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',          # cells are integer counts
    cmap='Blues',
    xticklabels=cm_labels,
    yticklabels=cm_labels,
    ax=ax,
)
ax.set_title('Logistic Regression Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
fig.savefig('logistic_regression_cm.png')
plt.close(fig)  # figure is only written to disk, not shown inline

# Plotting ROC curve for multiclass (one-vs-rest: one curve per class)
# The class list comes from the fitted model rather than a hard-coded
# [0, 1, 2]: predict_proba columns follow model.classes_, so binarising y_test
# with the same list keeps label columns and score columns aligned for any
# number or naming of classes.
roc_classes = model.classes_
n_classes = len(roc_classes)

y_test_bin = label_binarize(y_test, classes=roc_classes)
if n_classes == 2:
    # For two classes label_binarize returns a single column (indicator of the
    # second class); expand it so there is one indicator column per class.
    y_test_bin = np.hstack((1 - y_test_bin, y_test_bin))

y_score = model.predict_proba(X_test)

# Dicts are keyed by the positional class index, as before.
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

fig, ax = plt.subplots(figsize=(8, 6))
for i, class_label in enumerate(roc_classes):
    ax.plot(
        fpr[i],
        tpr[i],
        label=f'ROC curve (class {class_label}, AUC = {roc_auc[i]:.2f})',
    )
ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Logistic Regression ROC Curve')
ax.legend(loc='lower right')
fig.savefig('logistic_regression_roc.png')
plt.close(fig)  # figure is only written to disk, not shown inline

# Saving ROC AUC scores: one "Class <label> AUC: <value>" line per class
with open('logistic_regression_roc_auc.txt', 'w') as f:
    for i, class_label in enumerate(roc_classes):
        f.write(f"Class {class_label} AUC: {roc_auc[i]:.4f}\n")

print("Logistic Regression training and evaluation completed.")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression Metrics:
 {'Accuracy': 0.5964912280701754, 'Precision': 0.35580178516466604, 'Recall': 0.5964912280701754, 'F1-Score': 0.4457297088876036}
Logistic Regression training and evaluation completed.
