# 1. Importing Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, confusion_matrix, 
    classification_report, roc_auc_score, roc_curve
)

💫 We import all the necessary libraries for performing **Logistic Regression analysis**.  
This includes tools for data handling, visualization, scaling, model building, and evaluation.

# 2. Loading and Preparing the Data

In [None]:
# Read the raw dataset; point this path at your own CSV file.
df = pd.read_csv("your_data.csv")

# Placeholder column names -- replace them with the columns in your data.
target_column = "target_column"  # label to predict (categorical/binary)
feature_columns = ["feature1", "feature2", "feature3"]  # model inputs

# Feature matrix and target vector.
X = df[feature_columns]
y = df[target_column]

# Report how many distinct classes the target has and what they are.
unique_classes = y.nunique()
print(f"Target variable has {unique_classes} unique classes: {y.unique()}")

💫 We load our dataset using `pd.read_csv()`.  
We separate **features (X)** and **target (y)**.  
We also count and print the unique target classes to see whether the problem is **binary** or **multiclass**.

# 3. Splitting Data into Training and Testing Sets

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

💫 We split the dataset into **training (80%)** and **testing (20%)** sets,  
while preserving the target class proportions in both sets (`stratify=y`).

# 4. Feature Scaling

In [None]:
# Learn the scaling parameters from the training data only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

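💫 We use **StandardScaler** to standardize each feature to zero mean and unit variance.  
The scaler is fitted on the training set only and then applied to the test set, which avoids data leakage and helps Logistic Regression converge.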

# 5. Training the Logistic Regression Model

In [None]:
# Fit the classifier on the scaled training data. fit() returns the
# estimator itself, so construction and training can be chained.
model = LogisticRegression(random_state=42, max_iter=1000).fit(
    X_train_scaled, y_train
)
model  # display the fitted estimator as the cell output

💫 We train a **Logistic Regression model** on the scaled training data.

# 6. Making Predictions

In [None]:
# Score the held-out set: per-class probabilities first, then hard labels.
y_pred_proba = model.predict_proba(X_test_scaled)
y_pred = model.predict(X_test_scaled)

💫 The model predicts both **class labels** (`y_pred`)  
and **class probabilities** (`y_pred_proba`).

# 7. Evaluating the Model

In [None]:
# Calculate metrics on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
# Pin the row/column order of the matrix to model.classes_. Without `labels`,
# confusion_matrix only includes labels that occur in y_test or y_pred, so a
# class missing from the test split would make the matrix smaller than (and
# misaligned with) any axis labels built from model.classes_.
conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)

# Print a plain-text summary of the run and its scores.
print("="*50)
print("LOGISTIC REGRESSION RESULTS")
print("="*50)
print(f"Features: {feature_columns}")
print(f"Target: {target_column}")
print(f"Number of classes: {unique_classes}")
print(f"Accuracy: {accuracy:.4f}")
print(f"\nConfusion Matrix:\n{conf_matrix}")
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")

💫 We calculate key evaluation metrics:  
- **Accuracy**  
- **Confusion Matrix**  
- **Classification Report (Precision, Recall, F1-score)**

# 8. Visualizing the Confusion Matrix

In [None]:
# Draw the confusion matrix as an annotated heatmap on an explicit Axes,
# with rows as actual classes and columns as predicted classes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=model.classes_,
    yticklabels=model.classes_,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

💫 A **heatmap** of the confusion matrix provides  
a clear picture of how well the model classified each class.

# 9. Plotting the ROC Curve (for Binary Classification)

In [None]:
# The ROC curve is only drawn for a two-class target; other cases are skipped.
if unique_classes == 2:
    # Column 1 of predict_proba holds the probability of model.classes_[1].
    # Naming that class as pos_label lets roc_curve handle targets that are
    # not encoded as {0, 1} or {-1, 1} (for example "yes"/"no"), which would
    # otherwise raise a ValueError.
    positive_class = model.classes_[1]
    positive_scores = y_pred_proba[:, 1]

    fpr, tpr, _ = roc_curve(y_test, positive_scores, pos_label=positive_class)
    roc_auc = roc_auc_score(y_test, positive_scores)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, 
             label=f'ROC curve (AUC = {roc_auc:.2f})')
    # Diagonal reference line: the expected curve of a random classifier.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()

💫 The **ROC Curve** evaluates the model’s ability  
to distinguish between two classes.  
The **AUC score** summarizes this performance.