<a href="https://colab.research.google.com/github/MohiniRathore20/Aiml-Lab--Mohini-rathore/blob/main/Aiml%20Exp9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Step 1: Import Libraries and Dataset
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Build a synthetic binary-classification problem whose class weights are
# skewed 90% / 10% so that the effect of re-balancing can be studied.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_clusters_per_class=1,
    weights=[0.9, 0.1],
    random_state=42,
)

# Wrap the feature matrix in a DataFrame (one "feature_<i>" column per input
# dimension) and attach the labels so the class balance is easy to inspect.
df = pd.DataFrame(
    X,
    columns=[f"feature_{i}" for i in range(X.shape[1])],
).assign(target=y)

# Report how many samples fall into each class before any resampling.
print("Class Distribution (Before Balancing):")
print(df['target'].value_counts())

# Hold out 30% of the samples for testing; stratifying on y keeps the class
# ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Step 2: Implement Balancing Techniques
# Every resampler below is applied to the training split only; X_test and
# y_test are never resampled.
ros = RandomOverSampler(random_state=42)    # random oversampling
rus = RandomUnderSampler(random_state=42)   # random undersampling
smote = SMOTE(random_state=42)              # SMOTE

X_train_ros, y_train_ros = ros.fit_resample(X_train, y_train)
X_train_rus, y_train_rus = rus.fit_resample(X_train, y_train)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Step 3: Define a function to train and evaluate a model
def train_and_evaluate(X_train, y_train, X_test, y_test, class_weight=None, pos_label=1):
    """Fit a logistic regression and score it on the positive class.

    Parameters
    ----------
    X_train, y_train
        Training features and labels (possibly already resampled).
    X_test, y_test
        Held-out features and labels used for every reported metric.
    class_weight
        Forwarded unchanged to ``LogisticRegression(class_weight=...)``.
    pos_label
        Label of the class the metrics are reported for. Defaults to ``1``,
        which reproduces the previous hard-coded behaviour for 0/1 labels.

    Returns
    -------
    dict
        ``{"Precision", "Recall", "F1-Score", "AUC-ROC"}`` for ``pos_label``.

    Raises
    ------
    ValueError
        If ``pos_label`` is not one of the classes seen during fitting.
    """
    model = LogisticRegression(class_weight=class_weight, random_state=42)
    model.fit(X_train, y_train)

    # Find the positive class among the fitted classes instead of assuming it
    # sits in column 1 of predict_proba / under the key "1" of the report.
    fitted_classes = list(model.classes_)
    if pos_label not in fitted_classes:
        raise ValueError(
            f"pos_label={pos_label!r} is not among the fitted classes {fitted_classes!r}"
        )
    pos_index = fitted_classes.index(pos_label)

    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, pos_index]

    report = classification_report(y_test, y_pred, output_dict=True)
    # classification_report keys its per-class entries by the label's string form.
    pos_metrics = report[str(model.classes_[pos_index])]

    # Score AUC against an explicit "is positive" indicator so the result is
    # tied to pos_label rather than to whichever label happens to sort last.
    is_positive = (np.asarray(y_test) == pos_label).astype(int)
    auc_roc = roc_auc_score(is_positive, y_pred_proba)

    return {
        "Precision": pos_metrics["precision"],
        "Recall": pos_metrics["recall"],
        "F1-Score": pos_metrics["f1-score"],
        "AUC-ROC": auc_roc
    }

# Step 4: Evaluate the model on different datasets
# Each entry maps a strategy name to (training features, training labels,
# class_weight). Every strategy is scored on the same held-out test split.
experiments = {
    "Imbalanced": (X_train, y_train, None),
    "Random Oversampling": (X_train_ros, y_train_ros, None),
    "Random Undersampling": (X_train_rus, y_train_rus, None),
    "SMOTE": (X_train_smote, y_train_smote, None),
    "Class Weighting": (X_train, y_train, "balanced"),
}

results = {
    strategy: train_and_evaluate(
        train_features, train_labels, X_test, y_test, class_weight=weighting
    )
    for strategy, (train_features, train_labels, weighting) in experiments.items()
}

# Step 5: Summarize the results
# One row per strategy, one column per metric.
results_df = pd.DataFrame(results).transpose()
print("\nPerformance Metrics:")
print(results_df)


Class Distribution (Before Balancing):
target
0    894
1    106
Name: count, dtype: int64

Performance Metrics:
                      Precision   Recall  F1-Score   AUC-ROC
Imbalanced             1.000000  0.71875  0.836364  0.973414
Random Oversampling    0.658537  0.84375  0.739726  0.962687
Random Undersampling   0.491525  0.90625  0.637363  0.970149
SMOTE                  0.650000  0.81250  0.722222  0.966884
Class Weighting        0.658537  0.84375  0.739726  0.965135
