# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

# Seed NumPy's global random generator so repeated runs behave the same.
np.random.seed(42)

# Build a 1000-sample, two-feature toy classification problem; the weights
# request roughly a 95% / 5% split between class 0 and class 1.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.95, 0.05],
    random_state=42,
)

# Show the raw data: one point per sample, coloured by its class label.
plt.scatter(X[:, 0], X[:, 1], cmap='viridis', c=y)
plt.title('Original Imbalanced Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

# Count the samples per class and express each count as a share of the
# whole dataset (in percent).
class_distribution = np.bincount(y)
class_percentage = class_distribution / y.shape[0] * 100

# Report the two classes, followed by a blank separator line.
print("Class Distribution:")
for label in (0, 1):
    print(f"Class {label}:", class_distribution[label], "samples",
          f"({class_percentage[label]:.2f}%)")
print()

# Oversample with SMOTE. Per the imbalanced-learn documentation, a float
# sampling_strategy is the desired minority/majority ratio after resampling,
# so 0.25 asks for one minority sample per four majority samples.
smote = SMOTE(sampling_strategy=0.25, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Tally the labels after oversampling and convert each class count into a
# percentage of the resampled dataset (index 0 -> class 0, index 1 -> class 1).
resampled_class_distribution = Counter(y_resampled)
resampled_class_percentage = [
    resampled_class_distribution[label] / len(y_resampled) * 100
    for label in (0, 1)
]

# Report both classes, followed by a blank separator line.
print("Class Distribution after SMOTE:")
for label, share in enumerate(resampled_class_percentage):
    print(f"Class {label}:", resampled_class_distribution[label], "samples",
          f"({share:.2f}%)")
print()

# Show the oversampled data, coloured by class label.
plt.scatter(X_resampled[:, 0], X_resampled[:, 1], cmap='viridis', c=y_resampled)
plt.title('Oversampled Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

# Randomly undersample the ORIGINAL data (X, y), not the SMOTE output.
# Per the imbalanced-learn documentation, a float sampling_strategy is the
# desired minority/majority ratio after resampling (here 0.15).
# NOTE: X_resampled / y_resampled are rebound here, replacing the SMOTE
# results from the previous step.
rus = RandomUnderSampler(sampling_strategy=0.15, random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

# Tally the labels after undersampling and convert each class count into a
# percentage of the resampled dataset (index 0 -> class 0, index 1 -> class 1).
resampled_class_distribution = Counter(y_resampled)
resampled_class_percentage = [
    resampled_class_distribution[label] / len(y_resampled) * 100
    for label in (0, 1)
]

# Report both classes.
print("Class Distribution after RandomUnderSampler:")
for label, share in enumerate(resampled_class_percentage):
    print(f"Class {label}:", resampled_class_distribution[label], "samples",
          f"({share:.2f}%)")
