In [2]:
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE

def balance_binary_classes(df, target_col='target', random_state=42, k_neighbors=5):
    """
    Balance a classification dataset by oversampling with SMOTE.

    The target column is separated from the features, SMOTE is fitted with
    ``sampling_strategy='auto'`` (which, per the imbalanced-learn docs,
    resamples every class except the most frequent one), and the resampled
    features and target are reassembled into one DataFrame.

    Parameters:
    - df: Input DataFrame holding the feature columns and the target column.
      SMOTE interpolates between rows, so the features are expected to be
      numeric and free of missing values.
    - target_col: Name of the target column (default: 'target')
    - random_state: Seed passed to SMOTE for reproducibility (default: 42)
    - k_neighbors: Number of nearest neighbours SMOTE uses to synthesise
      samples (default: 5, the library default). Lower it when the minority
      class has too few rows for the default.

    Returns:
    - A new DataFrame with the feature columns in their original order
      followed by the target column, containing the original rows plus the
      synthetic rows SMOTE generated.

    Raises:
    - KeyError: if ``target_col`` is not a column of ``df``.
    - ValueError: if the target column holds fewer than two distinct classes
      (there is nothing to balance against).
    """
    # Fail early with a readable message instead of the opaque error that
    # DataFrame.drop produces for an unknown column.
    if target_col not in df.columns:
        raise KeyError(f"Target column {target_col!r} not found in DataFrame")

    X = df.drop(columns=[target_col])
    y = df[target_col]

    # Oversampling needs at least two classes to balance between.
    if y.nunique() < 2:
        raise ValueError(
            f"Target column {target_col!r} must contain at least two classes to balance"
        )

    # Apply SMOTE to balance the dataset
    smote = SMOTE(
        sampling_strategy='auto',
        random_state=random_state,
        k_neighbors=k_neighbors,
    )
    X_res, y_res = smote.fit_resample(X, y)

    # Rebuild a single frame; passing the column names explicitly keeps this
    # working even if the resampler hands back a bare array.
    balanced_df = pd.DataFrame(X_res, columns=X.columns)
    balanced_df[target_col] = y_res

    return balanced_df

# Usage: read the source CSV, balance it, then report the outcome.
source_csv = r'D:\Machine-Learning\ECG-Based Arrhythmia Detection\Binary_Classification\Binary.csv'
df = pd.read_csv(source_csv)
balanced_df = balance_binary_classes(df)

# Per-class row counts first, then the (rows, columns) shape of the balanced frame.
print(balanced_df['target'].value_counts(), balanced_df.shape, sep='\n')


target
1    437
0    437
Name: count, dtype: int64
(874, 30)


In [3]:
# Write the balanced frame to a CSV (path is relative to the current working
# directory); index=False keeps the row index out of the file.
output_csv = 'Balanced_binary.csv'
balanced_df.to_csv(output_csv, index=False)