### Detecting & Handling Imbalanced Data: Visualizing Class Imbalance
**Question**: Load the Credit Card Fraud Detection dataset and visualize the class imbalance. Then apply random undersampling to balance it.

In [2]:
# write your code from here
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import resample

# --- Step 1: Load the Dataset ---
def load_data(file_path):
    """
    Read the Credit Card Fraud Detection CSV into a DataFrame.

    Failures are reported on stdout instead of being raised, so callers
    should check ``.empty`` on the result before using it.

    Args:
        file_path (str): Location of the CSV file.

    Returns:
        pd.DataFrame: The parsed dataset, or an empty DataFrame when the
        file could not be read.
    """
    try:
        frame = pd.read_csv(file_path)
        n_rows, n_cols = frame.shape
        print(f"Data loaded successfully with {n_rows} rows and {n_cols} columns.")
    except Exception as err:
        # Best-effort loader: describe the problem and fall back to an
        # empty frame so the caller can skip the remaining steps.
        print(f"Error loading data: {err}")
        frame = pd.DataFrame()
    return frame

# --- Step 2: Visualize Class Imbalance ---
def visualize_class_imbalance(df):
    """
    Draw a count plot of the ``Class`` column to show how skewed it is.

    Plotting problems are printed to stdout rather than raised.

    Args:
        df (pd.DataFrame): Dataset whose ``Class`` column holds the labels.
    """
    try:
        plt.figure(figsize=(6, 4))
        # countplot draws on the current axes and returns them, so the
        # labels can be set directly on that object.
        axes = sns.countplot(x='Class', data=df, palette='coolwarm')
        axes.set_title("Class Distribution: Fraud vs Non-Fraud")
        axes.set_xlabel("Class")
        axes.set_ylabel("Count")
        plt.show()
    except Exception as err:
        print(f"Error during visualization: {err}")

# --- Step 3: Apply Random Undersampling ---
def random_undersample(df):
    """
    Balance the dataset by randomly undersampling the larger class(es).

    The smallest class in the ``Class`` column sets the target size; every
    larger class is sampled down to that size without replacement. Which
    label is the majority is worked out from the data, so the function
    also works when class 1 outnumbers class 0. Sampling uses a fixed seed
    (42), so repeated calls on the same data return the same rows.

    Args:
        df (pd.DataFrame): The original imbalanced dataset. It must contain
            a ``Class`` column.

    Returns:
        pd.DataFrame: The balanced dataset, keeping the original row index,
        with classes ordered from most to least frequent in the input.
        An empty DataFrame is returned (and the error printed) if
        undersampling fails, e.g. when the ``Class`` column is missing.
    """
    try:
        # value_counts() ignores missing labels and sorts by frequency,
        # most frequent first.
        class_counts = df['Class'].value_counts()
        if class_counts.empty:
            raise ValueError("no labelled rows found in the 'Class' column")

        target_size = int(class_counts.min())

        parts = []
        for label in class_counts.index:
            rows = df[df['Class'] == label]
            if len(rows) > target_size:
                # Only classes larger than the target are sampled; the
                # smallest class is kept whole and in its original order.
                rows = rows.sample(n=target_size, replace=False, random_state=42)
            parts.append(rows)

        balanced_df = pd.concat(parts)

        print(f"Balanced dataset created with {balanced_df.shape[0]} rows.")
        return balanced_df
    except Exception as e:
        print(f"Error during undersampling: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error

# --- Step 4: Plot Balanced Dataset ---
def visualize_balanced_class_imbalance(df):
    """
    Draw a count plot of the ``Class`` column after random undersampling.

    Plotting problems are printed to stdout rather than raised.

    Args:
        df (pd.DataFrame): The balanced dataset with a ``Class`` column.
    """
    try:
        plt.figure(figsize=(6, 4))
        chart = sns.countplot(x='Class', data=df, palette='coolwarm')
        # Title and axis labels are applied in one call on the axes that
        # countplot returned.
        chart.set(
            title="Balanced Class Distribution: Fraud vs Non-Fraud",
            xlabel="Class",
            ylabel="Count",
        )
        plt.show()
    except Exception as err:
        print(f"Error during visualization: {err}")

# --- Step 5: Execution ---
def main(file_path):
    """
    Run the full demo: load, plot, undersample, plot again.

    The pipeline stops quietly after any stage that produces an empty
    DataFrame (each stage prints its own error message).

    Args:
        file_path (str): Path to the CSV file handed to ``load_data``.
    """
    raw = load_data(file_path)
    if raw.empty:
        return

    # Class distribution before balancing.
    visualize_class_imbalance(raw)

    balanced = random_undersample(raw)
    if balanced.empty:
        return

    # Class distribution after balancing.
    visualize_balanced_class_imbalance(balanced)

# --- Run the Process ---
# The path below is a placeholder: replace it with the location of your
# Credit Card Fraud Detection CSV file before running this cell.
# If the file cannot be read, load_data prints an error and main() skips
# the plotting and undersampling steps.
file_path = 'path_to_your_credit_card_fraud_detection_data.csv'
main(file_path)


Error loading data: [Errno 2] No such file or directory: 'path_to_your_credit_card_fraud_detection_data.csv'
