## Load DataSet

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset (Replace 'your_dataset.csv' with actual file name)
df = pd.read_csv('your_dataset.csv')

# Display basic info.
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
df.info()

# Check for missing values: per-column count of null entries
print(df.isnull().sum())


## Impute Missing Values

In [None]:
# Impute numerical columns using mean.
# Assign the result back instead of calling fillna(inplace=True) on
# df[col]: the chained in-place form operates on an intermediate object,
# triggers a FutureWarning in pandas 2.x and does not update df at all
# once Copy-on-Write is enabled.
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].mean())

# Impute categorical columns using mode
for col in df.select_dtypes(include=['object']).columns:
    modes = df[col].mode()
    # mode() is empty when every value in the column is missing; there is
    # nothing sensible to fill with, so leave such a column untouched
    # rather than crashing on modes[0].
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

# Interpolation for sequential data.
# Restricted to the numeric columns, since linear interpolation is not
# defined for object columns; skipped when there are no numeric columns.
# NOTE: after the mean imputation above this only affects gaps the mean
# could not fill. For genuinely sequential data, run this step instead of
# (not after) the mean imputation.
if len(numeric_cols) > 0:
    df[numeric_cols] = df[numeric_cols].interpolate(method='linear')

## Compare Before and After Imputation

In [None]:
# Draw the boolean null-mask of df as a heatmap so that any cell that is
# still missing after imputation stands out.
missing_mask = df.isnull()

plt.figure(figsize=(10, 5))
sns.heatmap(missing_mask, cmap='viridis', cbar=False)
plt.title("Missing Values After Imputation")
plt.show()


#  Balancing Data with SMOTE & Undersampling

In [None]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

# Read the imbalanced dataset from disk (Replace with your dataset)
df = pd.read_csv('imbalanced_dataset.csv')

# Tally how many rows fall into each class before any resampling
# (Replace 'target' with actual class column)
class_counts = Counter(df['target'])
print("Before Balancing:", class_counts)


## Apply SMOTE

In [None]:
# Split the frame into the class labels and the remaining feature columns
y = df['target']
X = df.drop(columns='target')

# Oversample with SMOTE; random_state is fixed so reruns give the same result
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)

# Class counts after oversampling
smote_counts = Counter(y_smote)
print("After SMOTE:", smote_counts)


## Apply Random Undersampling

In [None]:
# Randomly undersample the original X, y (not the SMOTE output);
# random_state is fixed so reruns give the same result
undersample = RandomUnderSampler(sampling_strategy='auto', random_state=42)
X_under, y_under = undersample.fit_resample(X, y)

# Class counts after undersampling
under_counts = Counter(y_under)
print("After Undersampling:", under_counts)


# Visualizing Data (Before & After Preprocessing)

## Class Distribution Before & After Balancing

In [None]:
# One count plot per label vector, in order: the original labels, the
# SMOTE-resampled labels, and the undersampled labels. Each gets its own
# figure and is shown before the next one is created.
for labels, title in (
    (y, "Before Balancing"),
    (y_smote, "After SMOTE (Over-sampling)"),
    (y_under, "After Random Undersampling"),
):
    plt.figure(figsize=(10, 5))
    sns.countplot(x=labels, palette='coolwarm')
    plt.title(title)
    plt.show()
