# In [None]:
"""
Data Cleaning Script using Pandas
---------------------------------
This script handles missing values in the dataset using:
- Mean for continuous numerical columns
- Median for skewed numerical columns
- Mode for categorical columns
"""

import pandas as pd

# Read the raw practice dataset (path is relative to the working directory)
# into the DataFrame that every later step mutates.
df = pd.read_csv(filepath_or_buffer="eda_practice_dataset.csv")

# -------------------------
# Handle numerical columns
# -------------------------

# Fill missing values with the column mean, then round the whole column
# to 2 decimal places (same fill-then-round shape as the median step).
mean_columns = ['age', 'hours_per_week']
for col in mean_columns:
    # The mean is taken from the column before filling; Series.mean skips
    # NaN by default, so only observed values contribute to it.
    df[col] = df[col].fillna(df[col].mean()).round(2)

# Impute gaps in these numeric columns with the column median, then round
# every value in the column to 2 decimal places.
median_columns = [
    'salary',
    'experience_years',
    'performance_score',
]
for col in median_columns:
    # The median is computed on the not-yet-filled column.
    df[col] = df[col].fillna(value=df[col].median()).round(decimals=2)

# -------------------------
# Handle categorical columns
# -------------------------

# Categorical columns whose gaps are filled with the most frequent value.
categorical_columns = [
    'city',
    'education_level',
    'gender',
    'department',
    'attrition',
]

for col in categorical_columns:
    # mode() ignores NaN and returns its results sorted, so the first entry
    # is a deterministic pick when several values tie for most frequent.
    modes = df[col].mode()
    if modes.empty:
        # Every value is missing: there is no mode to impute with, so leave
        # the column untouched instead of failing on an empty lookup.
        continue
    df[col] = df[col].fillna(modes.iloc[0])

# -------------------------
# Save cleaned dataset
# -------------------------

# Write the cleaned frame to disk; index=False keeps the DataFrame's row
# index out of the CSV.
df.to_csv("cleaned_data.csv", index=False)

# Status line for the user (leading character is U+2705, a check mark).
print("✅ Data cleaning completed successfully.")
