# Data Cleaning and Preprocessing in Python (Pandas)

In [None]:
import pandas as pd

# Read the campaign export from the current working directory.
# The file is expected to be tab-delimited; change `delimiter` if your copy
# uses a different separator.
df = pd.read_csv("marketing_campaign.csv", delimiter='\t')
# Preview the first rows (kept as the last expression so the notebook renders it)
df.head()

## Step 1: Explore Dataset

In [None]:
# Display basic info and stats
# DataFrame.info() writes its report directly to stdout and returns None, so it
# is called on its own; wrapping it in print() would add a stray "None" line.
df.info()
# describe() returns a summary-statistics DataFrame, so this one does need print().
print(df.describe())

## Step 2: Handle Missing Values

In [None]:
# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# Fill missing Income values with the column median.
# The result is assigned back to the column instead of calling
# df['Income'].fillna(..., inplace=True): that chained in-place call acts on an
# intermediate Series, raises a FutureWarning on recent pandas 2.x releases, and
# no longer updates df under Copy-on-Write (the default from pandas 3.0).
df['Income'] = df['Income'].fillna(df['Income'].median())

## Step 3: Remove Duplicates

In [None]:
# Remove rows that repeat an earlier row in every column, keeping the first
# occurrence of each; df itself is modified.
df.drop_duplicates(subset=None, keep='first', inplace=True)

## Step 4: Convert Data Types

In [None]:
# Parse Dt_Customer from day-month-year strings (e.g. "04-09-2012") into
# datetime values; any entry that does not match the format raises an error.
df['Dt_Customer'] = pd.to_datetime(
    df['Dt_Customer'],
    format='%d-%m-%Y',
    errors='raise',
)

## Step 5: Clean Inconsistent Categorical Values

In [None]:
# Collapse the non-standard Marital_Status labels into 'Single'.
# dict.fromkeys maps each listed label to the same replacement value.
df['Marital_Status'] = df['Marital_Status'].replace(
    dict.fromkeys(('Alone', 'Absurd', 'YOLO'), 'Single')
)

## Step 6: Save Cleaned Dataset

In [None]:
# Write the cleaned frame to a new CSV file, leaving out the row index.
# to_csv uses its default comma separator here, unlike the tab-separated input.
df.to_csv(path_or_buf="cleaned_marketing_campaign.csv", index=False)

## ✅ Final Check

In [None]:
# Confirm the run finished and summarise the cleaned frame.
print("✅ Cleaning Completed")
# (rows, columns) after cleaning
print(f"Shape: {df.shape}")
# Per-column count of values that are still missing
print("Missing values:\n", df.isna().sum(), sep=' ')