In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv("/content/marketing_campaign.csv")  # Change this to your actual file name

# 1. Identify and handle missing values
missing_values = df.isnull().sum()
print("Missing values before cleaning:\n", missing_values)

# Option: Drop rows with any nulls (you can choose to fill instead)
df = df.dropna()

# 2. Remove duplicate rows
df = df.drop_duplicates()

# 3. Standardize text values
# Example for gender and country columns (adjust according to your dataset)
text_columns = ['gender', 'country']  # Add relevant text columns
for col in text_columns:
    if col in df.columns:
        df[col] = df[col].astype(str).str.lower().str.strip()

        if col == 'gender':
            df[col] = df[col].replace({'m': 'male', 'f': 'female'})

# 4. Convert date formats
# Example for a 'date' column
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')

# 5. Rename columns: lowercase, no spaces
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# 6. Fix data types
# Example for age column
if 'age' in df.columns:
    df['age'] = pd.to_numeric(df['age'], errors='coerce').astype('Int64')

# Export cleaned dataset
df.to_csv("cleaned_dataset.csv", index=False)
print("Cleaned dataset saved as cleaned_dataset.csv")


Missing values before cleaning:
 ID\tYear_Birth\tEducation\tMarital_Status\tIncome\tKidhome\tTeenhome\tDt_Customer\tRecency\tMntWines\tMntFruits\tMntMeatProducts\tMntFishProducts\tMntSweetProducts\tMntGoldProds\tNumDealsPurchases\tNumWebPurchases\tNumCatalogPurchases\tNumStorePurchases\tNumWebVisitsMonth\tAcceptedCmp3\tAcceptedCmp4\tAcceptedCmp5\tAcceptedCmp1\tAcceptedCmp2\tComplain\tZ_CostContact\tZ_Revenue\tResponse    0
dtype: int64
Cleaned dataset saved as cleaned_dataset.csv
