<a href="https://colab.research.google.com/github/MOHAMMAD-ALI-ZAIDI/Credit-Card-Fraud-Detection/blob/main/Customer_Personality_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd

# Load dataset
df = pd.read_csv("/content/marketing_campaign.csv", sep="\t")

# 1. Remove duplicate rows
df = df.drop_duplicates()

# 2. Handle missing values
# Fill missing 'Income' with the median income
df['Income'] = df['Income'].fillna(df['Income'].median())

# 3. Standardize text fields
df['Education'] = df['Education'].str.strip().str.title()
df['Marital_Status'] = df['Marital_Status'].str.strip().str.title()

# Fix known inconsistent values in 'Marital_Status'
df['Marital_Status'] = df['Marital_Status'].replace({
    'Alone': 'Single',
    'Absurd': 'Single',
    'Yolo': 'Single',
    'Divorced': 'Divorced',
    'Married': 'Married',
    'Together': 'Together',
    'Widow': 'Widow'
})

# 4. Convert 'Dt_Customer' to datetime
# If any missing dates, fill with a default value
df['Dt_Customer'] = df['Dt_Customer'].fillna("01-01-2014")
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], format='%d-%m-%Y', errors='coerce')

# Replace any parsing errors with the default date
df['Dt_Customer'] = df['Dt_Customer'].fillna(pd.to_datetime("2014-01-01"))

# 5. Rename columns
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("-", "_")
)

# 6. Fix data types (if necessary)
df = df.astype({
    'income': float,
    'year_birth': int,
    'kidhome': int,
    'teenhome': int,
    'recency': int,
    'z_costcontact': int,
    'z_revenue': int
})

# 7. Save cleaned data
df.to_csv("marketing_campaign_cleaned.csv", index=False)

# 8. Summary
print("✅ Cleaning complete!")
print("Rows:", df.shape[0])
print("Columns:", df.shape[1])
print("\n📋 Sample cleaned data:\n", df.head())


✅ Cleaning complete!
Rows: 2240
Columns: 29

📋 Sample cleaned data:
      id  year_birth   education marital_status   income  kidhome  teenhome  \
0  5524        1957  Graduation         Single  58138.0        0         0   
1  2174        1954  Graduation         Single  46344.0        1         1   
2  4141        1965  Graduation       Together  71613.0        0         0   
3  6182        1984  Graduation       Together  26646.0        1         0   
4  5324        1981         Phd        Married  58293.0        1         0   

  dt_customer  recency  mntwines  ...  numwebvisitsmonth  acceptedcmp3  \
0  2012-09-04       58       635  ...                  7             0   
1  2014-03-08       38        11  ...                  5             0   
2  2013-08-21       26       426  ...                  4             0   
3  2014-02-10       26        11  ...                  6             0   
4  2014-01-19       94       173  ...                  5             0   

   acceptedcmp4  