In [None]:
#  Customer Personality Data Cleaning

#  Importing Libraries
import pandas as pd

#  File locations
RAW_CSV_PATH = 'customer_personality_150_raw.csv'
CLEANED_CSV_PATH = 'customer_personality_150_cleaned.csv'

#  Normalised column names the cleaning steps rely on
REQUIRED_COLUMNS = ('age', 'gender', 'country', 'join_date')


def clean_customer_data(raw_df):
    """Return a cleaned copy of the raw customer personality data.

    Steps, in order:
      1. Normalise column names (strip, lower-case, spaces -> underscores).
      2. Drop rows whose ``age`` is missing.
      3. Fill missing ``gender`` / ``country`` with ``'Unknown'``.
      4. Strip whitespace; lower-case ``gender`` (so the fill value ends up
         as ``'unknown'``) and title-case ``country``.
      5. Parse ``join_date`` day-first; unparseable values become ``NaT``.
      6. Drop fully duplicated rows.

    Args:
        raw_df: DataFrame whose headers normalise to ``age``, ``gender``,
            ``country`` and ``join_date``. It is not modified.

    Returns:
        A new, cleaned DataFrame with normalised column names.

    Raises:
        KeyError: If any required column is absent after normalisation.
    """
    cleaned = raw_df.copy()

    #  Step 1: Rename columns FIRST, so headers with stray whitespace or
    #  different casing (e.g. ' Age ') can still be addressed by name below
    cleaned.columns = cleaned.columns.str.strip().str.lower().str.replace(" ", "_")

    missing = [name for name in REQUIRED_COLUMNS if name not in cleaned.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    #  Step 2: Drop rows with missing age (critical field)
    #  The explicit copy keeps the column assignments below from being
    #  treated as writes to a slice of another frame.
    cleaned = cleaned.dropna(subset=['age']).copy()

    #  Step 3: Fill gender and country missing values with 'Unknown'
    cleaned['gender'] = cleaned['gender'].fillna('Unknown')
    cleaned['country'] = cleaned['country'].fillna('Unknown')

    #  Step 4: Clean up formatting
    cleaned['gender'] = cleaned['gender'].str.strip().str.lower()
    cleaned['country'] = cleaned['country'].str.strip().str.title()

    #  Step 5: Convert join_date to datetime (invalid values -> NaT)
    cleaned['join_date'] = pd.to_datetime(
        cleaned['join_date'], dayfirst=True, errors='coerce'
    )

    #  Step 6: Remove duplicates (after cleaning, so rows that only differed
    #  in whitespace/casing are collapsed too)
    return cleaned.drop_duplicates()


if __name__ == "__main__":
    #  Load Raw Dataset
    df = pd.read_csv(RAW_CSV_PATH)

    # Explore Data
    print("Initial Info:\n")
    #  info() prints its report itself and returns None, so it is not
    #  wrapped in print() (that would emit an extra "None" line)
    df.info()
    print("\nFirst 5 rows:\n")
    print(df.head())

    #  Clean
    df = clean_customer_data(df)

    #  Save Cleaned Data
    df.to_csv(CLEANED_CSV_PATH, index=False)

    #  Show final data
    print("\nCleaned Data Preview:")
    print(df.head())