* ## Marketing Campaign

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [9]:
import pandas as pd


df = pd.read_csv("marketing_campaign.csv", sep="\t")

# 1. Remove duplicate rows
df = df.drop_duplicates()

# 2. Handle missing values
df['Income'].fillna(df['Income'].median(), inplace=True)

# 3. Standardize column names
df.columns = df.columns.str.lower().str.replace(" ", "_")

# 4. Convert 'dt_customer' column to datetime
df['dt_customer'] = pd.to_datetime(df['dt_customer'], format="%d-%m-%Y")

# 5. Standardize categorical values

# Education: group '2n Cycle' and 'Basic' as 'Undergraduate'
df['education'] = df['education'].replace({
    '2n Cycle': 'Undergraduate',
    'Basic': 'Undergraduate'
})

# Marital Status: group unusual values under 'Other'
df['marital_status'] = df['marital_status'].replace({
    'Alone': 'Other',
    'Absurd': 'Other',
    'YOLO': 'Other'
})

# 6. (Optional) Drop columns with constant values (e.g., z_costcontact, z_revenue)
# These columns are not useful for analysis
df.drop(columns=['z_costcontact', 'z_revenue'], inplace=True)

# 7. Save the cleaned dataset
df.to_csv("marketing_campaign_cleaned.csv", index=False)

print("✅ Cleaning complete. File saved as 'marketing_campaign_cleaned.csv'")


✅ Cleaning complete. File saved as 'marketing_campaign_cleaned.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Income'].fillna(df['Income'].median(), inplace=True)
