In [91]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import pandas as pd
from sklearn.preprocessing import StandardScaler
from datetime import datetime
 
df = pd.read_csv("marketing_campaign.csv",sep='\t',header=(0))
df = df.dropna() # income contains nulls
print(df.nunique())

#Feature engineering
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], format='%d-%m-%Y')
df['Days_Since_Joining'] = (pd.to_datetime('today') - df['Dt_Customer']).dt.days
df['Year_Enrolled'] = df['Dt_Customer'].dt.year
df['Customer_Age'] = (pd.to_datetime('today').year - df['Year_Birth'])

# Aggegrating similar columns
df['Spending'] = df[['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']].sum(axis=1)
df = df.drop(['Dt_Customer', 'Z_Revenue', 'Z_CostContact', 'ID', 'Year_Birth'], axis=1)


print(df.info())

ID                     2216
Year_Birth               59
Education                 5
Marital_Status            8
Income                 1974
Kidhome                   3
Teenhome                  3
Dt_Customer             662
Recency                 100
MntWines                776
MntFruits               158
MntMeatProducts         554
MntFishProducts         182
MntSweetProducts        176
MntGoldProds            212
NumDealsPurchases        15
NumWebPurchases          15
NumCatalogPurchases      14
NumStorePurchases        14
NumWebVisitsMonth        16
AcceptedCmp3              2
AcceptedCmp4              2
AcceptedCmp5              2
AcceptedCmp1              2
AcceptedCmp2              2
Complain                  2
Z_CostContact             1
Z_Revenue                 1
Response                  2
dtype: int64
<class 'pandas.core.frame.DataFrame'>
Index: 2216 entries, 0 to 2239
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype  
---  ------            

In [92]:
print(df[['Education']].value_counts())
print(df[['Marital_Status']].value_counts())

df['Education'] = df['Education'].replace(
    {'Graduation': 'Graduation',
     'PhD': 'Postgraduate',
     'Master': 'Postgraduate',
     '2n Cycle': 'Postgraduate',
     'Basic': 'Other'})

print(df[['Education']].value_counts())

df['Marital_Status'] = df['Marital_Status'].replace(
    {'Married': 'Together',
    'Together': 'Together',
    'Single': 'Single',
    'Divorced': 'Single',
    'Widow': 'Single',
    'Alone': 'Single',
    'Absurd': 'Single',
    'YOLO': 'Single'
    })

print(df[['Marital_Status']].value_counts())

df = pd.get_dummies(df, columns=['Education', 'Marital_Status']).astype(int)

Education 
Graduation    1116
PhD            481
Master         365
2n Cycle       200
Basic           54
Name: count, dtype: int64
Marital_Status
Married           857
Together          573
Single            471
Divorced          232
Widow              76
Alone               3
Absurd              2
YOLO                2
Name: count, dtype: int64
Education   
Graduation      1116
Postgraduate    1046
Other             54
Name: count, dtype: int64
Marital_Status
Together          1430
Single             786
Name: count, dtype: int64


In [93]:
df.head(2)

Unnamed: 0,Income,Kidhome,Teenhome,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,...,Response,Days_Since_Joining,Year_Enrolled,Customer_Age,Spending,Education_Graduation,Education_Other,Education_Postgraduate,Marital_Status_Single,Marital_Status_Together
0,58138,0,0,58,635,88,546,172,88,88,...,1,4423,2012,67,1617,1,0,0,1,0
1,46344,1,1,38,11,1,6,2,1,6,...,0,3873,2014,70,27,1,0,0,1,0


In [100]:
from sklearn.cluster import KMeans

features = df

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

kmeans = KMeans(n_clusters=3, random_state=42)
df['Cluster'] = kmeans.fit_predict(features)
print(df['Cluster'].value_counts())
cluster_profiles = df.groupby('Cluster').mean()
print(cluster_profiles)

Cluster
1    827
0    787
2    602
Name: count, dtype: int64
               Income   Kidhome  Teenhome    Recency    MntWines  MntFruits  \
Cluster                                                                       
0        29087.686150  0.804320  0.337992  48.612452   34.158831   6.123253   
1        54410.155985  0.362757  0.805320  49.345828  323.383313  21.239420   
2        79552.659468  0.076412  0.312292  49.078073  634.156146  59.835548   

         MntMeatProducts  MntFishProducts  MntSweetProducts  MntGoldProds  \
Cluster                                                                     
0              26.119441         9.279543          6.170267     18.273189   
1             114.304716        28.903265         20.130593     48.220073   
2             423.549834        86.709302         63.774086     71.707641   

         ...  Response  Days_Since_Joining  Year_Enrolled  Customer_Age  \
Cluster  ...                                                              
0      