#### Import relevant libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

#### Load dataset

In [2]:
# Read the campaign CSV into a DataFrame; the path is relative to the
# directory the notebook is run from.
marketing_data = pd.read_csv("data/marketing_campaign.csv")

#### Subset for relevant columns

In [3]:
# Keep only the eleven Mnt* and Num* columns used for clustering below;
# every other column of the loaded file is dropped.
marketing_data = marketing_data.loc[
    :,
    [
        'MntWines',
        'MntFruits',
        'MntMeatProducts',
        'MntFishProducts',
        'MntSweetProducts',
        'MntGoldProds',
        'NumDealsPurchases',
        'NumWebPurchases',
        'NumCatalogPurchases',
        'NumStorePurchases',
        'NumWebVisitsMonth',
    ],
]

#### Inspect the first 5 rows, shape, and data types of the dataset

In [4]:
# Preview the first five rows of the subset.
marketing_data.head(n=5)

Unnamed: 0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
0,635,88,546,172,88,88,3,8,10,4,7
1,11,1,6,2,1,6,2,1,1,2,5
2,426,49,127,111,21,42,1,8,2,10,4
3,11,4,20,10,3,5,2,2,0,4,6
4,173,43,118,46,27,15,5,5,3,6,5


In [5]:
# (number of rows, number of columns) of the subset
marketing_data.shape

(2240, 11)

In [6]:
# Per-column dtypes; the scaler and KMeans below need numeric input.
marketing_data.dtypes

MntWines               int64
MntFruits              int64
MntMeatProducts        int64
MntFishProducts        int64
MntSweetProducts       int64
MntGoldProds           int64
NumDealsPurchases      int64
NumWebPurchases        int64
NumCatalogPurchases    int64
NumStorePurchases      int64
NumWebVisitsMonth      int64
dtype: object

#### Scale the data

In [7]:
# Learn each column's mean and standard deviation, then standardise the
# columns with those statistics. The result is a NumPy array, not a DataFrame.
scaler = StandardScaler().fit(marketing_data)
marketing_data_scaled = scaler.transform(marketing_data)

#### Build K-Means model

In [8]:
# Four clusters, k-means++ seeding, fixed seed for reproducible assignments.
# n_init is pinned to 10 (the library default before scikit-learn 1.4) so the
# clustering does not silently change with the installed version: from 1.4 on
# the default 'auto' runs only a single initialisation for k-means++.
kmeans = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=1)

kmeans.fit(marketing_data_scaled)

KMeans(n_clusters=4, random_state=1)

In [9]:
# Reuse the labels of the model fitted in the previous cell. Calling
# fit_predict here would re-run the whole clustering a second time only to
# arrive at the same assignments (random_state is fixed).
label = kmeans.labels_

# Attach the labels to a copy so the unscaled input frame stays untouched.
marketing_data_output = marketing_data.copy()
marketing_data_output['cluster'] = label

# Number of rows assigned to each cluster.
marketing_data_output['cluster'].value_counts()

0    1020
2     475
3     467
1     278
Name: cluster, dtype: int64

#### Profiling Clusters

In [10]:
# Columns to profile per cluster (the same eleven features that were clustered).
cols = [
    'MntWines',
    'MntFruits',
    'MntMeatProducts',
    'MntFishProducts',
    'MntSweetProducts',
    'MntGoldProds',
    'NumDealsPurchases',
    'NumWebPurchases',
    'NumCatalogPurchases',
    'NumStorePurchases',
    'NumWebVisitsMonth',
]

In [11]:
# Mean of every profiled column over all rows, as a one-column DataFrame
# indexed by feature name. The built-in .mean() replaces .apply(np.mean),
# and .to_frame() names the column directly instead of transposing a Series
# (a no-op) and re-wrapping it in a DataFrame.
overall_mean = marketing_data_output[cols].mean().to_frame('overall_average')
overall_mean

Unnamed: 0,overall_average
MntWines,303.935714
MntFruits,26.302232
MntMeatProducts,166.95
MntFishProducts,37.525446
MntSweetProducts,27.062946
MntGoldProds,44.021875
NumDealsPurchases,2.325
NumWebPurchases,4.084821
NumCatalogPurchases,2.662054
NumStorePurchases,5.790179


In [12]:
# Mean of every profiled column within each cluster, transposed so that
# features are rows and cluster labels are columns.
cluster_mean = (
    marketing_data_output
    .groupby('cluster')[cols]
    .mean()
    .transpose()
)
cluster_mean

cluster,0,1,2,3
MntWines,40.580392,535.892086,627.526316,411.929336
MntFruits,4.913725,98.348921,40.991579,15.188437
MntMeatProducts,21.498039,460.676259,363.021053,110.357602
MntFishProducts,7.219608,133.233813,63.473684,20.351178
MntSweetProducts,5.066667,103.719424,40.835789,15.464668
MntGoldProds,14.696078,98.370504,61.261053,58.186296
NumDealsPurchases,1.869608,1.438849,1.677895,4.505353
NumWebPurchases,2.017647,5.636691,5.277895,6.462527
NumCatalogPurchases,0.556863,5.683453,5.635789,2.436831
NumStorePurchases,3.228431,8.241007,8.713684,6.952891


In [13]:
# Put the per-cluster means next to the overall mean; both frames are
# indexed by feature name, so the rows line up.
pd.concat([cluster_mean, overall_mean], axis="columns")

Unnamed: 0,0,1,2,3,overall_average
MntWines,40.580392,535.892086,627.526316,411.929336,303.935714
MntFruits,4.913725,98.348921,40.991579,15.188437,26.302232
MntMeatProducts,21.498039,460.676259,363.021053,110.357602,166.95
MntFishProducts,7.219608,133.233813,63.473684,20.351178,37.525446
MntSweetProducts,5.066667,103.719424,40.835789,15.464668,27.062946
MntGoldProds,14.696078,98.370504,61.261053,58.186296,44.021875
NumDealsPurchases,1.869608,1.438849,1.677895,4.505353,2.325
NumWebPurchases,2.017647,5.636691,5.277895,6.462527,4.084821
NumCatalogPurchases,0.556863,5.683453,5.635789,2.436831,2.662054
NumStorePurchases,3.228431,8.241007,8.713684,6.952891,5.790179
