In [1]:
%matplotlib inline

from pathlib import Path

import pandas as pd
import matplotlib.pylab as plt
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

from surprise import dataset, Reader
from surprise.prediction_algorithms import KNNBasic
from surprise.model_selection import train_test_split

In [2]:
# Relative directory from which the notebook reads its input file(s).
DATA = Path("marketing")

In [3]:
# Column labels applied to the CSV, in file order. Because `names` is given
# and `header` is not, pandas treats every line of the file as data.
MARKET_COLUMNS = [
    'Income',
    'Sex',
    'MaritalStatus',
    'Age',
    'Education',
    'Occupation',
    'YearsInBayAreas',
    'DualIncome',
    'NumberInHoushold',
    'NumberOfChildren',
    'HousholderStatus',
    'TypeOfHome',
    'EthnicClassification',
    'LanguageInHome',
]

market_csv_path = DATA / 'marketing.csv'
market_df = pd.read_csv(market_csv_path, names=MARKET_COLUMNS)

# Preview the first five rows (head() defaults to n=5).
market_df.head()

Unnamed: 0,Income,Sex,MaritalStatus,Age,Education,Occupation,YearsInBayAreas,DualIncome,NumberInHoushold,NumberOfChildren,HousholderStatus,TypeOfHome,EthnicClassification,LanguageInHome
0,9,2,1.0,5,4.0,5.0,5.0,3,3.0,0,1.0,1.0,7.0,
1,9,1,1.0,5,5.0,5.0,5.0,3,5.0,2,1.0,1.0,7.0,1.0
2,9,2,1.0,3,5.0,1.0,5.0,2,3.0,1,2.0,3.0,7.0,1.0
3,1,2,5.0,1,2.0,6.0,5.0,1,4.0,2,3.0,1.0,7.0,1.0
4,1,2,5.0,1,2.0,6.0,3.0,1,4.0,2,3.0,1.0,7.0,1.0


In [4]:
# Discard every row that has a missing value in at least one column,
# then show the remaining (rows, columns) count.
market_df = market_df.dropna(axis='index', how='any')
market_df.shape

(6876, 14)

In [5]:
# Bin the ordinal data: collapse each ordinal column into two labelled groups.
# Every entry maps a column name to (bin edges, labels). pd.cut builds
# right-closed intervals by default, so edges [0, 4, 7] put values in (0, 4]
# under the first label and values in (4, 7] under the second.
# NumberOfChildren starts at -1 so that a value of 0 falls inside the first bin.
# NOTE(review): the real-world meaning of the cut points (e.g. "45", "30K")
# is implied only by the label names -- confirm against the data dictionary.
ORDINAL_BINS = {
    'Age': ([0, 4, 7], ['AgeBlw45', 'AgeAbv45']),
    'Education': ([0, 3, 6], ['HighSchool', 'College']),
    'Income': ([0, 5, 9], ['IncBlw30K', 'IncAbv30K']),
    'YearsInBayAreas': ([0, 3, 5], ['YrsBlw7', 'YrsAbv7']),
    'NumberInHoushold': ([0, 5, 9], ['NumHousBlw6', 'NumHousAbv6']),
    'NumberOfChildren': ([-1, 5, 9], ['NumChlBlw6', 'NumChlAbv6']),
}

# Build a NEW frame rather than aliasing market_df. A plain
# `market_df_binned = market_df` only binds a second name to the same object,
# so the binning would overwrite market_df as well and the cell could not be
# re-run (pd.cut would then receive the already-binned labels).
# assign() returns a new frame in which the listed columns are replaced in
# their existing positions; each cut reads from the untouched market_df.
market_df_binned = market_df.assign(**{
    column: pd.cut(market_df[column], bins=edges, labels=labels)
    for column, (edges, labels) in ORDINAL_BINS.items()
})

In [6]:
# Display the binned frame to check the new labels (the notebook shows a truncated view).
market_df_binned

Unnamed: 0,Income,Sex,MaritalStatus,Age,Education,Occupation,YearsInBayAreas,DualIncome,NumberInHoushold,NumberOfChildren,HousholderStatus,TypeOfHome,EthnicClassification,LanguageInHome
1,IncAbv30K,1,1.0,AgeAbv45,College,5.0,YrsAbv7,3,NumHousBlw6,NumChlBlw6,1.0,1.0,7.0,1.0
2,IncAbv30K,2,1.0,AgeBlw45,College,1.0,YrsAbv7,2,NumHousBlw6,NumChlBlw6,2.0,3.0,7.0,1.0
3,IncBlw30K,2,5.0,AgeBlw45,HighSchool,6.0,YrsAbv7,1,NumHousBlw6,NumChlBlw6,3.0,1.0,7.0,1.0
4,IncBlw30K,2,5.0,AgeBlw45,HighSchool,6.0,YrsBlw7,1,NumHousBlw6,NumChlBlw6,3.0,1.0,7.0,1.0
5,IncAbv30K,1,1.0,AgeAbv45,College,8.0,YrsAbv7,3,NumHousBlw6,NumChlBlw6,1.0,1.0,7.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8988,IncBlw30K,2,5.0,AgeBlw45,HighSchool,2.0,YrsAbv7,1,NumHousBlw6,NumChlBlw6,3.0,1.0,7.0,1.0
8989,IncBlw30K,1,5.0,AgeBlw45,College,1.0,YrsAbv7,1,NumHousBlw6,NumChlBlw6,3.0,1.0,7.0,1.0
8990,IncBlw30K,2,5.0,AgeBlw45,HighSchool,1.0,YrsAbv7,1,NumHousBlw6,NumChlBlw6,3.0,1.0,7.0,1.0
8991,IncBlw30K,1,1.0,AgeAbv45,College,3.0,YrsAbv7,2,NumHousBlw6,NumChlBlw6,2.0,3.0,7.0,1.0


In [7]:
# Lookup tables that translate the integer category codes into readable
# labels, so the dummy-variable column names produced later are informative.
# Codes are consecutive integers starting at 1, so each table is built by
# numbering its labels in order.
key_Sex = dict(enumerate(['Male', 'Female'], start=1))

key_MaritalStatus = dict(enumerate([
    'Married',
    'LivingTogetherUnmarried',
    "Divorced/Seperated",
    'Widowed',
    'Single/NeverMarried',
], start=1))

key_Occupation = dict(enumerate([
    'Professional/Managerial',
    'SalesWorker',
    'Factory Worker/Laborer/Driver',
    'Clerical/Service Worker',
    'Homemaker',
    'Student',
    'Military',
    'Retired',
    'Unemployed',
], start=1))

key_DualIncome = dict(enumerate(["Unmarried", "Yes", 'No'], start=1))

key_HousholderStatus = dict(enumerate(['Own', 'Rent', 'LiveWithFamily'], start=1))

key_TypeOfHome = dict(enumerate([
    'House',
    'Condominium',
    'Apartment',
    'MobileHome',
    'Other',
], start=1))

key_EthnicClassification = dict(enumerate([
    'AmericanIndian',
    'Asian',
    'Black',
    'EastIndian',
    'Hispanic',
    'PacificIslander',
    'White',
    'Other',
], start=1))

key_LanguageInHome = dict(enumerate(['English', 'Spanish', 'Other'], start=1))

In [8]:
# Pair each coded categorical column with the lookup table that decodes it.
category_keys = {
    'Sex': key_Sex,
    'MaritalStatus': key_MaritalStatus,
    'Occupation': key_Occupation,
    'DualIncome': key_DualIncome,
    'HousholderStatus': key_HousholderStatus,
    'TypeOfHome': key_TypeOfHome,
    'EthnicClassification': key_EthnicClassification,
    'LanguageInHome': key_LanguageInHome,
}

# Swap the numeric codes for their labels, one column at a time.
# replace() leaves any value that is not a key of the table unchanged.
for column_name, code_to_label in category_keys.items():
    market_df_binned[column_name] = market_df_binned[column_name].replace(code_to_label)

market_df_binned

Unnamed: 0,Income,Sex,MaritalStatus,Age,Education,Occupation,YearsInBayAreas,DualIncome,NumberInHoushold,NumberOfChildren,HousholderStatus,TypeOfHome,EthnicClassification,LanguageInHome
1,IncAbv30K,Male,Married,AgeAbv45,College,Homemaker,YrsAbv7,No,NumHousBlw6,NumChlBlw6,Own,House,White,English
2,IncAbv30K,Female,Married,AgeBlw45,College,Professional/Managerial,YrsAbv7,Yes,NumHousBlw6,NumChlBlw6,Rent,Apartment,White,English
3,IncBlw30K,Female,Single/NeverMarried,AgeBlw45,HighSchool,Student,YrsAbv7,Unmarried,NumHousBlw6,NumChlBlw6,LiveWithFamily,House,White,English
4,IncBlw30K,Female,Single/NeverMarried,AgeBlw45,HighSchool,Student,YrsBlw7,Unmarried,NumHousBlw6,NumChlBlw6,LiveWithFamily,House,White,English
5,IncAbv30K,Male,Married,AgeAbv45,College,Retired,YrsAbv7,No,NumHousBlw6,NumChlBlw6,Own,House,White,English
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8988,IncBlw30K,Female,Single/NeverMarried,AgeBlw45,HighSchool,SalesWorker,YrsAbv7,Unmarried,NumHousBlw6,NumChlBlw6,LiveWithFamily,House,White,English
8989,IncBlw30K,Male,Single/NeverMarried,AgeBlw45,College,Professional/Managerial,YrsAbv7,Unmarried,NumHousBlw6,NumChlBlw6,LiveWithFamily,House,White,English
8990,IncBlw30K,Female,Single/NeverMarried,AgeBlw45,HighSchool,Professional/Managerial,YrsAbv7,Unmarried,NumHousBlw6,NumChlBlw6,LiveWithFamily,House,White,English
8991,IncBlw30K,Male,Married,AgeAbv45,College,Factory Worker/Laborer/Driver,YrsAbv7,Yes,NumHousBlw6,NumChlBlw6,Rent,Apartment,White,English


In [9]:
# One-hot encode the binned and labelled frame: each column/value pair
# becomes its own indicator column named "<column>_<value>".
# drop_first=False keeps an indicator for every level of every column.
market_df_Dummied = pd.get_dummies(
    data=market_df_binned,
    prefix_sep='_',
    drop_first=False,
)

# Preview the first three encoded rows.
market_df_Dummied.iloc[:3]

Unnamed: 0,Income_IncBlw30K,Income_IncAbv30K,Sex_Female,Sex_Male,MaritalStatus_Divorced/Seperated,MaritalStatus_LivingTogetherUnmarried,MaritalStatus_Married,MaritalStatus_Single/NeverMarried,MaritalStatus_Widowed,Age_AgeBlw45,...,EthnicClassification_Asian,EthnicClassification_Black,EthnicClassification_EastIndian,EthnicClassification_Hispanic,EthnicClassification_Other,EthnicClassification_PacificIslander,EthnicClassification_White,LanguageInHome_English,LanguageInHome_Other,LanguageInHome_Spanish
1,False,True,False,True,False,False,True,False,False,False,...,False,False,False,False,False,False,True,True,False,False
2,False,True,True,False,False,False,True,False,False,True,...,False,False,False,False,False,False,True,True,False,False
3,True,False,True,False,False,False,False,True,False,True,...,False,False,False,False,False,False,True,True,False,False


In [None]:
# List the names of all indicator columns created by get_dummies.
market_df_Dummied.columns

In [11]:
# Mine the frequent itemsets from the one-hot encoded frame.
# Only itemsets whose support reaches MIN_SUPPORT are kept, and
# use_colnames=True reports items by column name rather than column index.
MIN_SUPPORT = 0.1

itemsets = apriori(
    market_df_Dummied,
    min_support=MIN_SUPPORT,
    use_colnames=True,
)
itemsets

Unnamed: 0,support,itemsets
0,0.499418,(Income_IncBlw30K)
1,0.500582,(Income_IncAbv30K)
2,0.553956,(Sex_Female)
3,0.446044,(Sex_Male)
4,0.385689,(MaritalStatus_Married)
...,...,...
10801,0.100785,"(NumberOfChildren_NumChlBlw6, NumberInHoushold..."
10802,0.141507,"(NumberOfChildren_NumChlBlw6, NumberInHoushold..."
10803,0.127400,"(NumberOfChildren_NumChlBlw6, NumberInHoushold..."
10804,0.103403,"(NumberOfChildren_NumChlBlw6, NumberInHoushold..."


In [12]:
# Turn the frequent itemsets into association rules, keeping only rules
# whose confidence is at least MIN_CONFIDENCE.
MIN_CONFIDENCE = 0.8

rules = association_rules(
    itemsets,
    metric='confidence',
    min_threshold=MIN_CONFIDENCE,
)

# Show the six rules with the highest lift.
rules_by_lift = rules.sort_values(by=['lift'], ascending=False)
rules_by_lift.head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
33784,"(Education_HighSchool, TypeOfHome_House, Marit...","(HousholderStatus_LiveWithFamily, Age_AgeBlw45...",0.12929,0.197062,0.108202,0.836895,4.246858,0.082724,4.922839,0.878056
51702,"(Education_HighSchool, NumberOfChildren_NumChl...","(HousholderStatus_LiveWithFamily, Age_AgeBlw45...",0.127691,0.197062,0.106603,0.834852,4.236488,0.08144,4.861927,0.875785
51709,"(Education_HighSchool, TypeOfHome_House, Marit...","(HousholderStatus_LiveWithFamily, NumberOfChil...",0.12929,0.194735,0.106603,0.824522,4.234065,0.081425,4.588976,0.877239
33782,"(Age_AgeBlw45, Education_HighSchool, TypeOfHom...","(HousholderStatus_LiveWithFamily, DualIncome_U...",0.128272,0.199825,0.108202,0.843537,4.221371,0.08257,5.114159,0.8754
51696,"(NumberOfChildren_NumChlBlw6, Education_HighSc...","(HousholderStatus_LiveWithFamily, DualIncome_U...",0.126672,0.199825,0.106603,0.841561,4.211482,0.08129,5.050377,0.873159
51707,"(Age_AgeBlw45, Education_HighSchool, TypeOfHom...","(HousholderStatus_LiveWithFamily, NumberOfChil...",0.128272,0.197499,0.106603,0.831066,4.207959,0.081269,4.750378,0.874533
