### Importing data

In [3]:
# Mount Google Drive into the Colab runtime at /content/drive so the files
# stored there can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Switch the working directory to the project folder on the mounted Drive;
# the relative file names used by the later cells resolve against it.
%cd /content/drive/MyDrive/Colab Notebooks/Analytics_Enabled_Marketing
# !pwd  (uncomment to confirm the current working directory)

/content/drive/MyDrive/Colab Notebooks/Analytics_Enabled_Marketing


In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression

In [9]:
# Read the customer records to be scored from the Excel workbook in the
# current working directory.
dataset=pd.read_excel("a2_Dataset_90Percent.xlsx")
# !ls  (uncomment to list the files in the working directory)

In [10]:
# Show the first few rows of the dataset
dataset.head()

Unnamed: 0,ID,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime
0,140,10.0,76.0,C,U,Midlands,Wales & West,Gold,16000.0,4.0
1,620,4.0,49.0,D,U,Midlands,Wales & West,Gold,6000.0,5.0
2,868,5.0,70.0,D,F,Midlands,Wales & West,Silver,0.02,8.0
3,1120,10.0,65.0,F,M,Midlands,Midlands,Tin,0.01,7.0
4,2313,11.0,68.0,A,F,Midlands,Midlands,Tin,0.01,8.0


### Data preparation

In [11]:
# Drop the customer ID column so that only the nine columns used as model
# inputs remain in the frame.
# errors='ignore' keeps this cell safe to re-run: once ID has been removed a
# second execution is a no-op instead of raising KeyError.

dataset = dataset.drop(columns=['ID'], errors='ignore')

dataset.head()

Unnamed: 0,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime
0,10.0,76.0,C,U,Midlands,Wales & West,Gold,16000.0,4.0
1,4.0,49.0,D,U,Midlands,Wales & West,Gold,6000.0,5.0
2,5.0,70.0,D,F,Midlands,Wales & West,Silver,0.02,8.0
3,10.0,65.0,F,M,Midlands,Midlands,Tin,0.01,7.0
4,11.0,68.0,A,F,Midlands,Midlands,Tin,0.01,8.0


In [12]:
# Count the missing values in each column before any imputation

dataset.isna().sum()

DemAffl             48
DemAge              67
DemClusterGroup     28
DemGender          114
DemReg              18
DemTVReg            18
LoyalClass           0
LoyalSpend           0
LoyalTime           15
dtype: int64

In [13]:
# Impute missing values: the most frequent value (mode) for the six
# demographic columns, and the column mean for LoyalTime.
# NOTE(review): the fill values are computed from this scoring file itself;
# confirm the classifier was trained on data prepared the same way.

for mode_col in ['DemAffl', 'DemAge', 'DemClusterGroup', 'DemGender', 'DemReg', 'DemTVReg']:
    most_frequent = dataset[mode_col].mode()[0]
    dataset[mode_col] = dataset[mode_col].fillna(most_frequent)

loyal_time_mean = dataset['LoyalTime'].mean()
dataset['LoyalTime'] = dataset['LoyalTime'].fillna(loyal_time_mean)

In [14]:
# Re-count missing values per column to confirm the imputation left none

dataset.isna().sum()

DemAffl            0
DemAge             0
DemClusterGroup    0
DemGender          0
DemReg             0
DemTVReg           0
LoyalClass         0
LoyalSpend         0
LoyalTime          0
dtype: int64

### Converting categorical columns to numeric

In [15]:
# Convert the categorical (string) columns to integer codes, printing the
# label -> code mapping chosen for each column.

from sklearn.preprocessing import LabelEncoder

# NOTE(review): the encoder is fitted on this scoring file, so a code depends
# on which labels happen to be present here. Confirm the resulting mappings
# match the ones used when the classifier was trained.
categorical_columns = ['DemClusterGroup', 'DemGender', 'DemReg', 'DemTVReg', 'LoyalClass']

number = LabelEncoder()

for column in categorical_columns:
    # Skip columns that already hold numbers, i.e. this cell has been run
    # before. Re-encoding would cast the integer codes to strings and sort
    # them lexicographically ('10' < '2'), silently scrambling the codes.
    if pd.api.types.is_numeric_dtype(dataset[column]):
        continue

    # The encoder is refitted for every column; after the loop `number` and
    # `integer_mapping` describe the last column that was encoded.
    dataset[column] = number.fit_transform(dataset[column].astype('str'))
    integer_mapping = {l: i for i, l in enumerate(number.classes_)}
    print(integer_mapping)

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'U': 6}
{'F': 0, 'M': 1, 'U': 2}
{'Midlands': 0, 'North': 1, 'Scottish': 2, 'South East': 3, 'South West': 4}
{'Border': 0, 'C Scotland': 1, 'East': 2, 'London': 3, 'Midlands': 4, 'N East': 5, 'N Scot': 6, 'N West': 7, 'S & S East': 8, 'S West': 9, 'Ulster': 10, 'Wales & West': 11, 'Yorkshire': 12}
{'Gold': 0, 'Platinum': 1, 'Silver': 2, 'Tin': 3}


In [16]:
# Preview the frame after encoding: the categorical columns now hold integer codes.
dataset.head()

Unnamed: 0,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime
0,10.0,76.0,2,2,0,11,0,16000.0,4.0
1,4.0,49.0,3,2,0,11,0,6000.0,5.0
2,5.0,70.0,3,0,0,11,2,0.02,8.0
3,10.0,65.0,5,1,0,4,3,0.01,7.0
4,11.0,68.0,0,0,0,4,3,0.01,8.0


### Predictions

In [17]:
# Build the feature matrix by column name rather than by position, so that an
# extra or reordered column cannot silently shift the inputs handed to the
# classifier (a missing column raises KeyError instead).
# The order is the same as the positional slice dataset.iloc[:, 0:9].
feature_columns = [
    'DemAffl', 'DemAge', 'DemClusterGroup', 'DemGender', 'DemReg',
    'DemTVReg', 'LoyalClass', 'LoyalSpend', 'LoyalTime',
]
X_fresh = dataset[feature_columns].values

In [21]:
# Load the saved classifier object from the working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files that come from a trusted source.
import joblib

classifier = joblib.load('./c2_Classifier_LoyalCustomers')

In [22]:
# Predict a class label for every row of X_fresh and print the whole label array.
y_pred = classifier.predict(X_fresh)
print(y_pred)

[0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0
 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0
 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 

In [23]:
# Per-row class probabilities: one row per record in X_fresh and one column
# per class (two columns in the output shown below).
predictions = classifier.predict_proba(X_fresh)
predictions

array([[0.97549312, 0.02450688],
       [0.96899159, 0.03100841],
       [0.94465062, 0.05534938],
       ...,
       [0.96593599, 0.03406401],
       [0.76761016, 0.23238984],
       [0.49754322, 0.50245678]])

In [25]:
# Write the model output file: the prepared input columns plus the predicted
# class probabilities for each row.

# Give the probability frame the same index as `dataset`. pd.concat(axis=1)
# aligns rows by index label, so a fresh 0..n-1 index would misalign the
# probabilities (and introduce NaN rows) if `dataset` ever carried a
# non-default index, e.g. after rows were filtered out.
# NOTE(review): the names prob_0 / prob_1 assume the probability columns are
# ordered as class 0 then class 1 - confirm against classifier.classes_.
df_prediction_prob = pd.DataFrame(
    predictions,
    columns=['prob_0', 'prob_1'],
    index=dataset.index,
)

dfx = pd.concat([dataset, df_prediction_prob], axis=1)

dfx.to_excel("d2_BuyProb_90Percent.xlsx")

dfx.head()

Unnamed: 0,DemAffl,DemAge,DemClusterGroup,DemGender,DemReg,DemTVReg,LoyalClass,LoyalSpend,LoyalTime,prob_0,prob_1
0,10.0,76.0,2,2,0,11,0,16000.0,4.0,0.975493,0.024507
1,4.0,49.0,3,2,0,11,0,6000.0,5.0,0.968992,0.031008
2,5.0,70.0,3,0,0,11,2,0.02,8.0,0.944651,0.055349
3,10.0,65.0,5,1,0,4,3,0.01,7.0,0.890356,0.109644
4,11.0,68.0,0,0,0,4,3,0.01,8.0,0.80435,0.19565
