# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [3]:
import time
import numpy as np
import pandas as pd
import seaborn as sns
import ipywidgets as ipw
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from plotly.subplots import make_subplots

from sklearn import svm, tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, matthews_corrcoef, precision_recall_curve, roc_curve

import scipy.stats as stats
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr, chi2, chi2_contingency

from xgboost import XGBClassifier

%matplotlib inline

## Load Custom Made Libraries

In [4]:
from Utilities.CFS import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

## Load Dataset

In [5]:
# Read the raw dataset; later cells pick its columns by position
# (6 onward, 13, 17-18 and 28 onward).
df = pd.read_csv("Dataset/E-payment Cryptocurrency Coin.csv")

## Label Encoder (nominal label → integer code)

In [6]:
def convert_nominal(arr, term_arr):
    """Encode nominal labels as integer codes.

    Every label in ``term_arr`` is given its position in that sequence, and
    each element of ``arr`` is replaced by the code of its label.

    Parameters
    ----------
    arr : pandas.Series
        Values to encode (any object exposing ``.map`` works).
    term_arr : iterable
        Labels in code order; the first label becomes 0.

    Returns
    -------
    The result of ``arr.map`` with every label replaced by its position.

    Raises
    ------
    KeyError
        If ``arr`` holds a value that is not listed in ``term_arr``.
    """
    code_by_label = {}
    for position, label in enumerate(term_arr):
        code_by_label[label] = position

    def _encode(value):
        # Plain dict lookup on purpose: an unknown label must fail loudly.
        return code_by_label[value]

    return arr.map(_encode)

## Split Dataset

In [7]:
# Category labels of every demographic factor. The position of a label in its
# list is the integer code convert_nominal assigns to it, and the same order is
# used to arrange the bars in the charts further down.
index_dict = {
    "Age": [
        "< 25 years",
        "26 - 40 years",
        "41 - 55 years",
        "above 55 years",
    ],
    "Gender": [
        "Male",
        "Female",
    ],
    "Marital Status": [
        "Single",
        "Married",
        "Other",
    ],
    "Education Level": [
        "Primary school",
        "Secondary/High school",
        "College/university",
        "Graduate school",
        "Other",
    ],
    "Work Industry": [
        "Banking / Finance",
        "Education",
        "Healthcare",
        "Manufacturing",
        "Retail / Hypermarket",
        "Other",
    ],
    "Work Position": [
        "Junior management",
        "Middle management",
        "Top management",
        "Professional",
        "Other",
    ],
}

In [8]:
# Short names for the six demographic columns, which start at position 6 of df.
column_arr = ["Age", "Gender", "Marital Status", "Education Level", "Work Industry", "Work Position"]

# Copy each column out of the raw frame by position under its short name.
mod_fac_df = pd.DataFrame(
    {factor: df.iloc[:, 6 + offset] for offset, factor in enumerate(column_arr)}
)

# Fix the misspelled "Baking / Finance" label (replace scans the whole frame).
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Swap every label for its integer code, i.e. its position in index_dict[factor].
for factor in column_arr:
    mod_fac_df[factor] = convert_nominal(mod_fac_df[factor], index_dict[factor])

In [9]:
# Columns from position 28 onward: keep only the part of each header that
# precedes the first ": " as the column name (e.g. "PE1").
column_arr = [header.split(": ")[0] for header in df.columns[28:]]

# Copy those columns by position under their short names and store them as int.
utaut_fac_df = pd.DataFrame(
    {code: df.iloc[:, 28 + offset] for offset, code in enumerate(column_arr)}
).astype(int)

In [11]:
# Rebuild the demographic frame with its original text labels (no integer
# encoding here), overwriting the encoded version created earlier.
column_arr = ["Age", "Gender", "Marital Status", "Education Level", "Work Industry", "Work Position"]

mod_fac_df = pd.DataFrame(
    {factor: df.iloc[:, 6 + offset] for offset, factor in enumerate(column_arr)}
)

# Fix the misspelled "Baking / Finance" label (replace scans the whole frame).
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Demographics, UTAUT items and raw columns 17-18 side by side.
all_df_arr = [mod_fac_df, utaut_fac_df, df.iloc[:, [17, 18]]]
all_df = pd.concat(all_df_arr, axis = 1)

# Shorten the last two headers: take the text after the first "[", then drop
# its first four characters and its final character.
# NOTE(review): this assumes a "...[<4-char prefix><question>]" header layout — confirm against the CSV.
kept_headers = all_df.columns.tolist()[:-2]
short_headers = [header.split("[")[1][4:-1] for header in all_df.columns[-2:]]
all_df.columns = kept_headers + short_headers

all_df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Work Industry,Work Position,PE1,PE2,PE3,PE4,...,T1,T2,T3,T4,BI1,BI2,BI3,BI4,Have you ever purchased anything using the E-payment mode?,"In the next six months, do you plan to purchase anything using the E-payment mode?"
0,< 25 years,Female,Single,College/university,Banking / Finance,Other,2,2,2,2,...,2,1,2,3,3,3,3,3,Yes,Yes
1,< 25 years,Female,Single,College/university,Other,Other,1,1,1,1,...,2,2,2,2,2,2,2,2,Yes,Yes
2,41 - 55 years,Female,Single,College/university,Manufacturing,Middle management,4,5,4,5,...,3,3,3,4,3,4,4,4,Yes,Yes
3,< 25 years,Male,Single,College/university,Education,Other,3,3,3,3,...,3,3,3,3,4,4,3,3,No,Yes
4,< 25 years,Female,Single,College/university,Other,Other,3,3,4,4,...,3,4,4,4,4,5,4,4,Yes,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,41 - 55 years,Male,Married,Secondary/High school,Other,Other,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
282,above 55 years,Male,Married,Graduate school,Education,Top management,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
283,above 55 years,Female,Married,College/university,Other,Other,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
284,41 - 55 years,Male,Married,Graduate school,Education,Professional,4,4,4,4,...,4,4,4,4,4,4,4,4,Yes,Yes


In [19]:
# Predictors: the six demographic columns. Target: the second-to-last column of all_df.
df_X = all_df.iloc[:, :6]
df_Y = all_df.iloc[:, -2]

# Correlation measure used for the ranking; it must return (coefficient, p-value).
func = spearmanr

# One (factor name, coefficient) pair per predictor column. The p-value is not
# used for the ranking, so it is discarded.
arr_list = []
for name, values in df_X.items():
    corr, _ = func(values, df_Y)
    arr_list.append((name, corr))

# Sort Column By Correlation Value
# NOTE(review): sorting on the signed coefficient puts strongly negative
# correlations last; confirm whether ranking by absolute value was intended.
arr_list = sorted(arr_list, key = lambda pair : pair[1], reverse = True)

# Index and "Rank" both run 1..n. The rank is derived from the sorted order
# rather than first stored as the original column position and then overwritten.
rank_arr = list(range(1, len(arr_list) + 1))
mbf_df = pd.DataFrame(arr_list, index = rank_arr, columns = ["Factors", "Weight For Ranking"])
mbf_df.insert(0, "Rank", rank_arr)

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0; use
# Styler.hide(axis="index") when available and fall back on older versions.
mbf_styler = mbf_df.style
mbf_styler.hide(axis = "index") if hasattr(mbf_styler, "hide") else mbf_styler.hide_index()

Rank,Factors,Weight For Ranking
1,Marital Status,0.074535
2,Age,0.023166
3,Work Industry,0.003489
4,Gender,-0.017387
5,Work Position,-0.040989
6,Education Level,-0.118909


## Moderated Variables

In [8]:
# Confidence level and matching significance threshold. Both are constant for
# every test, so they are set once instead of inside the inner loop.
prob = 0.95
alpha = 1.0 - prob

# For each column of mod_fac_df, collect the utaut_fac_df columns whose
# chi-square test of independence against it is significant.
arr_list = []
for col_y in mod_fac_df.columns:

    tmp_X = utaut_fac_df
    tmp_Y = mod_fac_df.loc[:, col_y]

    tmp_list = []

    for col_2 in tmp_X.columns:
        # Contingency table: levels of the UTAUT item (rows) x levels of the factor (columns).
        chi_df = pd.crosstab(tmp_X.loc[:, col_2], tmp_Y)

        # The expected-frequency table is returned as well but not needed here.
        stat, p, dof, _ = chi2_contingency(chi_df)

        # Critical value of the chi-square distribution for this table's degrees of freedom.
        critical = chi2.ppf(prob, dof)

        # Both forms of the decision rule are kept exactly as before.
        if abs(stat) >= critical and p <= alpha:
            tmp_list.append(col_2)

    arr_list.append((col_y, tmp_list))

tmp_df = pd.DataFrame(arr_list, columns = ["Moderated Variables", "Significantly Related Variables"])

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0; use
# Styler.hide(axis="index") when available and fall back on older versions.
tmp_styler = tmp_df.style
tmp_styler.hide(axis = "index") if hasattr(tmp_styler, "hide") else tmp_styler.hide_index()

Moderated Variables,Significantly Related Variables
Age,"['PE1', 'PE2', 'PE4', 'EE1', 'EE3', 'EE4', 'AT1', 'AT3', 'AT4', 'SI1', 'SI2', 'SI3', 'SI4', 'FC1', 'FC2', 'FC3', 'SE1', 'SE2', 'SE3', 'SE4', 'AX2', 'AX3', 'T2', 'T4', 'BI1', 'BI2', 'BI3', 'BI4']"
Gender,"['PE1', 'PE2', 'EE1', 'EE2', 'EE3', 'EE4', 'AT1', 'AT4', 'SI2', 'SI3', 'SI4', 'FC1', 'FC2', 'FC3', 'FC4', 'SE1', 'SE3', 'SE4', 'AX4', 'T1', 'T3', 'BI1', 'BI2', 'BI3']"
Marital Status,"['PE1', 'PE2', 'PE3', 'EE1', 'EE2', 'EE3', 'EE4', 'AT1', 'AT2', 'AT3', 'AT4', 'SI1', 'SI2', 'SI3', 'SI4', 'FC1', 'FC2', 'FC3', 'AX3', 'T1', 'T2', 'T3', 'T4', 'BI1', 'BI2', 'BI3', 'BI4']"
Education Level,"['AT1', 'AT2', 'FC1', 'FC4', 'SE2', 'SE3', 'SE4', 'T4', 'BI1', 'BI3']"
Work Industry,"['PE1', 'PE2', 'EE1', 'EE3', 'SI2', 'FC1', 'T4']"
Work Position,"['PE1', 'PE2', 'PE3', 'PE4', 'EE1', 'AT4', 'SI1', 'SI2', 'SI3', 'SI4', 'FC1', 'FC2', 'FC4', 'AX1', 'AX2', 'AX4', 'T4']"


## Use CFS on All UTAUT Factors

In [9]:
# arr_list = []

# model = DecisionTreeClassifier()

# for col in utaut_fac_df.columns:
#     print(col)
#     arr_df = [mod_fac_df, utaut_fac_df.drop([col], axis = 1)]
#     df_X = pd.concat(arr_df, axis = 1)
#     df_Y = utaut_fac_df.loc[:, col]

#     feature_set = forward_selection(df_X, df_Y, model, 8)
    
#     arr_list.append((col, feature_set, len(feature_set)))

# tmp_df = pd.DataFrame(arr_list, columns = ["UTAUT Factor", "Feature Set", "Number of Features"])
# tmp_df.style.hide_index()

## Graphs

In [12]:
# Reload the raw dataset and rebuild the demographic frame with its original
# text labels (no integer encoding) for the charts below.
df = pd.read_csv("Dataset/E-payment Cryptocurrency Coin.csv")

column_arr = ["Age", "Gender", "Marital Status", "Education Level", "Work Industry", "Work Position"]

# Copy the six demographic columns, which start at position 6, under short names.
mod_fac_df = pd.DataFrame(
    {factor: df.iloc[:, 6 + offset] for offset, factor in enumerate(column_arr)}
)

# Fix the misspelled "Baking / Finance" label (replace scans the whole frame).
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

In [13]:
# Demographics, UTAUT items and raw columns 17-18 side by side.
all_df_arr = [mod_fac_df, utaut_fac_df, df.iloc[:, [17, 18]]]
all_df = pd.concat(all_df_arr, axis = 1)

# Shorten the last two headers: take the text after the first "[", then drop
# its first four characters and its final character.
# NOTE(review): this assumes a "...[<4-char prefix><question>]" header layout — confirm against the CSV.
kept_headers = all_df.columns.tolist()[:-2]
short_headers = [header.split("[")[1][4:-1] for header in all_df.columns[-2:]]
all_df.columns = kept_headers + short_headers

all_df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Work Industry,Work Position,PE1,PE2,PE3,PE4,...,T1,T2,T3,T4,BI1,BI2,BI3,BI4,Have you ever purchased anything using the E-payment mode?,"In the next six months, do you plan to purchase anything using the E-payment mode?"
0,< 25 years,Female,Single,College/university,Banking / Finance,Other,2,2,2,2,...,2,1,2,3,3,3,3,3,Yes,Yes
1,< 25 years,Female,Single,College/university,Other,Other,1,1,1,1,...,2,2,2,2,2,2,2,2,Yes,Yes
2,41 - 55 years,Female,Single,College/university,Manufacturing,Middle management,4,5,4,5,...,3,3,3,4,3,4,4,4,Yes,Yes
3,< 25 years,Male,Single,College/university,Education,Other,3,3,3,3,...,3,3,3,3,4,4,3,3,No,Yes
4,< 25 years,Female,Single,College/university,Other,Other,3,3,4,4,...,3,4,4,4,4,5,4,4,Yes,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,41 - 55 years,Male,Married,Secondary/High school,Other,Other,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
282,above 55 years,Male,Married,Graduate school,Education,Top management,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
283,above 55 years,Female,Married,College/university,Other,Other,3,3,3,3,...,3,3,3,3,3,3,3,3,Yes,Yes
284,41 - 55 years,Male,Married,Graduate school,Education,Professional,4,4,4,4,...,4,4,4,4,4,4,4,4,Yes,Yes


#### Age

In [14]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Age"

In [15]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
fig_arr.append(show_bar_graph_percentage(tmp_df, "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

fig_arr.append(show_bar_graph(tmp_df, "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Age',
              'textposition': 'outside',
          …

#### Gender

In [16]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Gender"

In [17]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
fig_arr.append(show_bar_graph_percentage(tmp_df, "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

fig_arr.append(show_bar_graph(tmp_df, "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Gender',
              'textposition': 'outside',
       …

#### Marital Status

In [18]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Marital Status"

In [19]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
# The last category ("Other") is left out of the chart, but the percentages
# above were still computed over all categories.
fig_arr.append(show_bar_graph_percentage(tmp_df.iloc[:, :-1], "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

# Again drop the last category ("Other") from the plotted columns.
fig_arr.append(show_bar_graph(tmp_df.iloc[:, :-1], "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Marital Status',
              'textposition': 'outside',…

#### Education Level

In [20]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Education Level"

In [21]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
# The last category ("Other") is left out of the chart, but the percentages
# above were still computed over all categories.
fig_arr.append(show_bar_graph_percentage(tmp_df.iloc[:, :-1], "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

# Again drop the last category ("Other") from the plotted columns.
fig_arr.append(show_bar_graph(tmp_df.iloc[:, :-1], "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Education Level',
              'textposition': 'outside'…

#### Work Industry

In [22]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Work Industry"

In [23]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
# The last category ("Other") is left out of the chart, but the percentages
# above were still computed over all categories.
fig_arr.append(show_bar_graph_percentage(tmp_df.iloc[:, :-1], "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

# Again drop the last category ("Other") from the plotted columns.
fig_arr.append(show_bar_graph(tmp_df.iloc[:, :-1], "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Work Industry',
              'textposition': 'outside',
…

#### Work Position

In [24]:
# Demographic factor charted by the following cell, which reads it as `col`.
col = "Work Position"

In [25]:
fig_arr = []

# Left chart: percentage of rows per category, ordered as in index_dict[col].
tmp_df = pd.DataFrame(mod_fac_df[col].value_counts()).T
tmp_df = tmp_df[index_dict[col]]
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
# The last category ("Other") is left out of the chart, but the percentages
# above were still computed over all categories.
fig_arr.append(show_bar_graph_percentage(tmp_df.iloc[:, :-1], "", col, "", "Percentage"))

# Right chart: raw counts of each answer to the intention question, per category.
intention_col = "In the next six months, do you plan to purchase anything using the E-payment mode?"
tmp_df = pd.crosstab(all_df[intention_col], all_df[col])
tmp_df = tmp_df[index_dict[col]]

# Again drop the last category ("Other") from the plotted columns.
fig_arr.append(show_bar_graph(tmp_df.iloc[:, :-1], "", col, "Intention?", "Count"))

# Show both figures side by side.
h_arr = [go.FigureWidget(fig) for fig in fig_arr]

ipw.HBox(h_arr)

HBox(children=(FigureWidget({
    'data': [{'name': 'Work Position',
              'textposition': 'outside',
…

### Operating System

In [45]:
# Tally how often each option occurs in dataset column 13. One cell may hold
# several options joined by ";", and each of them is counted separately.
tmp_dict = {}

for val in df.iloc[:, 13]:
    # str.split returns [val] when there is no ";", so no special case is needed.
    for _str in val.split(";"):
        tmp_dict[_str] = tmp_dict.get(_str, 0) + 1

tmp_dict

{'Android (Samsung, etc)': 219, 'iOS (iPhone)': 112, 'Microsoft phone': 3}

In [57]:
# One-row frame of the tallies in tmp_dict; the row label is the header of
# dataset column 13 without its first three characters.
# (The earlier standalone pd.Series assignment was overwritten immediately and is gone.)
tmp_df = pd.DataFrame(pd.Series(tmp_dict), columns = [df.iloc[:, 13].name[3:]]).T
# Compute the percentages into a new float frame. Writing floats back into the
# integer count row relied on a silent dtype upcast that pandas 2.1+ deprecates.
tmp_df = tmp_df / tmp_df.iloc[0, :].sum() * 100.0
show_bar_graph_percentage(tmp_df, "", "Type of Smart Phone", "", "Percentage")