# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [1]:
import time
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, KFold
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## Load Custom Made Libraries

In [20]:
from Utilities.CFS import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

## Load Dataset

In [21]:
df = pd.read_csv("Dataset/E-payment Cryptocurrency Coin.csv")

## Label Binarizer

In [22]:
def convert_nominal(arr, term_arr):
    tmp_dict = {val:ind for (ind, val) in enumerate(term_arr)}
    return arr.map(lambda x : tmp_dict[x])

## Split Dataset

In [23]:
mod_fac_df = pd.DataFrame()

column_arr = ["Age", "Gender", "Marital Status", "Education Level", "Work Industry", "Work Position"]

for (ind, col_name) in enumerate(column_arr):
    mod_fac_df[col_name] = df.iloc[:, 6 + ind]
    
# Replace Values in Work Industry
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Label Binarize all columns
# Age
mod_fac_df["Age"] = convert_nominal(mod_fac_df["Age"], ["< 25 years", "26 - 40 years", "41 - 55 years", "above 55 years"])

# Gender
mod_fac_df["Gender"] = convert_nominal(mod_fac_df["Gender"], ["Male", "Female"])

# Marital Status
mod_fac_df["Marital Status"] = convert_nominal(mod_fac_df["Marital Status"], ["Single", "Married", "Other"])

# Education Level
mod_fac_df["Education Level"] = convert_nominal(mod_fac_df["Education Level"], ['Primary school', 'Secondary/High school', 'College/university', 'Graduate school', 'Other'])

# Work Industry
mod_fac_df["Work Industry"] = convert_nominal(mod_fac_df["Work Industry"], ['Banking / Finance', 'Education', 'Healthcare', 'Manufacturing', 'Retail / Hypermarket', 'Other'])

# Work Position
mod_fac_df["Work Position"] = convert_nominal(mod_fac_df["Work Position"], ['Junior management', 'Middle management', 'Top management', 'Professional', 'Other'])


In [24]:
utaut_fac_df = pd.DataFrame()

column_arr = df.iloc[:, 28:].columns
column_arr = [col.split(": ")[0] for col in column_arr]

for (ind, col_name) in enumerate(column_arr):
    utaut_fac_df[col_name] = df.iloc[:, 28 + ind]
    
# Change Data Type to int
utaut_fac_df = utaut_fac_df.astype(int)

## Target Variables

## BI1

In [65]:
# BI3 as Target Variable
df_X = pd.DataFrame()
df_X = pd.concat([df_X, mod_fac_df], axis = 1)
df_X = pd.concat([df_X, utaut_fac_df.drop(["BI1", "BI2", "BI3", "BI4"], axis = 1)], axis = 1)
df_Y = utaut_fac_df.loc[:, "BI1"]

In [66]:
# Correlation Function
func = spearmanr

arr_list = []

threshold = 0.05

# Get List of P_Values
for col in df_X.columns:
    corr, p_val = func(df_X.loc[:, col], df_Y)
    # Threshold => Only append variables that have a p_value smaller than 0.05
    if p_val <= threshold:
        arr_list.append((col, p_val))
        
# Sort Variables Descending by P_val
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "P Value"])

arr_df.style.hide_index()

Variables,P Value
T1,0.0
AT1,0.0
T2,0.0
PE1,0.0
T3,0.0
T4,0.0
FC2,0.0
EE3,0.0
PE4,0.0
AT4,0.0


In [67]:
model = DecisionTreeClassifier()

feature_set = backward_selection(df_X.loc[:, arr_df["Variables"]], df_Y, model, 8)

feature_set

['T1', 'AT1', 'T3', 'EE3', 'AT2', 'PE2', 'FC4', 'Work Position']

In [68]:
model = DecisionTreeClassifier()

acc_score = get_acc_score_kcv(df_X, df_Y, model)

print(f"Accuracy Score (Without Selective Features): {acc_score:.2f}")

acc_score = get_acc_score_kcv(df_X.loc[:, feature_set], df_Y, model)

print(f"Accuracy Score (With Selective Features): {acc_score:.2f}")

Accuracy Score (Without Selective Features): 69.15
Accuracy Score (With Selective Features): 76.56


### BI3

In [69]:
# BI3 as Target Variable
df_X = pd.DataFrame()
df_X = pd.concat([df_X, mod_fac_df], axis = 1)
df_X = pd.concat([df_X, utaut_fac_df.drop(["BI1", "BI2", "BI3", "BI4"], axis = 1)], axis = 1)
df_Y = utaut_fac_df.loc[:, "BI3"]

#### Get List of Variables that are significant

In [70]:
# Correlation Function
func = spearmanr

arr_list = []

threshold = 0.05

# Get List of P_Values
for col in df_X.columns:
    corr, p_val = func(df_X.loc[:, col], df_Y)
    # Threshold => Only append variables that have a p_value smaller than 0.05
    if p_val <= threshold:
        arr_list.append((col, p_val))
        
# Sort Variables Descending by P_val
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "P Value"])

arr_df.style.hide_index()

Variables,P Value
PE1,0.0
PE4,0.0
T4,0.0
AT1,0.0
PE2,0.0
T2,0.0
AT2,0.0
T1,0.0
PE3,0.0
AT4,0.0


We Can Observe that Work Position, Education Level and Gender have a significant relationship with Behavioral Intention  to have Blockchain / Cryptocurrency Coin as an E-Wallet.

In [71]:
model = DecisionTreeClassifier()

feature_set = backward_selection(df_X.loc[:, arr_df["Variables"]], df_Y, model, 8)

feature_set

['PE1', 'PE4', 'T4', 'AT2', 'FC4', 'SE2', 'SI2', 'SI1']

In [72]:
model = DecisionTreeClassifier()

acc_score = get_acc_score_kcv(df_X, df_Y, model)

print(f"Accuracy Score (Without Selective Features): {acc_score:.2f}")

acc_score = get_acc_score_kcv(df_X.loc[:, feature_set], df_Y, model)

print(f"Accuracy Score (With Selective Features): {acc_score:.2f}")

Accuracy Score (Without Selective Features): 73.44
Accuracy Score (With Selective Features): 76.58


## BI4

In [73]:
# BI3 as Target Variable
df_X = pd.DataFrame()
df_X = pd.concat([df_X, mod_fac_df], axis = 1)
df_X = pd.concat([df_X, utaut_fac_df.drop(["BI1", "BI2", "BI3", "BI4"], axis = 1)], axis = 1)
df_Y = utaut_fac_df.loc[:, "BI4"]

In [74]:
# Correlation Function
func = spearmanr

arr_list = []

threshold = 0.05

# Get List of P_Values
for col in df_X.columns:
    corr, p_val = func(df_X.loc[:, col], df_Y)
    # Threshold => Only append variables that have a p_value smaller than 0.05
    if p_val <= threshold:
        arr_list.append((col, p_val))
        
# Sort Variables Descending by P_val
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "P Value"])

arr_df.style.hide_index()

Variables,P Value
T3,0.0
PE4,0.0
PE1,0.0
T1,0.0
PE2,0.0
T2,0.0
AT1,0.0
AT2,0.0
T4,0.0
EE3,0.0


In [75]:
model = DecisionTreeClassifier()

feature_set = backward_selection(df_X.loc[:, arr_df["Variables"]], df_Y, model, 8)

feature_set

['PE4', 'T2', 'AT1', 'AT2', 'SE4', 'FC2', 'SI4', 'AT3']

In [76]:
model = DecisionTreeClassifier()

acc_score = get_acc_score_kcv(df_X, df_Y, model)

print(f"Accuracy Score (Without Selective Features): {acc_score:.2f}")

acc_score = get_acc_score_kcv(df_X.loc[:, feature_set], df_Y, model)

print(f"Accuracy Score (With Selective Features): {acc_score:.2f}")

Accuracy Score (Without Selective Features): 71.66
Accuracy Score (With Selective Features): 76.61


### 6. E-payment purchasing, Loyalty Points and Crypto Coin [(1) Have you ever purchased anything using the E-payment mode?

In [87]:
# BI3 as Target Variable
df_X = pd.DataFrame()
df_X = pd.concat([df_X, mod_fac_df], axis = 1)
df_X = pd.concat([df_X, utaut_fac_df.drop(["BI1", "BI2", "BI3", "BI4"], axis = 1)], axis = 1)
df_Y = df.iloc[:, 17]

df_Y = df_Y.replace("Yes", 1)
df_Y = df_Y.replace("No", 0)
df_Y = df_Y.astype(int)

In [89]:
# Correlation Function
func = pointbiserialr

arr_list = []

threshold = 0.05

# Get List of P_Values
for col in df_X.columns:
    corr, p_val = func(df_X.loc[:, col], df_Y)
    # Threshold => Only append variables that have a p_value smaller than 0.05
    if p_val <= threshold:
        arr_list.append((col, p_val))
        
# Sort Variables Descending by P_val
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "P Value"])

arr_df.style.hide_index()

Variables,P Value
Education Level,0.000754
T4,0.003072
SE4,0.012958
T2,0.019329
SE3,0.027324
SE2,0.029958
T3,0.030066
FC4,0.03227
AT1,0.03782
T1,0.046191


In [90]:
model = DecisionTreeClassifier()

feature_set = backward_selection(df_X.loc[:, arr_df["Variables"]], df_Y, model, 8)

feature_set

['T4', 'SE4', 'T2', 'SE3', 'SE2', 'T3', 'AT1', 'T1']

In [91]:
model = DecisionTreeClassifier()

acc_score = get_acc_score_kcv(df_X, df_Y, model)

print(f"Accuracy Score (Without Selective Features): {acc_score:.2f}")

acc_score = get_acc_score_kcv(df_X.loc[:, feature_set], df_Y, model)

print(f"Accuracy Score (With Selective Features): {acc_score:.2f}")

Accuracy Score (Without Selective Features): 76.51
Accuracy Score (With Selective Features): 85.69


### 6. E-payment purchasing, Loyalty Points and Crypto Coin [(4) Do you have the Investment in Crypto?]

In [94]:
# BI3 as Target Variable
df_X = pd.DataFrame()
df_X = pd.concat([df_X, mod_fac_df], axis = 1)
df_X = pd.concat([df_X, utaut_fac_df.drop(["BI1", "BI2", "BI3", "BI4"], axis = 1)], axis = 1)
df_Y = df.iloc[:, 20]

df_Y = df_Y.replace("Yes", 1)
df_Y = df_Y.replace("No", 0)
df_Y = df_Y.astype(int)

In [96]:
# Correlation Function
func = pointbiserialr

arr_list = []

threshold = 0.05

# Get List of P_Values
for col in df_X.columns:
    corr, p_val = func(df_X.loc[:, col], df_Y)
    # Threshold => Only append variables that have a p_value smaller than 0.05
    if p_val <= threshold:
        arr_list.append((col, p_val))
        
# Sort Variables Descending by P_val
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "P Value"])

arr_df.style.hide_index()

Variables,P Value
FC2,0.0
EE1,0.0
T1,0.0
PE1,0.0
SE4,0.0
SE2,0.0
FC1,0.0
FC4,0.0
T4,0.0
EE2,0.0


In [97]:
model = DecisionTreeClassifier()

feature_set = backward_selection(df_X.loc[:, arr_df["Variables"]], df_Y, model, 8)

feature_set

['T1', 'SE2', 'FC1', 'PE2', 'SE3', 'T3', 'AX4', 'Education Level']

In [98]:
model = DecisionTreeClassifier()

acc_score = get_acc_score_kcv(df_X, df_Y, model)

print(f"Accuracy Score (Without Selective Features): {acc_score:.2f}")

acc_score = get_acc_score_kcv(df_X.loc[:, feature_set], df_Y, model)

print(f"Accuracy Score (With Selective Features): {acc_score:.2f}")

Accuracy Score (Without Selective Features): 79.70
Accuracy Score (With Selective Features): 80.73
