# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [13]:
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import svm, tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, KFold
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, matthews_corrcoef, precision_recall_curve, roc_curve

from xgboost import XGBClassifier

%matplotlib inline

## Load Custom Made Libraries

In [3]:
from Utilities.CFS import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

## Load Dataset

In [4]:
# Read the raw dataset CSV into `df`; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer = "Dataset/E-payment Cryptocurrency Coin.csv")

## Label Encoder (Category → Integer Code)

In [5]:
def convert_nominal(arr, term_arr):
    """Encode the labels in ``arr`` as integer codes.

    Each label is replaced by its 0-based position in ``term_arr``.

    Parameters
    ----------
    arr : pandas.Series
        Labels to encode (any object exposing a compatible ``.map`` works).
    term_arr : iterable
        Labels in the order that defines their codes. If a label appears
        more than once, its last position is the code that is used.

    Returns
    -------
    Whatever ``arr.map`` returns; for a pandas Series, a Series holding the
    integer code of every element.

    Raises
    ------
    KeyError
        If ``arr`` contains a value that is not listed in ``term_arr``.
    """
    code_by_term = {}
    for position, term in enumerate(term_arr):
        code_by_term[term] = position

    def lookup(value):
        # Plain indexing (not .get) so that an unlisted label fails loudly.
        return code_by_term[value]

    return arr.map(lookup)

## Split Dataset

In [6]:
# Category labels for each demographic column. The dict order is the order of
# the columns in the raw frame (positional columns 6 onward), and a label's
# position in its list is the integer code it is encoded to.
category_levels = {
    "Age": ["< 25 years", "26 - 40 years", "41 - 55 years", "above 55 years"],
    "Gender": ["Male", "Female"],
    "Marital Status": ["Single", "Married", "Other"],
    "Education Level": ['Primary school', 'Secondary/High school', 'College/university', 'Graduate school', 'Other'],
    "Work Industry": ['Banking / Finance', 'Education', 'Healthcare', 'Manufacturing', 'Retail / Hypermarket', 'Other'],
    "Work Position": ['Junior management', 'Middle management', 'Top management', 'Professional', 'Other'],
}
column_arr = list(category_levels)

# Copy the demographic columns out of the raw frame by position, renaming them.
mod_fac_df = pd.DataFrame(
    {name: df.iloc[:, 6 + offset] for offset, name in enumerate(column_arr)}
)

# Correct the misspelt "Baking / Finance" label before encoding; the replacement
# is applied frame-wide and only to cells that equal the misspelling exactly.
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Encode every column as integer codes (a label missing from its list raises KeyError).
for name, levels in category_levels.items():
    mod_fac_df[name] = convert_nominal(mod_fac_df[name], levels)


In [7]:
# Every column from positional index 28 onward is copied into `utaut_fac_df`.
# Each header is shortened to the text before its first ": ".
column_arr = [header.split(": ")[0] for header in df.columns[28:]]

# Build the frame in one step and cast every column to int.
utaut_fac_df = pd.DataFrame(
    {short_name: df.iloc[:, 28 + offset] for offset, short_name in enumerate(column_arr)}
).astype(int)

## Moderated Variables

In [8]:
# For every column of `mod_fac_df`, list the columns of `utaut_fac_df` whose
# Spearman rank correlation with it has a p-value at or below the threshold.
significance_level = 0.05

arr_list = []
for col_y in mod_fac_df.columns:

    tmp_list = []
    for col_x in utaut_fac_df.columns:
        # spearmanr returns (correlation, p-value); only the p-value is used here.
        _corr, p_val = spearmanr(utaut_fac_df.loc[:, col_x], mod_fac_df.loc[:, col_y])

        # A NaN p-value (e.g. from a constant column) compares False and is skipped.
        if p_val <= significance_level:
            tmp_list.append(col_x)

    arr_list.append((col_y, ", ".join(tmp_list)))

tmp_df = pd.DataFrame(arr_list, columns = ["Moderated Variables", "Significantly Related Variables"])

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0.
# Prefer Styler.hide(axis="index") and fall back only on older pandas.
styler = tmp_df.style
styler.hide(axis = "index") if hasattr(styler, "hide") else styler.hide_index()

Moderated Variables,Significantly Related Variables
Age,"AT3, SI2"
Gender,"PE1, PE2, PE3, EE1, EE2, EE3, EE4, AT1, AT2, AT3, AT4, SI1, SI3, SI4, FC1, FC2, FC3, FC4, SE1, SE3, SE4, T1, T2, T3, T4, BI1, BI2, BI3, BI4"
Marital Status,SI1
Education Level,"AT1, FC1, FC2, FC4, SE2, SE3, SE4, T4, BI2, BI3"
Work Industry,"PE1, EE1, EE3, AT4, SI2, FC1, FC2, FC3, FC4, SE2, T1, T4"
Work Position,"PE1, PE2, PE3, PE4, EE1, EE2, EE3, AT4, SI1, SI2, SI4, FC1, FC2, FC3, FC4, SE1, T1, T2, T4, BI1, BI2, BI3, BI4"


## Use CFS on All UTAUT Factors

In [12]:
# Run CFS once per UTAUT item: the item is the target and every other column
# (demographics plus the remaining UTAUT items) is a candidate feature.
# NOTE(review): CFS comes from a star import (presumably Utilities.CFS) and is
# assumed to return a sized collection of selected column names - confirm.
arr_list = []

for col in utaut_fac_df.columns:
    print(col)  # progress marker: one line per target item
    arr_df = [mod_fac_df, utaut_fac_df.drop([col], axis = 1)]
    df_X = pd.concat(arr_df, axis = 1)
    df_Y = utaut_fac_df.loc[:, col]

    feature_set = CFS(df_X, df_Y, spearmanr)

    arr_list.append((col, feature_set, len(feature_set)))

tmp_df = pd.DataFrame(arr_list, columns = ["UTAUT Factor", "Feature Set", "Number of Features"])

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0.
# Prefer Styler.hide(axis="index") and fall back only on older pandas.
styler = tmp_df.style
styler.hide(axis = "index") if hasattr(styler, "hide") else styler.hide_index()

PE1
PE2
PE3
PE4
EE1
EE2
EE3
EE4
AT1
AT2
AT3
AT4
SI1
SI2
SI3
SI4
FC1
FC2
FC3
FC4
SE1
SE2
SE3
SE4
AX1
AX2
AX3
AX4
T1
T2
T3
T4
BI1
BI2
BI3
BI4


UTAUT Factor,Feature Set
PE1,"['PE2', 'EE1', 'BI3']"
PE2,"['PE1', 'PE3']"
PE3,"['PE2', 'PE4', 'EE2']"
PE4,"['PE2', 'PE3', 'EE4', 'AT2', 'SI3', 'SI4', 'AX4', 'BI3', 'BI4']"
EE1,"['Gender', 'Work Industry', 'PE1', 'EE2', 'EE3', 'AT4', 'FC1', 'FC2', 'T4']"
EE2,"['PE3', 'EE1', 'EE3', 'EE4', 'FC2', 'FC3', 'SE2', 'AX4', 'BI3']"
EE3,"['Gender', 'PE2', 'EE1', 'EE2', 'EE4', 'SI2', 'FC3', 'AX4', 'T1']"
EE4,"['EE2', 'EE3', 'T3']"
AT1,"['PE2', 'AT2', 'AT4', 'SE3', 'BI1', 'BI2']"
AT2,"['PE4', 'AT1', 'AT3', 'SE4', 'BI3']"
