# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [1]:
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

from sklearn import svm, tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, matthews_corrcoef, precision_recall_curve, roc_curve

from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr, chi2, chi2_contingency

from xgboost import XGBClassifier

%matplotlib inline

## Load Custom Made Libraries

In [2]:
from Utilities.CFS import *
from Utilities.reliefF import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

from skfeature.utility.mutual_information import *

## Load Dataset

In [3]:
# Read the raw questionnaire export; the path is relative to the notebook's working directory.
df = pd.read_csv("Dataset/E-payment Cryptocurrency Coin.csv")

## Label Encoding (category labels → integer codes)

In [4]:
def convert_nominal(arr, term_arr):
    """Encode each label in ``arr`` as its position within ``term_arr``.

    Parameters
    ----------
    arr : object with a ``.map`` method (e.g. a pandas Series)
        The labels to encode.
    term_arr : iterable of hashable labels
        Ordered list of known labels; the first label becomes 0, the next 1,
        and so on. If a label is repeated, its last position is used.

    Returns
    -------
    The result of ``arr.map`` applied with the label-to-position lookup.

    Raises
    ------
    KeyError
        If ``arr`` holds a value that is not present in ``term_arr``.
    """
    codes = {}
    for position, label in enumerate(term_arr):
        codes[label] = position

    def _lookup(label):
        # Plain indexing on purpose: an unknown label must fail loudly.
        return codes[label]

    return arr.map(_lookup)

## Split Dataset

In [5]:
# Short names for the six respondent-profile columns, taken positionally
# from the raw frame starting at column index 6.
column_arr = ["Age", "Gender", "Marital Status", "Education Level", "Work Industry", "Work Position"]

mod_fac_df = pd.DataFrame({
    col_name: df.iloc[:, 6 + offset]
    for (offset, col_name) in enumerate(column_arr)
})

# Repair the misspelled "Work Industry" answer before encoding
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Ordered answer options per column; each answer is encoded as its position in the list
category_order = {
    "Age": ["< 25 years", "26 - 40 years", "41 - 55 years", "above 55 years"],
    "Gender": ["Male", "Female"],
    "Marital Status": ["Single", "Married", "Other"],
    "Education Level": ['Primary school', 'Secondary/High school', 'College/university', 'Graduate school', 'Other'],
    "Work Industry": ['Banking / Finance', 'Education', 'Healthcare', 'Manufacturing', 'Retail / Hypermarket', 'Other'],
    "Work Position": ['Junior management', 'Middle management', 'Top management', 'Professional', 'Other'],
}

for (col_name, categories) in category_order.items():
    mod_fac_df[col_name] = convert_nominal(mod_fac_df[col_name], categories)


In [6]:
# Short item codes: the text before ": " in every header from column index 28 onward
column_arr = [header.split(": ")[0] for header in df.iloc[:, 28:].columns]

# Copy each item column under its short code, shifting every score down by one
utaut_fac_df = pd.DataFrame({
    col_name: df.iloc[:, 28 + offset].map(lambda score: score - 1)
    for (offset, col_name) in enumerate(column_arr)
})

# Change Data Type to int
utaut_fac_df = utaut_fac_df.astype(int)

## Chi-Square Analysis

In [18]:
# Place the encoded profile columns and the shifted item scores side by side,
# then cast the combined frame to int.
arr_df = [mod_fac_df, utaut_fac_df]
c_df = pd.concat(arr_df, axis = 1).astype(int)

In [37]:
# Confidence level and matching significance level; both are loop-invariant.
prob = 0.95
alpha = 1.0 - prob


def _is_associated(feature, target):
    """Chi-square test of independence between two columns of ``c_df``.

    Returns a truthy value only when the statistic reaches the critical value
    at ``prob`` AND the p-value is at most ``alpha``.
    """
    contingency = pd.crosstab(feature, target)
    stat, p, dof, _expected = chi2_contingency(contingency)
    critical = chi2.ppf(prob, dof)
    return abs(stat) >= critical and p <= alpha


# One (column, [associated columns]) pair per column of c_df
arr_list = []

for col in c_df.columns:
    tmp_Y = c_df.loc[:, col]
    tmp_X = c_df.drop(col, axis = 1)

    tmp_list = [
        col_2
        for col_2 in tmp_X.columns
        if _is_associated(tmp_X.loc[:, col_2], tmp_Y)
    ]

    arr_list.append((col, tmp_list))

arr_list

[('Age',
  ['Marital Status',
   'Education Level',
   'Work Industry',
   'Work Position',
   'PE1',
   'PE2',
   'PE4',
   'EE1',
   'EE3',
   'EE4',
   'AT1',
   'AT3',
   'AT4',
   'SI1',
   'SI2',
   'SI3',
   'SI4',
   'FC1',
   'FC2',
   'FC3',
   'SE1',
   'SE2',
   'SE3',
   'SE4',
   'AX2',
   'AX3',
   'T2',
   'T4',
   'BI1',
   'BI2',
   'BI3',
   'BI4']),
 ('Gender',
  ['PE1',
   'PE2',
   'EE1',
   'EE2',
   'EE3',
   'EE4',
   'AT1',
   'AT4',
   'SI2',
   'SI3',
   'SI4',
   'FC1',
   'FC2',
   'FC3',
   'FC4',
   'SE1',
   'SE3',
   'SE4',
   'AX4',
   'T1',
   'T3',
   'BI1',
   'BI2',
   'BI3']),
 ('Marital Status',
  ['Age',
   'Education Level',
   'Work Industry',
   'Work Position',
   'PE1',
   'PE2',
   'PE3',
   'EE1',
   'EE2',
   'EE3',
   'EE4',
   'AT1',
   'AT2',
   'AT3',
   'AT4',
   'SI1',
   'SI2',
   'SI3',
   'SI4',
   'FC1',
   'FC2',
   'FC3',
   'AX3',
   'T1',
   'T2',
   'T3',
   'T4',
   'BI1',
   'BI2',
   'BI3',
   'BI4']),
 ('Education Lev

In [27]:
tmp_X = c_df

# Target: raw column 17, with "No"/"Yes" answers recoded to 0/1 and cast to int
tmp_Y = df.iloc[:, 17].replace("No", 0).replace("Yes", 1).astype(int)

prob = 0.95

# Keep every column whose chi-square statistic reaches the critical value at `prob`
tmp_list = []
for col in tmp_X.columns:
    contingency = pd.crosstab(tmp_X.loc[:, col], tmp_Y)
    stat, _p, dof, _expected = chi2_contingency(contingency)
    if abs(stat) >= chi2.ppf(prob, dof):
        tmp_list.append(col)

print(tmp_Y.name)
print(tmp_list)
    

6. E-payment purchasing, Loyalty Points and Crypto Coin [(1) Have you ever purchased anything using the E-payment mode?]
['Age', 'Gender', 'Marital Status', 'Work Industry', 'Work Position', 'PE1', 'PE2', 'PE4', 'EE1', 'EE2', 'EE3', 'EE4', 'AT1', 'AT2', 'AT3', 'AT4', 'SI1', 'SI2', 'SI3', 'SI4', 'FC1', 'FC2', 'FC3', 'SE1', 'SE2', 'SE4', 'AX1', 'AX2', 'AX3', 'AX4', 'T2', 'T3', 'BI2', 'BI3', 'BI4']


In [34]:
tmp_X = c_df
tmp_Y = df.iloc[:, 18]

# Recode the "No"/"Yes" answers to 0/1
tmp_Y = tmp_Y.replace("No", 0)
tmp_Y = tmp_Y.replace("Yes", 1)

# Convert Data to Int
tmp_Y = tmp_Y.astype(int)

arr_list = []

prob = 0.95

for col in tmp_X.columns:
    chi_df = pd.crosstab(tmp_X.loc[:, col], tmp_Y)

    # Only the statistic and the degrees of freedom are used below
    stat, _p_val, dof, _expected = chi2_contingency(chi_df)

    critical = chi2.ppf(prob, dof)

    # Keep the column when its statistic reaches the critical value at `prob`
    if abs(stat) >= critical:
        arr_list.append((col, stat))

print(tmp_Y.name)
# Sort variables ascending by chi-square statistic (the second tuple element)
arr_list = sorted(arr_list, key = lambda x : x[1])

arr_df = pd.DataFrame(arr_list, columns = ["Variables", "Chi-Square Value"])

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# use Styler.hide(axis="index") when available, else fall back for older pandas.
styled_df = arr_df.style
styled_df.hide(axis = "index") if hasattr(styled_df, "hide") else styled_df.hide_index()
    

6. E-payment purchasing, Loyalty Points and Crypto Coin [(2) In the next six months, do you plan to purchase anything using the E-payment mode?]


Variables,Chi-Square Value
PE4,10.270128
PE2,10.450457
SE4,10.742271
T1,11.540111
BI3,12.029613
T2,12.034293
SE3,12.072054
BI2,13.310899
FC4,14.176667
T4,16.012116
