# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [3]:
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import svm, tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split, KFold
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, matthews_corrcoef

from xgboost import XGBClassifier

%matplotlib inline

## Load Custom Made Libraries

In [4]:
from Utilities.CFS import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

## Load Dataset

In [5]:
# Load the raw CSV into `df`. Later cells pick columns out of `df` by position
# (`df.iloc[:, 6 + ind]`, `df.iloc[:, 28:]`, `df.iloc[:, [17]]`), so they depend on
# the column order of this file.
df = pd.read_csv("Dataset/E-payment Cryptocurrency Coin.csv")

## Label Encoder (nominal value to integer code)

In [6]:
def convert_nominal(arr, term_arr):
    """Encode each value of ``arr`` as its integer position in ``term_arr``.

    Parameters
    ----------
    arr : object exposing ``.map(func)``
        The values to encode; only its ``map`` method is used.
    term_arr : iterable
        The known categories. A category's position in this iterable becomes
        its code; if a category is listed more than once, the last position wins.

    Returns
    -------
    The result of ``arr.map`` applied with the position lookup.

    Raises
    ------
    KeyError
        If ``arr`` contains a value that does not appear in ``term_arr``.
    """
    position_of = {}
    for position, term in enumerate(term_arr):
        position_of[term] = position

    def _lookup(value):
        # Plain indexing (not .get) so an unknown category fails loudly.
        return position_of[value]

    return arr.map(_lookup)

## Split Dataset

In [7]:
# Category labels for each demographic column, keyed by column name.
# The only use visible in this notebook is the (currently commented-out) encoding
# loop, which passes `index_dict[col]` as the `term_arr` of `convert_nominal`;
# there, a label's position in its list would become its integer code.
index_dict = {
    "Age": [
        "< 25 years",
        "26 - 40 years",
        "41 - 55 years",
        "above 55 years",
    ],
    "Gender": [
        "Male",
        "Female",
    ],
    "Marital Status": [
        "Single",
        "Married",
        "Other",
    ],
    "Education Level": [
        "Primary school",
        "Secondary/High school",
        "College/university",
        "Graduate school",
        "Other",
    ],
    "Work Industry": [
        "Banking / Finance",
        "Education",
        "Healthcare",
        "Manufacturing",
        "Retail / Hypermarket",
        "Other",
    ],
    "Work Position": [
        "Junior management",
        "Middle management",
        "Top management",
        "Professional",
        "Other",
    ],
}

In [8]:
# Names given to the six demographic columns, in the order they are read from `df`.
column_arr = [
    "Age",
    "Gender",
    "Marital Status",
    "Education Level",
    "Work Industry",
    "Work Position",
]

# The columns are taken by position: the i-th name receives column 6 + i of `df`.
mod_fac_df = pd.DataFrame(
    {label: df.iloc[:, 6 + offset] for offset, label in enumerate(column_arr)}
)

# Correct the misspelled "Baking / Finance" label. The replacement is applied to the
# whole frame, so the value is fixed in whichever column it occurs.
mod_fac_df = mod_fac_df.replace("Baking / Finance", "Banking / Finance")

# Integer encoding of the columns is disabled; the original loop is kept for reference:
# for col in column_arr:
#     mod_fac_df[col] = convert_nominal(mod_fac_df[col], index_dict[col])

In [9]:
# Every column of `df` from position 28 onward is used. Each header is shortened to
# the text before its first ": " and that short form becomes the new column name.
column_arr = [header.split(": ")[0] for header in df.iloc[:, 28:].columns]

# Build the frame in one step: the i-th short name receives column 28 + i of `df`,
# with 1 subtracted from every value (presumably to make a 1-based answer scale
# start at 0 -- TODO confirm against the questionnaire). The final astype(int)
# forces an integer dtype on every column.
utaut_fac_df = pd.DataFrame(
    {
        item_code: df.iloc[:, 28 + offset].map(lambda answer: answer - 1)
        for offset, item_code in enumerate(column_arr)
    }
).astype(int)

## Create New CSV File

In [10]:
# Column 17 of `df` holds a "Yes"/"No" answer; the double brackets keep it as a
# one-column DataFrame so it can be concatenated with the other frames below.
tmp_df = df.iloc[:, [17]]

# Encode "No" -> 0 and "Yes" -> 1 in a single replace pass, then cast to int.
# The cast raises if any other non-numeric answer is present.
tmp_df = tmp_df.replace({"No": 0, "Yes": 1}).astype(int)

# Shorten the header: take the segment that follows the first "[", then drop its
# first four characters and its last character (presumably a numbering prefix and
# the closing "]" -- verify against the CSV header).
tmp_df.columns = [col.split("[")[1][4:-1] for col in tmp_df.columns.tolist()]

# Place the demographic columns, the questionnaire items and the 0/1 answer side by
# side. `output_df` is created here directly; no empty placeholder frame is needed.
arr_df = [mod_fac_df, utaut_fac_df, tmp_df]
output_df = pd.concat(arr_df, axis = 1)

In [11]:
# Show the combined frame as the cell output (pandas abbreviates the rendering with
# "..." rows/columns when the frame is large).
output_df

Unnamed: 0,Age,Gender,Marital Status,Education Level,Work Industry,Work Position,PE1,PE2,PE3,PE4,...,AX4,T1,T2,T3,T4,BI1,BI2,BI3,BI4,Have you ever purchased anything using the E-payment mode?
0,< 25 years,Female,Single,College/university,Banking / Finance,Other,1,1,1,1,...,2,1,0,1,2,2,2,2,2,1
1,< 25 years,Female,Single,College/university,Other,Other,0,0,0,0,...,3,1,1,1,1,1,1,1,1,1
2,41 - 55 years,Female,Single,College/university,Manufacturing,Middle management,3,4,3,4,...,2,2,2,2,3,2,3,3,3,1
3,< 25 years,Male,Single,College/university,Education,Other,2,2,2,2,...,2,2,2,2,2,3,3,2,2,0
4,< 25 years,Female,Single,College/university,Other,Other,2,2,3,3,...,3,2,3,3,3,3,4,3,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,41 - 55 years,Male,Married,Secondary/High school,Other,Other,2,2,2,2,...,2,2,2,2,2,2,2,2,2,1
282,above 55 years,Male,Married,Graduate school,Education,Top management,2,2,2,2,...,2,2,2,2,2,2,2,2,2,1
283,above 55 years,Female,Married,College/university,Other,Other,2,2,2,2,...,2,2,2,2,2,2,2,2,2,1
284,41 - 55 years,Male,Married,Graduate school,Education,Professional,3,3,3,3,...,2,3,3,3,3,3,3,3,3,1


In [12]:
# Write the combined frame to "BI_analysis.csv" (path relative to the working
# directory); index=False leaves the row index out of the file.
output_df.to_csv("BI_analysis.csv", index=False)