# Title: CORRELATION MODEL IN THE ADOPTION OF E-PAYMENT SERVICES

## Load Libraries

In [1]:
import time
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, confusion_matrix
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr

## Load Custom-Made Libraries

In [2]:
from Utilities.CFS import *
from Utilities.accuracy import *
from Utilities.corr_matrix import *
from Utilities.forward_selection import *
from Utilities.backward_elimination import *

from Visualization.model_graph import *
from Visualization.network_graph import *

## Load Dataset

In [3]:
# Load the CSV dataset into `df`; the cells below select columns from it by position.
dataset_path = "Dataset/E-payment Cryptocurrency Coin.csv"
df = pd.read_csv(dataset_path)

## Split Dataset

In [4]:
# Columns from this position onward are used as the feature set.
feature_start = 28

# Keep only the text before ": " in each header as the feature name
column_arr = [header.split(": ")[0] for header in df.columns[feature_start:]]

# Copy the columns over one at a time under their shortened names
# (a repeated name overwrites the column already stored under it)
df_X = pd.DataFrame()
for position, feature in enumerate(column_arr, start=feature_start):
    df_X[feature] = df.iloc[:, position]

# Change Data Type to int
df_X = df_X.astype(int)

In [5]:
# Target: the column at position 17, taken directly as a Series
answers = df.iloc[:, 17]

# Replace Values ("Yes" -> 1, "No" -> 0), then Change Data Type to int
df_Y = answers.replace({"Yes": 1, "No": 0}).astype(int)

## Analysis

#### Merit Based Ranking

![merit_based_ranking.jpg](img/merit_based_ranking.jpg)

In [6]:
# Score each factor on its own: merit_calculation is called with a
# one-column frame, the target Series and the point-biserial function.
arr_list = []

for col_ind, name in enumerate(df_X.columns):
    merit = merit_calculation(df_X.iloc[:, [col_ind]], df_Y, pointbiserialr)
    arr_list.append((col_ind, name, merit))

# Sort Column By Merit Value (highest first; ties keep their dataset order
# because sorted() is stable, also with reverse=True)
arr_list = sorted(arr_list, key=lambda x: x[2], reverse=True)

# Build the table with the real rank from the start, instead of labelling the
# column position as "Rank" and overwriting it afterwards.
ranks = list(range(1, len(arr_list) + 1))
mbf_df = pd.DataFrame(
    {
        "Rank": ranks,
        "Factors": [factor for _, factor, _ in arr_list],
        "Merit": [score for _, _, score in arr_list],
    },
    index=ranks,
)

# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index")
# (available from pandas 1.4) replaces it. Use whichever this pandas offers.
styler = mbf_df.style
styler.hide(axis="index") if hasattr(styler, "hide") else styler.hide_index()

Rank,Factors,Merit
1,T4,0.174474
2,BI1,0.170224
3,SE4,0.146783
4,T2,0.138256
5,SE3,0.130507
6,SE2,0.128384
7,T3,0.1283
8,FC4,0.126647
9,AT1,0.122876
10,T1,0.117991


#### Correlation Pairwise Ranking

![correlation_pairwise_ranking.jpg](img/correlation_pairwise_ranking.jpg)

In [7]:
# Pairwise Spearman matrix of the features.
# NOTE(review): create_corr_matrix is assumed to return a square DataFrame
# labelled by feature on both axes (the original code used `.values` and
# `np.tril_indices_from` on it) -- confirm against Utilities.corr_matrix.
corr = create_corr_matrix(df_X, spearmanr)

# Blank out the diagonal and the lower triangle so each pair appears once.
# Done with a boolean mask rather than by writing into `corr.values`: that
# array is not guaranteed to be a writable view of the frame (it can be a copy,
# and it is read-only when pandas Copy-on-Write is active).
upper_only = np.triu(np.ones(corr.shape, dtype=bool), k=1)
corr = corr.where(upper_only)

corr

Unnamed: 0,PE1,PE2,PE3,PE4,EE1,EE2,EE3,EE4,AT1,AT2,...,AX3,AX4,T1,T2,T3,T4,BI1,BI2,BI3,BI4
PE1,,0.835233,0.7256,0.730739,0.780623,0.654947,0.677241,0.617229,0.709411,0.628712,...,-0.12309,-0.184555,0.66005,0.672744,0.627753,0.657686,0.705704,0.719527,0.746661,0.727006
PE2,,,0.794513,0.765794,0.675508,0.645924,0.661695,0.661066,0.690479,0.647245,...,-0.051329,-0.101325,0.595035,0.675372,0.642348,0.604658,0.636311,0.654216,0.715026,0.718001
PE3,,,,0.774089,0.608697,0.677799,0.604741,0.622669,0.636145,0.679483,...,-0.090783,-0.016761,0.547468,0.621414,0.630102,0.533779,0.574985,0.586452,0.669027,0.645617
PE4,,,,,0.636272,0.681118,0.639758,0.702504,0.674317,0.733741,...,-0.043111,-0.13788,0.641736,0.681015,0.697144,0.594439,0.655556,0.62168,0.7274,0.729449
EE1,,,,,,0.76244,0.750047,0.716287,0.653017,0.596293,...,-0.128873,-0.174636,0.701972,0.674657,0.677308,0.640904,0.634953,0.661093,0.655963,0.65685
EE2,,,,,,,0.758405,0.797431,0.635274,0.63311,...,-0.168232,-0.225122,0.680571,0.679625,0.673919,0.599671,0.628496,0.60827,0.662576,0.620644
EE3,,,,,,,,0.806975,0.640132,0.638569,...,-0.116824,-0.235763,0.69628,0.670219,0.700538,0.593044,0.655935,0.585558,0.626968,0.672501
EE4,,,,,,,,,0.644354,0.6386,...,-0.085887,-0.166941,0.6458,0.654654,0.746758,0.53741,0.629173,0.56352,0.599056,0.663359
AT1,,,,,,,,,,0.780686,...,-0.013025,-0.139762,0.616839,0.646978,0.650341,0.639711,0.725117,0.704624,0.722441,0.711177
AT2,,,,,,,,,,,...,0.005595,-0.094146,0.57539,0.639295,0.656581,0.604475,0.644678,0.60689,0.711945,0.691402


#### Correlation Based Ranking

![correlation_based_ranking.jpg](img/correlation_based_ranking.jpg)

In [8]:
func = pointbiserialr

# Correlate each factor with the target. func returns a
# (correlation, p-value) pair; only the correlation is used for ranking.
# The loop variables are deliberately not called `corr`, so the pairwise
# matrix stored under that name by the earlier cell is not overwritten.
arr_list = []

for col_ind, name in enumerate(df_X.columns):
    coef, _p_val = func(df_X.iloc[:, col_ind], df_Y)
    arr_list.append((col_ind, name, coef))

# Sort Column By Correlation Value (highest first; ties keep their dataset
# order because sorted() is stable, also with reverse=True)
arr_list = sorted(arr_list, key=lambda x: x[2], reverse=True)

# Build the table with the real rank from the start, instead of labelling the
# column position as "Rank" and overwriting it afterwards.
ranks = list(range(1, len(arr_list) + 1))
mbf_df = pd.DataFrame(
    {
        "Rank": ranks,
        "Factors": [factor for _, factor, _ in arr_list],
        "Weight For Ranking": [weight for _, _, weight in arr_list],
    },
    index=ranks,
)

# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index")
# (available from pandas 1.4) replaces it. Use whichever this pandas offers.
styler = mbf_df.style
styler.hide(axis="index") if hasattr(styler, "hide") else styler.hide_index()

Rank,Factors,Weight For Ranking
1,T4,0.174474
2,BI1,0.170224
3,SE4,0.146783
4,T2,0.138256
5,SE3,0.130507
6,SE2,0.128384
7,T3,0.1283
8,FC4,0.126647
9,AT1,0.122876
10,T1,0.117991


## Network Graph

![correlation_based_network.jpg](img/correlation_based_network.jpg)

In [12]:
# "Winner Takes All Method": minimum absolute correlation a pair must reach
threshold = 0.75

# Create Pairwise Correlation Matrix
# NOTE(review): pairwise_correlation is assumed to return a frame with one row
# per feature pair; only its "correlation" column is read here -- confirm.
corr_df = pairwise_correlation(df_X, spearmanr)

# Retain Features where correlation is above threshold (in either direction)
is_strong = corr_df["correlation"].abs() >= threshold
corr_df = corr_df[is_strong]

network_graph(corr_df, "Important Features")