# Importing Packages

In [43]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.cluster import Birch
from sklearn.metrics import matthews_corrcoef as corr
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import confusion_matrix as cmat
from sklearn.preprocessing import MinMaxScaler

# Loading data

In [44]:
orig_data=pd.read_csv('creditcard.csv')

# Data Preprocessing and Cleaning

In [45]:
orig_data.V1[orig_data["V1"]==',']=np.nan
orig_data.V1[orig_data["V1"]=='?']=np.nan
orig_data.V2[orig_data["V2"]==']']=np.nan
orig_data.V3[orig_data["V3"]=="'"]=np.nan
orig_data.V4[orig_data["V4"]=='%']=np.nan
orig_data.V5[orig_data["V5"]==']']=np.nan
orig_data.V5[orig_data["V5"]=='[']=np.nan
orig_data.V6[orig_data["V6"]=="'"]=np.nan
orig_data.V24[orig_data["V24"]=='/']=np.nan
orig_data.V24[orig_data["V24"]==',']=np.nan
orig_data.V24[orig_data["V24"]=="'"]=np.nan
orig_data.V24[orig_data["V24"]==';']=np.nan
orig_data.V24[orig_data["V24"]=='?']=np.nan
orig_data.V24[orig_data["V24"]=='.']=np.nan
orig_data.V25[orig_data["V25"]=='[']=np.nan
orig_data.V25[orig_data["V25"]=='.']=np.nan
orig_data = orig_data.applymap(lambda x: float(x))
orig_data=orig_data.apply(lambda x: x.fillna(x.mean()),axis=0)

# Data Normalization

In [46]:
scaler=MinMaxScaler().fit(orig_data)
scaled_data=scaler.transform(orig_data)
scaled_data_pd=pd.DataFrame(scaled_data,columns=orig_data.columns)
X_n=scaled_data_pd.drop(columns=['Time','Class'],axis=1)
X_t_n=scaled_data_pd.drop(columns=['Class'],axis=1)
X=orig_data.drop(columns=['Time','Class'],axis=1)
X_t=orig_data.drop(columns=['Class'],axis=1)
y=orig_data.Class

# Metrics Function

In [47]:
def metrics(pred,true):
    print("accuracy=",accuracy_score(pred,true))
    print("mathew's correaltion coefficient=",corr(pred,true))
    print("root mean square error=",np.sqrt(mse(pred,true)))
    print("neagtive predictive value:",cmat(true,pred)[1][1]/(cmat(true,pred)[1][0]+cmat(true,pred)[1][1]))

# Kmeans Algorithm with Normalization and Time Feature Dropped

In [48]:
kmeans = KMeans(n_clusters = 2,init = 'k-means++',random_state = 42,n_jobs=-1)
pred=kmeans.fit_predict(X_n)
metrics(pred,y)

accuracy= 0.6775005352586264
mathew's correaltion coefficient= 0.010568935549511861
root mean square error= 0.567890363310889
neagtive predictive value: 0.4410569105691057


# Kmeans Algorithm without Normalization and Time Feature Dropped

In [49]:
kmeans = KMeans(n_clusters = 2,init = 'k-means++',random_state = 42,n_jobs=-1)
pred=kmeans.fit_predict(X)
metrics(pred,y)

accuracy= 0.9801410274859692
mathew's correaltion coefficient= 0.005065018763291622
root mean square error= 0.14092186669935505
neagtive predictive value: 0.034552845528455285


# Kmeans Algorithm with Normalization and Time Feature

In [50]:
kmeans = KMeans(n_clusters = 2,init = 'k-means++',random_state = 42,n_jobs=-1)
pred=kmeans.fit_predict(X_t_n)
metrics(pred,y)

accuracy= 0.5363466931546564
mathew's correaltion coefficient= -0.011835841431817767
root mean square error= 0.6809209255452087
neagtive predictive value: 0.32113821138211385


# Kmeans Algorithm without Normalization and Time Feature

In [51]:
kmeans = KMeans(n_clusters = 2,init = 'k-means++',random_state = 42,n_jobs=-1)
pred=kmeans.fit_predict(X_t)
metrics(pred,y)

accuracy= 0.5369925133990151
mathew's correaltion coefficient= -0.01110720293094497
root mean square error= 0.6804465347115708
neagtive predictive value: 0.32926829268292684


# Birch Algorithm with Normalization and Time Feature Dropped

In [52]:
bc=Birch(n_clusters=2)
pred=bc.fit_predict(X_n)
metrics(pred,y)

accuracy= 0.9982731328248668
mathew's correaltion coefficient= 0.0
root mean square error= 0.041555591382305135
neagtive predictive value: 0.0


# Birch Algorithm with Normalization and Time Feature 

In [53]:
bc=Birch(n_clusters=2)
pred=bc.fit_predict(X_t_n)
metrics(pred,y)

accuracy= 0.9982731328248668
mathew's correaltion coefficient= 0.0
root mean square error= 0.041555591382305135
neagtive predictive value: 0.0
