In [1]:
# General Data Processing and Visualization Tools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

In [2]:
# Specific Models for Testing
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

In [3]:
# Visualization Tools
from yellowbrick.classifier import ClassificationReport
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

def make_vis(model_clf,train,val):
  viz = ClassificationReport(model_clf, classes=['0', '1'], support = True)
  viz.fit(train, np.ravel(label_train))
  viz.score(val, np.ravel(label_val))
  viz.poof()

def make_cmatrix(predictions,clf_name):
  cm = confusion_matrix(label_val, predictions)
  cm_display = ConfusionMatrixDisplay(cm,display_labels=['Normal','Fraud'])
  cm_display.plot(values_format='d')
  plt.title("Confusion Matrix: "+clf_name)
  plt.grid(False)
  plt.show()

In [4]:
from sklearn.decomposition import PCA
def pca_auto(n):
    print("Scores on PCA",n)
    PCA_nd = PCA(n_components=n)
    PCA_view_nd = PCA_nd.fit_transform(train_raw)
    pca_val = PCA_nd.transform(label_val)

    #####
    d_tree = DecisionTreeClassifier(random_state=255).fit(PCA_view_nd,np.ravel(label_train))

    prediction_0 = d_tree.predict(pca_val)
    print("d_tree:")
    print(classification_report(np.ravel(label_val), prediction_0))

    #####
    linear_regresser_n = LogisticRegression(solver="lbfgs", max_iter=500, random_state=255).fit(PCA_view_nd,np.ravel(label_train))

    prediction_1 = linear_regresser_n.predict(pca_val)
    print("linreg:")
    print(classification_report(np.ravel(label_val), prediction_1))
    
    #####
    svc_clf_n = LinearSVC(random_state=255).fit(PCA_view_nd,np.ravel(label_train))

    prediction_2 = svc_clf_n.predict(pca_val)
    print("svc:")
    print(classification_report(np.ravel(label_val), prediction_2))

    #####
    mlp_clf_n = MLPClassifier(solver='adam', activation='relu',
                            hidden_layer_sizes=(train_raw.shape[1],16,2), random_state = 255,
                            max_iter=500).fit(PCA_view_nd,np.ravel(label_train))

    prediction_3 = svc_clf_n.predict(pca_val)
    print("mlp:")
    print(classification_report(np.ravel(label_val), prediction_3))


In [5]:
data_raw = pd.read_csv('../data/creditcard.csv', sep=",")

In [6]:
data_only = data_raw.drop(["Class"],axis=1)
labels_only = data_raw.take([-1],axis=1)

In [7]:
from sklearn.model_selection import train_test_split
train_raw, val_raw, label_train, label_val = train_test_split(data_only, labels_only,\
                                                                stratify=labels_only,\
                                                                test_size = 0.2,\
                                                               random_state = 255)

In [8]:
pca_auto(30)

Scores on PCA 30
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962

mlp:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
pca_auto(25)

Scores on PCA 25
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962

mlp:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
pca_auto(20)

Scores on PCA 20
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962

mlp:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
pca_auto(15)

Scores on PCA 15
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962

mlp:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     56864
           1       0.00      1.00      0.00        98

    accuracy                           0.00     56962
   macro avg       0.00      0.50      0.00     56962
weighted avg       0.00      0.00      0.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
pca_auto(10)

Scores on PCA 10
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962

mlp:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
pca_auto(5)

Scores on PCA 5
d_tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


linreg:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


svc:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962

mlp:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



  _warn_prf(average, modifier, msg_start, len(result))


# Results

All models fail to generate proper results at any level of PCA, confirming our hypothesis that applying decomposition to already decomposed values is not useful