In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# import time
# start_time = time.time()

data = pd.read_csv("D:/ACM/Cycle_2/Day_2_Support_Vector_Machines/creditcard.csv") 
print("Dataset shape:", data.shape)
print(data['Class'].value_counts())

# Sampling a small part of the dataset (While keeping all the fraud part)
fraud = data[data['Class'] == 1]
non_fraud = data[data['Class'] == 0].sample(n=5000, random_state=1)
data_small = pd.concat([fraud, non_fraud])
#Accuracy at 50k samples 164.69 sec
# Linear SVM Accuracy:     0.9980
# RBF SVM Accuracy:        0.9979
# Polynomial SVM Accuracy: 0.9978

X = data_small.drop('Class', axis=1)
y = data_small['Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=1)

#Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#Dimensionality Reduction (PCA)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

import plotly.express as px #https://plotly.com/python/pca-visualization/
fig = px.scatter(np.vstack([X_train_pca, X_test_pca]), x=0, y=1, color=data_small['Class'])
fig.show()

#Train SVM Classifiers

#Linear Kernel
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train_scaled, y_train)
y_pred_linear = svm_linear.predict(X_test_scaled)
acc_linear = accuracy_score(y_test, y_pred_linear)
print("Linear Kernel Accuracy:", acc_linear)

#RBF Kernel
svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(X_train_scaled, y_train)
y_pred_rbf = svm_rbf.predict(X_test_scaled)
acc_rbf = accuracy_score(y_test, y_pred_rbf)
print("RBF Kernel Accuracy:", acc_rbf)

#Polynomial Kernel
svm_poly = SVC(kernel='poly')
svm_poly.fit(X_train_scaled, y_train)
y_pred_poly = svm_poly.predict(X_test_scaled)
acc_poly = accuracy_score(y_test, y_pred_poly)
print("Polynomial Kernel Accuracy:", acc_poly)

#Summary
print("\nSVM Model Accuracies")
print(f"Linear SVM Accuracy:     {acc_linear:.4f}")
print(f"RBF SVM Accuracy:        {acc_rbf:.4f}")
print(f"Polynomial SVM Accuracy: {acc_poly:.4f}")

# end_time = time.time()
# execution_time = end_time - start_time
# print(f"Execution Time: {execution_time:.2f} seconds")

print("""
Kernel effect:
1.A linear kernel is the simplest form of kernel used in SVM. It is suitable when the data is linearly separable
2.A RBF kernel maps the data into an infinite-dimensional space making it highly effective for complex classification problems.
3.A polynomial kernel is useful when the data is not linearly separable but still follows a pattern by transforming the feature space.
""")

Dataset shape: (284807, 31)
Class
0    284315
1       492
Name: count, dtype: int64


Linear Kernel Accuracy: 0.9818016378525932
RBF Kernel Accuracy: 0.9799818016378526
Polynomial Kernel Accuracy: 0.9736123748862603

SVM Model Accuracies
Linear SVM Accuracy:     0.9818
RBF SVM Accuracy:        0.9800
Polynomial SVM Accuracy: 0.9736

Kernel effect:
1.A linear kernel is the simplest form of kernel used in SVM. It is suitable when the data is linearly separable
2.A RBF kernel maps the data into an infinite-dimensional space making it highly effective for complex classification problems.
3.A polynomial kernel is useful when the data is not linearly separable but still follows a pattern by transforming the feature space.

