In [None]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Step 2: Load the dataset
# The relative path is resolved against the current working directory.
data = pd.read_csv('bank_transactions_data_2.csv')

# Step 3: Inspect the data
print(data.head())  # Preview the first five rows of the dataset
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called on its own; wrapping it in print() adds a stray "None" line.
data.info()

# Step 4: Data Preprocessing

# Missing values: every row that contains at least one missing entry is discarded.
# (Filling the gaps with fillna() is the alternative if dropping rows is too lossy.)
data = data.dropna(axis=0, how='any')

# Categorical columns, if the dataset has any, need to be converted to numbers
# (for example with sklearn's LabelEncoder or OneHotEncoder).
# No encoding is applied in this script.

# Step 5: Define features and target variable
# NOTE: 'target' is a placeholder; replace it with the actual label column of your dataset.
X = data.drop(columns='target')  # Features: every column except the target
y = data['target']  # The target variable

# Step 6: Hold out 30% of the rows as a test set (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 7: Standardize the features (important for SVM performance)
# The scaler learns its statistics from the training rows only; the same
# fitted scaler is then applied to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 8: Fit an SVM with a linear kernel on the scaled training data
svm_linear = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Step 9: Predict labels for the scaled test data
y_pred_linear = svm_linear.predict(X_test_scaled)

# Step 10: Score the linear-kernel predictions against the held-out labels
accuracy_linear = accuracy_score(y_test, y_pred_linear)
cm_linear = confusion_matrix(y_test, y_pred_linear)

print(f'Linear Kernel Accuracy: {accuracy_linear:.4f}')
print('Linear Kernel Classification Report:')
print(classification_report(y_test, y_pred_linear))
# Header and matrix go on separate lines, exactly as two print() calls would emit them
print('Linear Kernel Confusion Matrix:', cm_linear, sep='\n')

# Step 11: Fit an SVM with a degree-3 polynomial kernel on the scaled training data
# (the degree is a tunable choice; change it to experiment)
svm_poly = SVC(kernel='poly', degree=3).fit(X_train_scaled, y_train)

# Step 12: Predict labels for the scaled test data
y_pred_poly = svm_poly.predict(X_test_scaled)

# Step 13: Score the polynomial-kernel predictions against the held-out labels
accuracy_poly = accuracy_score(y_test, y_pred_poly)
cm_poly = confusion_matrix(y_test, y_pred_poly)

print(f'Polynomial Kernel Accuracy: {accuracy_poly:.4f}')
print('Polynomial Kernel Classification Report:')
print(classification_report(y_test, y_pred_poly))
# Header and matrix go on separate lines, exactly as two print() calls would emit them
print('Polynomial Kernel Confusion Matrix:', cm_poly, sep='\n')

# Step 14: Visualize the Confusion Matrices for both models
# The two heatmaps are drawn side by side in one figure. Tick labels are built
# from the labels that actually occur instead of assuming a binary 0/1 target,
# so the axes stay correct for any number of classes or label values.
plt.figure(figsize=(12, 6))

for position, (title, cm, y_pred) in enumerate(
    [
        ('Linear Kernel Confusion Matrix', cm_linear, y_pred_linear),
        ('Polynomial Kernel Confusion Matrix', cm_poly, y_pred_poly),
    ],
    start=1,
):
    # When confusion_matrix() is called without `labels`, its rows/columns follow
    # the sorted set of labels appearing in y_true or y_pred; rebuild that order here
    # so each tick lines up with the matching row/column.
    class_labels = [f'Class {label}' for label in sorted(set(y_test) | set(y_pred))]

    plt.subplot(1, 2, position)
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(title)

plt.tight_layout()
plt.show()

# Step 15: Report the two test-set accuracies together for a direct comparison
print(
    f'Comparison of Accuracies:\n'
    f'Linear Kernel Accuracy: {accuracy_linear:.4f}\n'
    f'Polynomial Kernel Accuracy: {accuracy_poly:.4f}'
)
