In [2]:
# ======================================================
# CREDIT CARD FRAUD DETECTION: DECISION TREE VS SVM
# ======================================================

In [None]:
from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize, StandardScaler
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.svm import LinearSVC
import warnings
# Silence every Python warning for the rest of the session to keep cell output clean.
# NOTE(review): this blanket filter also hides any warnings the estimators
# below may emit (e.g. convergence or deprecation warnings) — consider
# narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Remote location of the credit card transactions CSV
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%203/data/creditcard.csv"

# Load the CSV straight from the URL into a DataFrame
raw_data = pd.read_csv(filepath_or_buffer=url)

# Keep the frame as the cell's last expression so the notebook renders it
raw_data

In [8]:
# 1. DATA PREPROCESSING
# Standardize features by removing the mean and scaling to unit variance.
# The scaling is applied to a copy so that `raw_data` keeps the values shown
# when it was loaded and re-running this cell always starts from the same input.
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so test-set statistics influence the scaling — consider fitting it on the
# training rows only.
scaled_data = raw_data.copy()
scaled_data.iloc[:, 1:30] = StandardScaler().fit_transform(raw_data.iloc[:, 1:30])
data_matrix = scaled_data.values

# X: feature matrix (columns 1-29, excluding the Time variable), with each
#    sample (row) rescaled to unit L1 norm
X = normalize(data_matrix[:, 1:30], norm="l1")
# y: labels vector (column 30)
y = data_matrix[:, 30]

In [None]:
# 2. TRAIN/TEST SPLIT
# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# partition reproducible across runs.
# NOTE(review): the split is not stratified — consider `stratify=y` if both
# partitions must preserve the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Class imbalance is handled in the next step via per-sample weights.

In [None]:
# Per-sample training weights in 'balanced' mode, used to counter class
# imbalance when fitting models that accept `sample_weight`.
w_train = compute_sample_weight(class_weight='balanced', y=y_train)

In [None]:
# 3. MODEL 1: DECISION TREE
# Shallow tree (depth capped at 4) with a fixed seed; the per-sample weights
# are passed to fit so both classes contribute comparably to training.
tree_settings = {"max_depth": 4, "random_state": 35}
dt = DecisionTreeClassifier(**tree_settings)
dt.fit(X=X_train, y=y_train, sample_weight=w_train)

In [None]:
# 4. MODEL 2: SUPPORT VECTOR MACHINE (SVM)
# Linear SVM with hinge loss and no intercept term; class_weight='balanced'
# lets the estimator compensate for class imbalance itself, so no
# sample_weight is passed to fit.
svm = LinearSVC(class_weight='balanced', random_state=31, loss="hinge", fit_intercept=False)

# Train the model. Without this call the estimator stays unfitted and the
# evaluation step's svm.decision_function(X_test) fails on a fresh kernel.
svm.fit(X_train, y_train)

In [19]:
# 5. EVALUATION
# Score the held-out samples with each model, then summarize both with ROC-AUC.

# Decision Tree: predicted probability taken from column 1 of predict_proba
y_pred_dt = dt.predict_proba(X_test)[:, 1]
# SVM: LinearSVC has no predict_proba, so use its decision_function scores
y_pred_svm = svm.decision_function(X_test)

roc_auc_dt = roc_auc_score(y_true=y_test, y_score=y_pred_dt)
roc_auc_svm = roc_auc_score(y_true=y_test, y_score=y_pred_svm)

print(f'Decision Tree ROC-AUC score : {roc_auc_dt:.3f}')
print(f'SVM ROC-AUC score           : {roc_auc_svm:.3f}')

Decision Tree ROC-AUC score : 0.939
SVM ROC-AUC score           : 0.986


In [None]:
## Conclusion and Model Selection
Based on the ROC-AUC evaluation:
* **Decision Tree:** 0.939
* **SVM:** 0.986

The **SVM model** is the winner. In a fraud detection context, a higher ROC-AUC means the model is better at ranking a fraudulent transaction above a normal one across all decision thresholds, even with a highly imbalanced dataset.

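**Recommendation:** I would deploy the SVM model for this use case, as it separates fraudulent from legitimate transactions more reliably. Note that ROC-AUC measures ranking quality rather than precision, so precision and recall at the chosen decision threshold should be checked before deployment.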