In [36]:
import pandas as pd
import numpy as np
import xgboost as xgb
import umap
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, precision_recall_curve, auc, classification_report
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [37]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Loading the Dataset

In [38]:
dataset = pd.read_csv("/content/drive/MyDrive/kaggle_datasets/creditcardfraud/creditcard.csv")
dataset.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [39]:
dataset.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.168375e-15,3.416908e-16,-1.379537e-15,2.074095e-15,9.604066e-16,1.487313e-15,-5.556467e-16,1.213481e-16,-2.406331e-15,...,1.654067e-16,-3.568593e-16,2.578648e-16,4.473266e-15,5.340915e-16,1.683437e-15,-3.660091e-16,-1.22739e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [40]:
frauds = dataset[dataset['Class'] == 1]
print(frauds.shape)   # number of fraud cases
frauds.head()         # show first few fraud rows
frauds

(492, 31)


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


## Training using XGBoost

In [41]:
X = dataset.drop("Class", axis=1)
y = dataset["Class"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y
)

In [42]:
n_pos = np.sum(y_train == 1)
n_neg = np.sum(y_train == 0)
scale = n_neg / n_pos

model = xgb.XGBClassifier(
    objective="binary:logistic",
    eval_metric="aucpr",
    scale_pos_weight=scale,
    n_estimators=1000,
    learning_rate=0.025,
    max_depth=10,
    subsample=0.8,
    colsample_bytree=0.8,
    n_jobs=-1
)

model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    verbose=25
)

y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_proba))

precision, recall, _ = precision_recall_curve(y_test, y_proba)
print("PR-AUC:", auc(recall, precision))

print("\nClassification Report:\n", classification_report(y_test, y_pred))

[0]	validation_0-aucpr:0.64798
[25]	validation_0-aucpr:0.77973
[50]	validation_0-aucpr:0.77440
[75]	validation_0-aucpr:0.78561
[100]	validation_0-aucpr:0.78194
[125]	validation_0-aucpr:0.79718
[150]	validation_0-aucpr:0.81151
[175]	validation_0-aucpr:0.81342
[200]	validation_0-aucpr:0.82842
[225]	validation_0-aucpr:0.85907
[250]	validation_0-aucpr:0.86423
[275]	validation_0-aucpr:0.87005
[300]	validation_0-aucpr:0.87367
[325]	validation_0-aucpr:0.87837
[350]	validation_0-aucpr:0.87972
[375]	validation_0-aucpr:0.88145
[400]	validation_0-aucpr:0.88169
[425]	validation_0-aucpr:0.88204
[450]	validation_0-aucpr:0.88247
[475]	validation_0-aucpr:0.88278
[500]	validation_0-aucpr:0.88389
[525]	validation_0-aucpr:0.88406
[550]	validation_0-aucpr:0.88435
[575]	validation_0-aucpr:0.88453
[600]	validation_0-aucpr:0.88450
[625]	validation_0-aucpr:0.88486
[650]	validation_0-aucpr:0.88510
[675]	validation_0-aucpr:0.88511
[700]	validation_0-aucpr:0.88538
[725]	validation_0-aucpr:0.88534
[750]	validatio

## Plotting Normal vs Fraud and Prediction Results

In [43]:
y_pred_series = pd.Series(y_pred, index=X_test.index)
y_proba_series = pd.Series(y_proba, index=X_test.index)

fraud_idx = y_test[y_test == 1].index
normal_idx = y_test[y_test == 0].sample(1000).index
subset_idx = fraud_idx.union(normal_idx)

X_subset = X_test.loc[subset_idx]
y_subset = y_test.loc[subset_idx]
y_pred_subset = y_pred_series.loc[subset_idx]
y_proba_subset = y_proba_series.loc[subset_idx]

reducer = umap.UMAP()
X_2d = reducer.fit_transform(X_subset)

plot_df = pd.DataFrame({
    "UMAP1": X_2d[:,0],
    "UMAP2": X_2d[:,1],
    "TrueClass": y_subset.values,
    "PredictedClass": y_pred_subset.values,
    "FraudProbability": y_proba_subset.values
})

fig_truth = px.scatter(
    plot_df, x="UMAP1", y="UMAP2",
    color=plot_df["TrueClass"].map({0:"Normal", 1:"Fraud"}),
    title="True Fraud vs Normal (UMAP Projection)",
    opacity=0.7
)
fig_truth.show()

fig_pred = px.scatter(
    plot_df, x="UMAP1", y="UMAP2",
    color=plot_df["PredictedClass"].map({0:"Normal", 1:"Fraud"}),
    size="FraudProbability",
    title="XGBoost Predictions on Test Set (UMAP Projection)",
    opacity=0.7
)
fig_pred.show()


In [44]:
conditions = []
for true, pred in zip(y_subset, y_pred_subset):
    if true == 0 and pred == 0:
        conditions.append("True Negative")
    elif true == 1 and pred == 1:
        conditions.append("True Positive")
    elif true == 0 and pred == 1:
        conditions.append("False Positive")
    elif true == 1 and pred == 0:
        conditions.append("False Negative")

plot_df["ErrorType"] = conditions

fig_errors = px.scatter(
    plot_df, x="UMAP1", y="UMAP2",
    color="ErrorType",
    title="Error Analysis: Fraud Detection Outcomes (UMAP Projection)",
    symbol="ErrorType",
    opacity=0.7,
)

fig_errors.show()

## Conclusion

For this project I decided to go with XGBoost as it is particularly good for tabular datasets like this one where each row and each column represents one thing with no inconsistencies. Unlike deep learning or neuroevolution methods, which can be better for images, text or inconsistent datasets with varying data types, ranges and points; XGBoost can handle smaller feature sets efficiently and gives excellent performance on row/column based imbalanced data. It also trains much faster and is easier to tune compared to more complex approaches.

The results were strong: the model reached over 99 percent overall accuracy, though this is easy to reach as the dataset is heavily skewed towards non-fraudulent activity, but with fraud detection performance at around 94 percent precision and 84 percent recall with a 89 percent F1 score which is the main target. This means it was able to correctly identify most fraudulent transactions while keeping false positives relatively low.

Through dimensionality reduction with UMAP, PCA and t-SNE, we also saw that fraud cases cluster in small, distinct regions rather than being dispersed equally throughout the dataset, which helps explain why XGBoost was effective.

Finally, the error analysis showed that the few missed frauds tend to be cases that are hard to separate from normal transactions, suggesting that these would be challenging for any model.

Overall, the project shows that XGBoost is both a practical and reliable choice for credit card fraud detection, balancing accuracy, interpretability and training efficiency.