In [15]:
import os
import torch
import kagglehub
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
from torch.optim import SGD
from sklearn.preprocessing import LabelEncoder,StandardScaler

In [12]:
path=kagglehub.dataset_download("miadul/credit-card-fraud-detection-dataset")
print(os.listdir(path))
df=pd.read_csv(path+"/credit_card_fraud_10k.csv")

Using Colab cache for faster access to the 'credit-card-fraud-detection-dataset' dataset.
['.nfs00000000697d2ca50000041c', 'credit_card_fraud_10k.csv']


In [14]:
df.drop("transaction_id",inplace=True,axis=1)

In [16]:
df

Unnamed: 0,amount,transaction_hour,merchant_category,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,is_fraud
0,84.47,22,Electronics,0,0,66,3,40,0
1,541.82,3,Travel,1,0,87,1,64,0
2,237.01,17,Grocery,0,0,49,1,61,0
3,164.33,4,Grocery,0,1,72,3,34,0
4,30.53,15,Food,0,0,79,0,44,0
...,...,...,...,...,...,...,...,...,...
9995,350.91,22,Food,0,0,99,4,37,0
9996,410.04,5,Clothing,0,0,70,3,25,0
9997,527.75,21,Electronics,0,0,44,2,45,0
9998,91.20,2,Electronics,0,0,38,0,37,0


In [17]:
encoder=LabelEncoder()
df["merchant_category"]=encoder.fit_transform(df["merchant_category"])

In [21]:
df

Unnamed: 0,amount,transaction_hour,merchant_category,foreign_transaction,location_mismatch,device_trust_score,velocity_last_24h,cardholder_age,is_fraud
0,84.47,22,1,0,0,66,3,40,0
1,541.82,3,4,1,0,87,1,64,0
2,237.01,17,3,0,0,49,1,61,0
3,164.33,4,3,0,1,72,3,34,0
4,30.53,15,2,0,0,79,0,44,0
...,...,...,...,...,...,...,...,...,...
9995,350.91,22,2,0,0,99,4,37,0
9996,410.04,5,0,0,0,70,3,25,0
9997,527.75,21,1,0,0,44,2,45,0
9998,91.20,2,1,0,0,38,0,37,0


In [22]:
X=df.drop("is_fraud",axis=1)
y=df["is_fraud"]

In [46]:
y.min()

tensor(0)

In [23]:
scaler=StandardScaler()
X_=scaler.fit_transform(X)

In [26]:
x_train,x_test,y_train,y_test=train_test_split(X_,y,test_size=0.2)

In [29]:
x_torch=torch.tensor(x_train,dtype=torch.float32)
x_test_torch=torch.tensor(x_test,dtype=torch.float32)
y_torch=torch.tensor(y_train.values,dtype=torch.long)
y_test_torch=torch.tensor(y_test.values,dtype=torch.long)

In [31]:
class CustomDataset(Dataset):
    def __init__(self,x,y):
        self.x=x
        self.y=y
    def __len__(self):
        return self.x.shape[0]
    def __getitem__(self,index):
        return self.x[index],self.y[index]

In [33]:
x_dataset=CustomDataset(x_torch,y_torch)
train_load=DataLoader(x_dataset,batch_size=64,shuffle=True)
y_test_dataset=CustomDataset(x_test_torch,y_test_torch)
test_load=DataLoader(y_test_dataset,batch_size=64,shuffle=True)

In [72]:
epochs=10
lr=0.1
criterion=nn.BCEWithLogitsLoss()
class MYNN(nn.Module):
    def __init__(self,n):
        super().__init__()
        self.n=n
        self.model=nn.Sequential(
            nn.Linear(self.n,64),
            nn.BatchNorm1d(64),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(64,32),
            nn.BatchNorm1d(32),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(32,1),
            nn.Sigmoid()
        )
    def forward(self,x):
        return self.model(x)

In [73]:
model=MYNN(x_torch.shape[1])

In [74]:
optimizer=SGD(model.parameters(),lr=lr,weight_decay=1e-4)

In [75]:
for epoch in range(epochs):
    total_loss=0
    for x,y in train_load:
        y_pred=model(x)
        y_pred=y_pred.squeeze(1)
        loss=criterion(y_pred,y.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
    avg=total_loss/(len(train_load))
    print(f"epochs:: {epoch+1} with average loss :: {avg:.4f}")



epochs:: 1 with average loss :: 0.7583
epochs:: 2 with average loss :: 0.7044
epochs:: 3 with average loss :: 0.6992
epochs:: 4 with average loss :: 0.6973
epochs:: 5 with average loss :: 0.6962
epochs:: 6 with average loss :: 0.6956
epochs:: 7 with average loss :: 0.6953
epochs:: 8 with average loss :: 0.6949
epochs:: 9 with average loss :: 0.6947
epochs:: 10 with average loss :: 0.6945


In [76]:
model.eval()

MYNN(
  (model): Sequential(
    (0): Linear(in_features=8, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Dropout(p=0.2, inplace=False)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Dropout(p=0.2, inplace=False)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=1, bias=True)
    (9): Sigmoid()
  )
)

In [77]:
correct=0
total=0
with torch.no_grad():
    for x,y in test_load:
        y_pre=model(x)
        y_pre=(y_pre>0.5).float()
        total+=len(x)
        correct+=(y_pre==y).sum().item()
    accuracy=correct/total
    print(accuracy)

62.528
