In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [2]:
data = pd.read_csv('fraudTest.csv', index_col=0)
data.drop(columns=["cc_num", "first", "last", "street",
    "trans_num", "trans_date_trans_time",
    "job", "merchant", "dob", "city", "zip"],
          inplace=True)
data.head()

Unnamed: 0,category,amt,gender,state,lat,long,city_pop,unix_time,merch_lat,merch_long,is_fraud
0,personal_care,2.86,M,SC,33.9659,-80.9355,333497,1371816865,33.986391,-81.200714,0
1,personal_care,29.84,F,UT,40.3207,-110.436,302,1371816873,39.450498,-109.960431,0
2,health_fitness,41.28,F,NY,40.6729,-73.5365,34496,1371816893,40.49581,-74.196111,0
3,misc_pos,60.05,M,FL,28.5697,-80.8191,54767,1371816915,28.812398,-80.883061,0
4,travel,3.19,M,MI,44.2529,-85.017,1126,1371816917,44.959148,-85.884734,0


In [5]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

gender_le = LabelEncoder()
ohe = OneHotEncoder(drop='first', sparse_output=False)
scaler = StandardScaler()

data['gender'] = gender_le.fit_transform(data['gender'])

cols_ohe = ['category', 'state']

ohe_encoded = ohe.fit_transform(data[cols_ohe])

ohe_df = pd.DataFrame(ohe_encoded,
                      columns=ohe.get_feature_names_out(cols_ohe),
                      index=data.index)
df = pd.concat([data.drop(columns=cols_ohe), ohe_df],
               axis = 1)

In [None]:
from sklearn.model_selection import train_test_split

X = df.drop(columns=['is_fraud'])
y = df['is_fraud']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

num_cols = ['amt','lat','long','city_pop','unix_time','merch_lat','merch_long']
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.fit_transform(X_test[num_cols])

In [None]:
from torch.utils.data import TensorDataset, DataLoader
device = torch.device('cpu')

X_train_torch = torch.tensor(X_train.values, dtype = torch.float32)
X_test_torch = torch.tensor(X_test.values, dtype = torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_torch = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

train_ds = TensorDataset(X_train_torch, y_train_torch)
test_ds = TensorDataset(X_test_torch, y_test_torch)

batch_size = 2048

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [15]:
h1, h2 = 128, 64

class FraudNet(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, h1),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.3),
            nn.Linear(h1, h2),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.3),
            nn.Linear(h2, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.net(self, x)
    
input_dim = X_train.shape[1]
model = FraudNet(input_dim=input_dim)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)

In [16]:
epochs = 10
train_losses, val_losses = [], []


for epoch in range(epochs):

    model.train()
    train_loss_sum, val_loss_sum = 0, 0

    for xb_train, yb_train in train_loader:
        optimizer.zero_grad()
        pred_train = model(xb_train)
        loss_train = criterion(pred_train, yb_train)
        loss_train.backward()
        optimizer.step()
        train_loss_sum += loss_train.item()*xb_train.size(0)

    train_losses.append(train_loss_sum / len(train_loader))

    with torch.no_grad():
        for xb_val, yb_val in val_loader:
            pred_val = model(xb_val)
            loss_val = criterion(pred_val, yb_val)
            val_loss_sum += loss_val.item()*xb_val.size(0)
    
    val_losses.append(val_loss_sum / len(val_loader))

    print(f"Epoch {epoch+1} / {epochs} | Train {train_losses[epoch]}, Val {val_losses[epoch]}")

TypeError: Sequential.forward() takes 2 positional arguments but 3 were given