In [10]:
import torch
import random
import numpy as np
from src.utils.config import get_small_classifier_config
from src.training.classifier_trainer import SimpleTextDataset, train_classifier, evaluate
import csv, random, time, datetime as dt
import pandas as pd
from pathlib import Path

### Model Config

In [12]:
def set_seed(seed=42):
    """Seed every random number generator this notebook touches.

    Applies the same seed to Python's ``random`` module, NumPy's legacy
    global generator and PyTorch; when CUDA is available, all CUDA devices
    are seeded as well.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.

    Returns:
        None.
    """
    # Same call order as always: stdlib first, then NumPy, then torch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing further to do on CPU-only machines.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Fix the RNG state before the config is built and anything else runs.
set_seed(42)

# Start from the project's small-classifier preset.
cfg = get_small_classifier_config()

# Task-specific overrides, applied in the order listed.
cfg_overrides = {
    "num_classes": 2,  # binary
    # hyperparameters
    "learning_rate": 1e-4,
    "weight_decay": 0.01,
    "max_epochs": 8,
}
for _field, _value in cfg_overrides.items():
    setattr(cfg, _field, _value)

### Data Preprocessing

In [13]:
# Location of the event export, relative to the notebook's working directory.
csv_path = Path("fake-test-data.csv")  # change this if the file lives elsewhere

# Read the CSV and force the two numeric columns to explicit dtypes in one step:
# revenue as float, event timestamp as 64-bit integer.
df = pd.read_csv(csv_path).astype(
    {"rev_usd": float, "event_timestamp": "int64"}
)

# Sanity check: first rows, resulting column dtypes, and the row count.
display(df.head())
print(df.dtypes)
print(f"Rows: {len(df)}")

Unnamed: 0,user_pseudo_id,session_id,date_formatted,event_timestamp,event_name,rev_usd,unique_items,qty,page_location,page_title
0,u_001,s_001,2025-08-28,1756358400000,session_start,0.0,0,0,https://example.com/,Home
1,u_001,s_001,2025-08-28,1756358405000,page_view,0.0,0,0,https://example.com/category/shoes,Shoes Category
2,u_001,s_001,2025-08-28,1756358410000,view_item,0.0,1,1,https://example.com/product/sku123,Running Shoe Model A
3,u_001,s_001,2025-08-28,1756358413000,add_to_cart,0.0,1,1,https://example.com/product/sku123,Running Shoe Model A
4,u_001,s_001,2025-08-28,1756358418000,view_item,0.0,1,1,https://example.com/product/sku456,Trail Shoe Model B


user_pseudo_id      object
session_id          object
date_formatted      object
event_timestamp      int64
event_name          object
rev_usd            float64
unique_items         int64
qty                  int64
page_location       object
page_title          object
dtype: object
Rows: 42


### Train / Validation Split

In [5]:
# Fraction of `samples` used for training; the remainder becomes the validation set.
TRAIN_FRACTION = 0.8

# `samples` is not created by any cell visible in this part of the notebook, so on
# a fresh kernel (Restart & Run All) this cell would otherwise fail with a bare
# NameError. Fail with an actionable message instead.
if "samples" not in globals():
    raise NameError(
        "`samples` is not defined: run the cell that builds the training samples "
        "before running the train/validation split."
    )

# The split is positional (no shuffling): the leading part of `samples` is used
# for training and the trailing part for validation.
split = int(TRAIN_FRACTION * len(samples))

# With fewer than two samples one side of the split would be empty.
if not 0 < split < len(samples):
    raise ValueError(
        f"Cannot split {len(samples)} sample(s) into non-empty train and "
        f"validation sets with TRAIN_FRACTION={TRAIN_FRACTION}."
    )

train_ds = SimpleTextDataset(samples[:split])
val_ds = SimpleTextDataset(samples[split:])

### Train and Validate Model

In [6]:
# Train with the configured hyperparameters; the validation dataset is passed to
# the trainer together with the training dataset.
model = train_classifier(cfg, train_ds, val_ds)

# Save only the state_dict rather than the whole model object.
# NOTE(review): whether `model` carries the best-validation weights or the
# final-epoch weights depends on train_classifier — confirm before reusing this file.
trained_weights = model.state_dict()
torch.save(trained_weights, "classifier_model.pt")

epoch 0 step 0 lr 6.25e-06 loss 0.4298 acc 1.0000 elapsed 0.1s
[best] val_loss 0.0113 acc 1.0000
val_loss 0.0138 acc 1.0000
epoch 2 step 100 lr 8.19e-05 loss 0.8947 acc 0.7500 elapsed 4.1s
val_loss 0.0257 acc 1.0000
val_loss 0.0178 acc 1.0000
val_loss 0.0195 acc 1.0000
epoch 5 step 200 lr 3.33e-05 loss 0.0279 acc 1.0000 elapsed 8.3s
val_loss 0.0174 acc 1.0000
val_loss 0.0165 acc 1.0000
epoch 7 step 300 lr 9.61e-07 loss 0.0222 acc 1.0000 elapsed 12.3s
val_loss 0.0164 acc 1.0000
