In [1]:
!pip install torch==2.7.1 tqdm==4.66.4 scikit-learn==1.4.2 pandas numpy




[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [28]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import (
    StandardScaler,
    MinMaxScaler,
    LabelEncoder,
    OneHotEncoder,
    OrdinalEncoder,
)

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer

In [106]:
df = pd.read_csv('data/lab3/AdClick.csv')
df.sample(5)

Unnamed: 0,TimeSpentSeconds,PagesVisited,AdType,Clicked
278,36.316519,5,Adult,0
306,21.788734,4,Adult,0
703,4.33048,7,Adult,0
432,28.88717,5,Commercial,0
389,49.326275,2,Commercial,1


In [107]:
df['AdType'].value_counts()

AdType
Adult          532
Commercial     401
Educational    276
Recruitment    125
Name: count, dtype: int64

In [108]:
label_encoder = LabelEncoder()
df['AdType'] = label_encoder.fit_transform(df['AdType'])

In [109]:
X = df.drop(columns=['Clicked'])
y = df['Clicked'].to_numpy()

In [110]:
ct = ColumnTransformer(
    transformers=[
        (
        "categorical_encoder",            
        OneHotEncoder(
            drop="first", 
            sparse_output=False
        ),
        ['AdType']
        )
    ],
    remainder="passthrough"
)
X = ct.fit_transform(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)

In [111]:
class ClickedDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_ds = ClickedDataset(X_train, y_train)
val_ds = ClickedDataset(X_val, y_val)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)

len(train_ds), len(val_ds) 

(1067, 267)

In [112]:
class ClickedNet(nn.Module): # za 1
    def __init__(self, in_features: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        return self.net(x).squeeze(1)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = StrokeNet(in_features=X_train.shape[1]).to(device)
model

Using device: cpu


StrokeNet(
  (net): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [118]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import accuracy_score

class UniversalNet(nn.Module): # prima funkcii (za poveke arhitekturi)
    def __init__(self, input_dim, hidden_layers, activation_class, dropout_p):
        super().__init__()
        layers = []
        prev_dim = input_dim
        
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(prev_dim, hidden_dim))      # 1. Линеарен слој
            layers.append(activation_class())                   # 2. Активација (ReLU, Tanh, итн.)
            if dropout_p > 0:
                layers.append(nn.Dropout(dropout_p))            # 3. Регуларизација (Dropout) - Ако Dropout = 0, нема регуларизација
            prev_dim = hidden_dim
            
        layers.append(nn.Linear(prev_dim, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x).squeeze(1)

In [120]:
layer_configs = [ # za poveke arhitekturi
    [32],                  
    [64, 32],              
    [128, 64, 32]         
]

activation_functions = [
    nn.ReLU, 
    nn.Tanh, 
    nn.LeakyReLU
]

dropout_values = [0.0, 0.2] 

results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = X_train.shape[1]

print(f"Start experiments on {device}...")

for layers in layer_configs:
    for act_func in activation_functions:
        for drop_p in dropout_values:
            
            act_name = act_func.__name__
            exp_name = f"L={layers} | Act={act_name} | Drop={drop_p}"
            print(f"\n Testing: {exp_name}")
            
            model = UniversalNet(input_dim, layers, act_func, drop_p).to(device)
            
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            criterion = nn.BCEWithLogitsLoss()
            
            for epoch in range(8):
                model.train()
                for X_b, y_b in train_loader:
                    X_b, y_b = X_b.to(device).float(), y_b.to(device).float()
                    optimizer.zero_grad()
                    loss = criterion(model(X_b), y_b)
                    loss.backward()
                    optimizer.step()
            
            model.eval()
            all_preds, all_targets = [], []
            with torch.no_grad():
                for X_b, y_b in val_loader:
                    X_b = X_b.to(device).float()
                    logits = model(X_b)
                    probs = torch.sigmoid(logits).cpu().numpy()
                    preds = (probs >= 0.5).astype(int)
                    all_preds.extend(preds)
                    all_targets.extend(y_b.numpy())
            
            acc = accuracy_score(all_targets, all_preds)
            print(f"   Result Accuracy: {acc:.4f}")
            
            results.append((acc, exp_name))

results.sort(key=lambda x: x[0], reverse=True)

print("\n" + "="*40)
print("TOP 3 ARCHITECTURES:")
print("="*40)
for i in range(3):
    if i < len(results):
        print(f"{i+1}. Acc: {results[i][0]:.4f}  =>  {results[i][1]}")

Start experiments on cpu...

 Testing: L=[32] | Act=ReLU | Drop=0.0
   Result Accuracy: 0.6479

 Testing: L=[32] | Act=ReLU | Drop=0.2
   Result Accuracy: 0.6255

 Testing: L=[32] | Act=Tanh | Drop=0.0
   Result Accuracy: 0.6517

 Testing: L=[32] | Act=Tanh | Drop=0.2
   Result Accuracy: 0.6592

 Testing: L=[32] | Act=LeakyReLU | Drop=0.0
   Result Accuracy: 0.6404

 Testing: L=[32] | Act=LeakyReLU | Drop=0.2
   Result Accuracy: 0.6442

 Testing: L=[64, 32] | Act=ReLU | Drop=0.0
   Result Accuracy: 0.6554

 Testing: L=[64, 32] | Act=ReLU | Drop=0.2
   Result Accuracy: 0.6554

 Testing: L=[64, 32] | Act=Tanh | Drop=0.0
   Result Accuracy: 0.6554

 Testing: L=[64, 32] | Act=Tanh | Drop=0.2
   Result Accuracy: 0.6517

 Testing: L=[64, 32] | Act=LeakyReLU | Drop=0.0
   Result Accuracy: 0.6554

 Testing: L=[64, 32] | Act=LeakyReLU | Drop=0.2
   Result Accuracy: 0.6629

 Testing: L=[128, 64, 32] | Act=ReLU | Drop=0.0
   Result Accuracy: 0.6629

 Testing: L=[128, 64, 32] | Act=ReLU | Drop=0.2

In [119]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [115]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
def train_one_epoch(epoch_idx: int):
    model.train()
    losses = []

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch_idx+1}", leave=False)

    for X_batch, y_batch in progress_bar:
        X_batch = X_batch.to(device).float()
        y_batch = y_batch.to(device).float()

        optimizer.zero_grad()
        logits = model(X_batch)
        loss = criterion(logits, y_batch)

        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})

    return float(np.mean(losses))

def evaluate():
    model.eval()
    all_probs = []
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device).float()
            logits = model(X_batch)

            probs = torch.sigmoid(logits).cpu().numpy()
            preds = (probs >= 0.5).astype(int)

            all_probs.append(probs)
            all_preds.append(preds)
            all_targets.append(y_batch.numpy())

    all_probs = np.concatenate(all_probs).reshape(-1)
    all_preds = np.concatenate(all_preds).reshape(-1)
    all_targets = np.concatenate(all_targets).reshape(-1)

    acc = accuracy_score(all_targets, all_preds)
    return acc

In [117]:
epochs = 10 # za edna
train_losses = []
val_accuracies = []

for epoch in range(epochs):
    train_loss = train_one_epoch(epoch)
    val_acc = evaluate()

    train_losses.append(train_loss)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{epochs} | Train loss: {train_loss:.4f} | Val acc: {val_acc:.4f}")

                                                                                                                       

Epoch 1/10 | Train loss: 0.6394 | Val acc: 0.6592


                                                                                                                       

Epoch 2/10 | Train loss: 0.6423 | Val acc: 0.6667


                                                                                                                       

Epoch 3/10 | Train loss: 0.6381 | Val acc: 0.6592


                                                                                                                       

Epoch 4/10 | Train loss: 0.6401 | Val acc: 0.6554


                                                                                                                       

Epoch 5/10 | Train loss: 0.6372 | Val acc: 0.6479


                                                                                                                       

Epoch 6/10 | Train loss: 0.6438 | Val acc: 0.6517


                                                                                                                       

Epoch 7/10 | Train loss: 0.6372 | Val acc: 0.6592


                                                                                                                       

Epoch 8/10 | Train loss: 0.6365 | Val acc: 0.6554


                                                                                                                       

Epoch 9/10 | Train loss: 0.6327 | Val acc: 0.6554


                                                                                                                       

Epoch 10/10 | Train loss: 0.6379 | Val acc: 0.6554


