In [39]:
import pandas as pd

In [40]:
df = pd.read_csv(r"C:\Users\Admin\Downloads\AdClick.csv")

In [41]:
df

Unnamed: 0,TimeSpentSeconds,PagesVisited,AdType,Clicked
0,14.078043,4,Adult,0
1,90.303643,2,Adult,0
2,39.502371,7,Adult,1
3,27.388277,9,Adult,1
4,5.088746,6,Adult,0
...,...,...,...,...
1329,20.686353,5,Educational,0
1330,24.314949,6,Commercial,1
1331,38.103568,2,Commercial,0
1332,7.780798,4,Adult,0


In [42]:
# Data handling
import pandas as pd
import numpy as np

# Train / test split
from sklearn.model_selection import train_test_split

# Preprocessing
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader




In [43]:
def split_features_target(df,target_col = "Clicked"):
    X = df.drop(columns=[target_col])
    y = df[target_col]

    return X,y

In [44]:
def build_preprocesor (num_cols,cat_cols):
    preprocesor = ColumnTransformer(
        transformers=[
            ("cat",OneHotEncoder(drop="first",handle_unknown="ignore"),cat_cols),
            ("num",StandardScaler(),num_cols)
        ]
    )
    return preprocesor

In [45]:
def preprocess_data(X,y,preprocesor,test_size=0.15,val_size=0.15,random_state=42):
    X_trainval,X_test,y_trainval,y_test = train_test_split(
        X,y,test_size=test_size,random_state=random_state,stratify=y
    )
    
    val_ratio = val_size/(1-test_size)
    
    X_train,X_val,y_train,y_val = train_test_split(
        X_trainval,y_trainval,test_size=val_ratio,random_state=random_state,stratify=y_trainval
    )

    X_train = preprocesor.fit_transform(X_train)
    X_val = preprocesor.transform(X_val)
    X_test = preprocesor.transform(X_test)

    return X_train, X_val, X_test, y_train, y_val, y_test

In [46]:
from torch.utils.data import Dataset
class AdsDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.to_numpy().reshape(-1, 1), dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [47]:
from torch.utils.data import DataLoader

def create_dataloaders(train_dataset, val_dataset, test_dataset, batch_size=32):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader


In [48]:
# # 1. Split features and target
# X, y = split_features_target(df)

# # 2. Columns
# num_cols = X.select_dtypes(include='number').columns.tolist()
# cat_cols = X.select_dtypes(include='object').columns.tolist()

# # 3. Preprocessing pipeline
# preprocessor = build_preprocesor(num_cols, cat_cols)

# # 4. Preprocess data and get splits
# X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(X, y, preprocessor)

# # 5. Create Dataset objects
# train_dataset = AdsDataset(X_train, y_train)
# val_dataset = AdsDataset(X_val, y_val)
# test_dataset = AdsDataset(X_test, y_test)

# # 6. Create DataLoaders
# train_loader, val_loader, test_loader = create_dataloaders(
#     train_dataset, val_dataset, test_dataset, batch_size=32
# )


In [53]:
class AdsNN(nn.Module):
    def __init__(self,input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim,16),
            nn.ReLU(),
            nn.Linear(16,8),
            nn.ReLU(),
            nn.Linear(8,1),
            nn.Sigmoid()
        )

    def forward(self,x):
        return self.model(x)

In [68]:
def train_model (model,train_loader,val_loader,epochs=30,lr=0.0001):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(),lr=lr)
    train_loss = 0.0
    for X_batch,y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs,y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        ### VALIDACIJA NA MODEL ###


    # ---- Validation ----
    model.eval()
    val_loss = 0.0
    all_pred, all_targets = [], []
    
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            
            preds = (outputs >= 0.5).float()
            
            # Flatten and append
            all_pred.extend(preds.squeeze().numpy())
            all_targets.extend(y_batch.squeeze().numpy())
    
    val_acc = accuracy_score(all_targets, all_pred)

# print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

In [73]:
def evaluate_model(model,test_loader):
    model.eval()
    all_preds,all_targets = [],[]
    with torch.no_grad():
        for X_batch,y_batch in test_loader:
            outputs = model(X_batch)
            preds = (outputs>0.5).float()
            all_preds.extend(preds.squeeze().numpy())       # flatten to 1D
            all_targets.extend(y_batch.squeeze().numpy())  # flatten to 1D


    print("Confusion Matrix:")
    print(confusion_matrix(all_targets, all_preds))
    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds))

In [76]:
# 1. Split features and target
X, y = split_features_target(df)

# 2. Columns
num_cols = X.select_dtypes(include='number').columns.tolist()
cat_cols = X.select_dtypes(include='object').columns.tolist()

# 3. Preprocessing
preprocessor = build_preprocesor(num_cols, cat_cols)
X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(X, y, preprocessor)

# 4. Datasets
train_dataset = AdsDataset(X_train, y_train)
val_dataset = AdsDataset(X_val, y_val)
test_dataset = AdsDataset(X_test, y_test)

# 5. DataLoaders
train_loader, val_loader, test_loader = create_dataloaders(train_dataset, val_dataset, test_dataset)

# 6. Model
model = AdsNN(input_dim=X_train.shape[1])

# 7. Train
train_model(model, train_loader, val_loader, epochs=30, lr=0.001)

# 8. Evaluate
evaluate_model(model, test_loader)


Confusion Matrix:
[[126   0]
 [ 75   0]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.63      1.00      0.77       126
         1.0       0.00      0.00      0.00        75

    accuracy                           0.63       201
   macro avg       0.31      0.50      0.39       201
weighted avg       0.39      0.63      0.48       201



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [75]:
num_pos = (y_train == 1).sum()
num_neg = (y_train == 0).sum()
weight = torch.tensor([num_neg / (num_pos + num_neg)]).float()

criterion = nn.BCELoss(weight=weight)  # if using class weighting
