In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from torch import nn, optim

df = pd.read_csv('./online_shoppers_intention.csv')
df

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.000000,0.000000,0.100000,0.000000,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.050000,0.140000,0.000000,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.500000,0.020000,0.050000,0.000000,0.0,Feb,3,3,1,4,Returning_Visitor,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12325,3,145.0,0,0.0,53,1783.791667,0.007143,0.029031,12.241717,0.0,Dec,4,6,1,1,Returning_Visitor,True,False
12326,0,0.0,0,0.0,5,465.750000,0.000000,0.021333,0.000000,0.0,Nov,3,2,1,8,Returning_Visitor,True,False
12327,0,0.0,0,0.0,6,184.250000,0.083333,0.086667,0.000000,0.0,Nov,3,2,1,13,Returning_Visitor,True,False
12328,4,75.0,0,0.0,15,346.000000,0.000000,0.021053,0.000000,0.0,Nov,2,2,3,11,Returning_Visitor,False,False


In [5]:
categorical_features = ['OperatingSystems', 'Browser', 'Region', 'TrafficType', 'VisitorType', 'Weekend', 'Month']
numerical_features = ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration', 'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues', 'SpecialDay']

transformers = [
    ('one_hot', OneHotEncoder(), categorical_features),
    ('scale', MinMaxScaler(), numerical_features) 
]

preprocessor = ColumnTransformer(transformers=transformers)

x = df.drop('Revenue', axis=1)
y = df['Revenue'].astype(int)

x_preproc = preprocessor.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(x_preproc, y, test_size=0.2, random_state=42)

scaler = StandardScaler(with_mean=False)
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Transfer from pandas to pytorch
x_train_tensor = torch.FloatTensor(x_train_scaled.toarray())
x_test_tensor = torch.FloatTensor(x_test_scaled.toarray())

# Transfer from pands to numpy to pytorch
y_train_tensor = torch.LongTensor(y_train.values)
y_test_tensor = torch.LongTensor(y_test.values)

In [6]:

from torch import nn
from torch import optim

class ANNModel(nn.Module):
    def __init__(self):
        super(ANNModel, self).__init__()
        self.layer_1 = nn.Linear(75, 254)
        self.layer_2 = nn.Linear(254, 128)
        self.layer_out = nn.Linear(128, 2)
        self.relu = nn.ReLU()

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.relu(self.layer_2(x))
        x = self.layer_out(x)
        return x
    
model = ANNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    y_pred = model(x_train_tensor) # Forward Probagation
    loss = criterion(y_pred, y_train_tensor)
    loss.backward() # Back Probagation
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model.eval()
correct_predictions = 0
total_predictions = 0

with torch.no_grad():
    for x_batch, y_batch in test_dataloader:
        y_pred = model(x_batch)
        predicted_classes = y_pred.argmax(dim=1)
        correct_predictions += (predicted_classes == y_batch).sum().item()
        total_predictions += y_batch.size(0)

accuracy = correct_predictions / total_predictions
print(f'Accuracy: {accuracy * 100:.2f}%')

Epoch 10, Loss: 0.38973936438560486
Epoch 20, Loss: 0.3338225185871124
Epoch 30, Loss: 0.2998674213886261
Epoch 40, Loss: 0.26722511649131775
Epoch 50, Loss: 0.24052011966705322
Epoch 60, Loss: 0.22337666153907776
Epoch 70, Loss: 0.20866140723228455
Epoch 80, Loss: 0.1951146423816681
Epoch 90, Loss: 0.18135705590248108
Epoch 100, Loss: 0.16701149940490723
Accuracy: 88.32%
