## MLP

EEC 270 Website Fingerprinting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score
from sklearn.metrics import accuracy_score
import numpy as np

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Set plotting style
sns.set_style('whitegrid')
sns.set_palette('Set2')

data = pd.read_csv('data_5.csv')

In [94]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# seq_len, batch, feature_len

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(25, 200),
            nn.BatchNorm1d(200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, 31),
            nn.BatchNorm1d(31),
            nn.ReLU(),
        )

    def forward(self, inputs):
        return self.linear(inputs)

In [60]:
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
import random

BATCH_SIZE = 200

X, y = data.values[:,:-1], data.values[:,-1]
X, y = torch.Tensor(X), torch.Tensor(y)
trans_X = X.reshape((len(X), 25))
trans_X_ = X.reshape((5, len(X), 5))
X = trans_X

dataset = TensorDataset(X, y)
loader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Split dataset into train and valid, with a ratio of 4:1
dataset_size = len(dataset)
indices = list(range(dataset_size))
random.shuffle(indices)
split = int(np.floor(0.2 * dataset_size))
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    sampler=train_sampler
)
valid_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    sampler=valid_sampler
)

X_test, y_test = None, None
for _, test_sample in enumerate(valid_loader):
    X_test, y_test = test_sample[0], test_sample[1].long()


In [101]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('./mlp')

model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

for epoch in range(1000):
    for _, sample in enumerate(train_loader, 0):
        model.zero_grad()
        inputs, labels = sample
        labels = labels.long()
        # pred = nn.functional.softmax(model(inputs))
        pred = model(inputs)
        
        # Training loss
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
    
    correct, total = 0, 0
    with torch.no_grad():
        for sample in valid_loader:
            inputs, labels = sample
            labels = labels.long()
            # pred = nn.functional.softmax(model(inputs))
            pred = model(inputs)
            # print(pred)
            
            _, predicted = torch.max(pred.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    writer.add_scalar('Loss/train', loss.item(), epoch)
    writer.add_scalar('Accuracy/test', correct/total, epoch)
        
    if epoch % 100 == 0:
        print('Epoch', epoch, 'Training Loss:', loss.item(), 'Accuracy:', correct/total)

Epoch 0 Training Loss: 0.6521893739700317 Accuracy: 0.8397328881469115
Epoch 100 Training Loss: 0.29776132106781006 Accuracy: 0.8480801335559266
Epoch 200 Training Loss: 0.22466903924942017 Accuracy: 0.8681135225375626
Epoch 300 Training Loss: 0.26048704981803894 Accuracy: 0.8731218697829716
Epoch 400 Training Loss: 0.27816393971443176 Accuracy: 0.8664440734557596
Epoch 500 Training Loss: 0.26442909240722656 Accuracy: 0.8681135225375626
Epoch 600 Training Loss: 0.23918648064136505 Accuracy: 0.8614357262103506
Epoch 700 Training Loss: 0.22243428230285645 Accuracy: 0.8547579298831386
Epoch 800 Training Loss: 0.23108530044555664 Accuracy: 0.8664440734557596
Epoch 900 Training Loss: 0.2416336089372635 Accuracy: 0.8647746243739566


In [None]:
pred = model(X_test)
accuracy_score(pred.argmax(axis=1), y_test)

In [103]:
import joblib
joblib.dump(model, 'mlp.pkl')

['mlp.pkl']