## LSTM

EEC 270 Website Fingerprinting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score
from sklearn.metrics import accuracy_score
import numpy as np

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Set plotting style
sns.set_style('whitegrid')
sns.set_palette('Set2')

data = pd.read_csv('data_5.csv')

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# seq_len, batch, feature_len

class LSTM(nn.Module):

    def __init__(self):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(25, 100, 3, batch_first=True)
        self.linear = nn.Sequential(
            nn.Linear(100, 31),
            nn.BatchNorm1d(31),
            nn.ReLU(),
        )

    def forward(self, source):
        output, _ = self.lstm(source.view(source.shape[0], 1, 25))
        output = output[:,-1]
        return self.linear(output)

In [4]:
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
import random

BATCH_SIZE = 200

X, y = data.values[:,:-1], data.values[:,-1]
X, y = torch.Tensor(X), torch.Tensor(y)
trans_X = X.reshape((len(X), 25))
trans_X_ = X.reshape((len(X), 5, 5))
X = trans_X

dataset = TensorDataset(X, y)
loader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Split dataset into train and valid, with a ratio of 4:1
dataset_size = len(dataset)
indices = list(range(dataset_size))
random.shuffle(indices)
split = int(np.floor(0.2 * dataset_size))
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    sampler=train_sampler
)
valid_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    sampler=valid_sampler
)

X_test, y_test = None, None
for _, test_sample in enumerate(valid_loader):
    X_test, y_test = test_sample[0], test_sample[1].long()


In [8]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('./lstm')

model = LSTM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5000):
    for _, sample in enumerate(train_loader, 0):
        model.zero_grad()
        inputs, labels = sample
        labels = labels.long()
        pred = model(inputs)
        
        # Training loss
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
    
    correct, total = 0, 0
    with torch.no_grad():
        for sample in valid_loader:
            inputs, labels = sample
            labels = labels.long()
            pred = model(inputs)
            
            _, predicted = torch.max(pred.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    writer.add_scalar('Loss/train', loss.item(), epoch)
    writer.add_scalar('Accuracy/test', correct/total, epoch)
        
    if epoch % 100 == 0:
        print('Epoch', epoch, 'Training Loss:', loss.item(), 'Accuracy:', correct/total)
        



Epoch 0 Training Loss: 3.3674535751342773 Accuracy: 0.10684474123539232
Epoch 100 Training Loss: 1.69565749168396 Accuracy: 0.5258764607679466
Epoch 200 Training Loss: 1.4455851316452026 Accuracy: 0.5325542570951586
Epoch 300 Training Loss: 1.3271284103393555 Accuracy: 0.5342237061769616
Epoch 400 Training Loss: 1.160200595855713 Accuracy: 0.5275459098497496
Epoch 500 Training Loss: 1.2462680339813232 Accuracy: 0.5492487479131887
Epoch 600 Training Loss: 1.0421981811523438 Accuracy: 0.5409015025041736
Epoch 700 Training Loss: 1.0185291767120361 Accuracy: 0.5425709515859767
Epoch 800 Training Loss: 1.0275272130966187 Accuracy: 0.5392320534223706
Epoch 900 Training Loss: 0.9433490633964539 Accuracy: 0.5358931552587646
Epoch 1000 Training Loss: 0.9827002286911011 Accuracy: 0.5442404006677797
Epoch 1100 Training Loss: 0.9262630343437195 Accuracy: 0.5459098497495827
Epoch 1200 Training Loss: 0.9256651401519775 Accuracy: 0.5342237061769616
Epoch 1300 Training Loss: 0.8706201314926147 Accurac

In [9]:
pred = model(X_test)
accuracy_score(pred.argmax(axis=1), y_test)

0.542713567839196

In [10]:
import joblib
joblib.dump(model, 'lstm.pkl')

['lstm.pkl']