In [1]:
import numpy as np
from numpy import savetxt
import pandas as pd
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
import sys

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils import data


from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
print (torch.cuda.is_available())
print (torch.cuda.current_device())
print (torch.cuda.get_device_name(0))
print (torch.cuda.memory_allocated())
print (torch.cuda.memory_cached())

True
0
GeForce GTX 1060 with Max-Q Design
0
0


In [3]:
df = pd.read_csv("data/cleaned_steam_data_4-15_15Kwords.csv", encoding='utf8', index_col=0)

In [4]:
title_columns = [x for x in df.columns.tolist() if x.startswith('title_')]
drop_cols = ['funny', 'is_early_access_review', 'helpful', 'review', 'cleaned_reviews', 'hour_played', 'Year', 'Month', 'Day']
drop_cols += title_columns

df.drop(drop_cols, axis=1, inplace=True)
df.head()

Unnamed: 0,recommendation,encoded_1,encoded_2,encoded_3,encoded_4,encoded_5,encoded_6,encoded_7,encoded_8,encoded_9,...,encoded_185,encoded_186,encoded_187,encoded_188,encoded_189,encoded_190,encoded_191,encoded_192,encoded_193,encoded_194
0,1,0,0,0,0,0,0,0,0,0,...,5883,8055,5987,3547,5987,5030,3547,3547,11106,5315
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9665
2,1,0,0,0,0,0,0,0,0,0,...,11106,3090,13779,7175,7891,1064,3380,1917,5409,3118
7,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,13408,11246,9544,6270,11106
9,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,11132,13289,12336,8729,12154,11106


In [5]:
MAX_SEQ_LEN = len(df.columns.tolist())-1
VOCAB_SIZE = 400000# 14845 # should ideally just transport this from prev
EMBED_DIM = 100
LSTM_DIM = 64

In [6]:
# only need 2-3 lines for attention
class Attention(nn.Module):
    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        
        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0
        
        weight = torch.zeros(feature_dim, 1)
        nn.init.kaiming_uniform_(weight)
        self.weight = nn.Parameter(weight)
        
        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))
    
    def forward(self, x, mask=None):
        feature_dim = self.feature_dim 
        step_dim = self.step_dim

        eij = torch.mm(
            x.contiguous().view(-1, feature_dim), 
            self.weight
        ).view(-1, step_dim)
        
        if self.bias:
            eij = eij + self.b
            
        eij = torch.tanh(eij)
        a = torch.exp(eij)
        
        if mask is not None:
            a = a * mask

        a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)

        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)

In [7]:
# build pytorch model
DROPOUT = 0.1
BATCH_SIZE = 128

class Attention_Net(nn.Module):
    def __init__(self):
        super(Attention_Net, self).__init__()
        
        # define architecture
        self.embedding = nn.Embedding(VOCAB_SIZE, EMBED_DIM) # add pretrained embeding
        
        self.lstm = nn.LSTM(EMBED_DIM, 
                            LSTM_DIM, 
                            bidirectional=True,
                            dropout=0.2,
                            batch_first=True)
        
        # attention layer
#         self.attention_layer = Attention(LSTM_DIM * 2, MAX_SEQ_LEN)
        # try tanh

        self.linear = nn.Linear(LSTM_DIM*2, 2)
        
    def forward(self, x):
        embedding = self.embedding(x)
        embedding = torch.squeeze(torch.unsqueeze(embedding, 0)).view(BATCH_SIZE, MAX_SEQ_LEN, -1)
        lstm_out, (hidden, cell) = self.lstm(embedding)
#         attention = self.attention_layer(lstm_out)
        
        out = self.linear(lstm_out[:, -1, :])
        return out

In [8]:
class SteamDataset(data.Dataset):
    def __init__(self, data):
        #'Initialization'
        self.data = data
        text_cols = [x for x in df.columns.tolist() if x.startswith("encoded")]
        self.train = torch.tensor(data[text_cols].values).type(torch.LongTensor).cuda()
        labels = data['recommendation'].tolist()
        
        self.one_hot_labels = torch.tensor(np.array(labels)).squeeze().type(torch.LongTensor).cuda() # change to longtensor if using custom loss
        

    def __len__(self):
        #'Denotes the total number of samples'
        return len(self.data)

    def __getitem__(self, index):
        #'Generates one sample of data'
        
        # Load data and get label
        X = self.train[index]
        Y = self.one_hot_labels[index]
        return X, Y

In [9]:
train_num = int(0.8 * len(df))
steam_dataset = SteamDataset(df[:train_num])
steam_data_loader = data.DataLoader(steam_dataset, batch_size=BATCH_SIZE, num_workers=0, drop_last=True, shuffle=True)
steam_data_loader

<torch.utils.data.dataloader.DataLoader at 0x1f441d93048>

In [10]:
PATH = 'models/amzn_date4-16_batch128_epoch10_acc83_lstm64_pretrainedembedding_noattention.pt'
attention_model = Attention_Net().cuda()
attention_model.load_state_dict(torch.load(PATH))

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(attention_model.parameters(), lr=0.0001) # even lower for transfer learning

  "num_layers={}".format(dropout, num_layers))


In [12]:
# training loop
EPOCHS = 10
start = time.time()

for i in range(EPOCHS):
    second_start = time.time()
    running_loss = 0
    correct = 0
    attention_model.train()
    
    with tqdm(total=len(steam_data_loader), file=sys.stdout) as pbar:
        for idx, (train_X, train_Y) in enumerate(steam_data_loader):
            
            optimizer.zero_grad()

            pred_y = attention_model(train_X) 
            loss = loss_function(pred_y, train_Y)
            loss.backward()
            optimizer.step()
            running_loss += loss

            # calc accuracy
            pred1_mask = pred_y[:, 1] > 0.5
            masked_trainY_1 = train_Y[pred1_mask]
            masked_trainY_0 = train_Y[~pred1_mask]
            ones_predicted_correct = torch.sum(masked_trainY_1)
            zeros_predicted_correct = torch.sum(masked_trainY_0)
            correct += ones_predicted_correct.add(zeros_predicted_correct)
            correct_ = correct.cpu().numpy()
            
            # update progress bar
            pbar.set_description('ep{} | loss: {} | acc: {}%'.format(i+1, torch.round(running_loss), round(correct_ / ((idx+1) * BATCH_SIZE)*100, 1)))
            pbar.update(1)
            tqdm._instances.clear()
            
            

    print ('Epoch {} | took {} seconds | summed loss: {} | avg loss: {}'
                   .format(i+1, time.time() - second_start, running_loss, running_loss / (len(steam_data_loader) * BATCH_SIZE)))

print ("Took {} seconds".format(time.time() - start))

HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 1 | took 121.9320011138916 seconds | summed loss: 812.6401977539062 | avg loss: 0.0025044383946806192


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 2 | took 122.2736930847168 seconds | summed loss: 761.7395629882812 | avg loss: 0.0023475701455026865


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 3 | took 122.77803039550781 seconds | summed loss: 736.038330078125 | avg loss: 0.002268362557515502


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 4 | took 122.51658225059509 seconds | summed loss: 719.1849365234375 | avg loss: 0.0022164229303598404


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 5 | took 122.5858268737793 seconds | summed loss: 705.7792358398438 | avg loss: 0.002175108529627323


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 6 | took 122.46696543693542 seconds | summed loss: 694.8030395507812 | avg loss: 0.002141281496733427


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 7 | took 123.15779399871826 seconds | summed loss: 685.0997314453125 | avg loss: 0.002111377427354455


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 8 | took 122.80027651786804 seconds | summed loss: 676.7860107421875 | avg loss: 0.002085755579173565


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 9 | took 123.1782054901123 seconds | summed loss: 668.8837890625 | avg loss: 0.002061402192339301


HBox(children=(FloatProgress(value=0.0, max=2535.0), HTML(value='')))


Epoch 10 | took 123.05745482444763 seconds | summed loss: 661.5810546875 | avg loss: 0.002038896083831787
Took 1226.7566776275635 seconds


In [13]:
print (attention_model)

Attention_Net(
  (embedding): Embedding(400000, 100)
  (lstm): LSTM(100, 64, batch_first=True, dropout=0.2, bidirectional=True)
  (linear): Linear(in_features=128, out_features=2, bias=True)
)


In [15]:
steam_eval_dataset = SteamDataset(df[train_num:])
steam_eval_data_loader = data.DataLoader(steam_eval_dataset, batch_size=BATCH_SIZE, num_workers=0, drop_last=True)

In [17]:
# evaluate
correct = 0
eval_loss = 0
attention_model.eval()
with torch.no_grad():
    for i, (test_X, test_Y) in enumerate(steam_eval_data_loader):
        preds = attention_model(test_X).squeeze()
        preds = torch.nn.functional.softmax(preds)
        for idx, each_pred in enumerate(preds):
            if each_pred[0] >= 0.5 and test_Y[idx] == 0:
                correct += 1
            elif each_pred[0] < 0.5 and test_Y[idx] == 1:
                correct += 1
        loss = loss_function(preds, test_Y)
        eval_loss += loss
        
        
print ("Eval accuracy: {}".format(correct / len(steam_eval_dataset)))
print ("Eval summed loss: {} | avg loss: {}".format(eval_loss, eval_loss / len(steam_eval_dataset)))

  


Eval accuracy: 0.8626839549387433
Eval summed loss: 288.14398193359375 | avg loss: 0.0035514580085873604


In [18]:
PATH = 'models/transfer_learning_date4-16_acc86_epoch10_batch128.pt'
torch.save(attention_model.state_dict(), PATH)

In [None]:
# 77.8% eval acc -- batch=128, lr=0.0001
# 77.8% eval acc -- batch=128, lr=0.001
# 77.9% eval acc -- batch=64, lr=0.001, embed_dim=16, lstm_dim=16
# 85.9% eval acc -- batch=128, lr=0.0001, embed=128, lstm_dim=64
# 86.1% eval acc -- batch=128, lr=0.0001, embed=128, lstm_dim=64, + attention

# scratchpaper

In [15]:
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)

input

tensor([-1.5980,  0.9168,  0.8958], requires_grad=True)

In [16]:
target

tensor([1., 0., 1.])