In [None]:
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import savetxt
from tqdm.notebook import tqdm

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torch.utils.data import DataLoader


# `display` and `HTML` are public in IPython.display; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
# Inject CSS that sets the `.container` element to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
# Report the CUDA environment. The per-device queries raise when no GPU is
# visible, so they only run once availability has been confirmed.
print (torch.cuda.is_available())
if torch.cuda.is_available():
    print (torch.cuda.current_device())
    print (torch.cuda.get_device_name(0))
    print (torch.cuda.memory_allocated())
    # memory_cached() is deprecated in favour of memory_reserved(); fall back
    # to the old name only on torch builds that predate the rename.
    _memory_reserved = getattr(torch.cuda, "memory_reserved", None) or torch.cuda.memory_cached
    print (_memory_reserved())

In [None]:
# Load the cleaned Steam review table; the first CSV column becomes the index.
df = pd.read_csv(
    "data/cleaned_steam_data_3-29.csv",
    index_col=0,
    encoding='utf8',
)

In [None]:
# Drop every column named below plus all `title_*` columns.
title_columns = [col for col in df.columns if col.startswith('title_')]
drop_cols = [
    'funny', 'is_early_access_review', 'helpful', 'review', 'cleaned_reviews',
    'hour_played', 'Year', 'Month', 'Day',
] + title_columns

# Rebind instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell is a no-op rather than a KeyError.
df = df.drop(columns=drop_cols, errors='ignore')
df.head()

In [None]:
# # convert to classification problem --> turn helpful to 0s 1s
# new_helpful = []
# num_positive = 0
# for val in df.helpful.tolist():
#     if val > 0:
#         new_helpful.append(1)
#         num_positive += 1
#     else:
#         new_helpful.append(0)
# df.drop(['helpful'], axis=1, inplace=True)
# df['helpful'] = new_helpful
# df.head()

In [None]:
# Model hyper-parameters.
VOCAB_SIZE = 41248  # should ideally just transport this from prev
EMBED_DIM, LSTM_DIM = 128, 64
# One less than the number of columns left in `df`
# (presumably the excluded column is the label -- TODO confirm).
MAX_SEQ_LEN = len(df.columns) - 1

In [None]:
class Attention(nn.Module):
    """Attention pooling over the step axis of a ``(batch, step_dim, feature_dim)`` input.

    Every step is scored with a learned ``(feature_dim, 1)`` projection (plus an
    optional per-step bias), the scores go through ``exp(tanh(.))`` and are
    normalised across steps, and the input is summed over steps using those
    weights.

    Args:
        feature_dim: Size of the last axis of the input.
        step_dim: Number of steps; the input is reshaped assuming exactly this many.
        bias: When true, a learnable bias of length ``step_dim`` is added to the scores.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super().__init__(**kwargs)

        self.supports_masking = True
        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0

        # Start from zeros and let Kaiming-uniform init fill the projection in place.
        self.weight = nn.Parameter(nn.init.kaiming_uniform_(torch.zeros(feature_dim, 1)))

        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))

    def forward(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over its step axis.

        Args:
            x: Tensor whose last axis has ``feature_dim`` entries and whose rows
                regroup into ``step_dim`` steps per example.
            mask: Optional tensor multiplied into the un-normalised weights
                before normalisation.

        Returns:
            ``x`` summed over axis 1 after weighting each step.
        """
        # Flatten to (batch * steps, feature_dim), score every step, regroup per example.
        flat = x.contiguous().view(-1, self.feature_dim)
        scores = flat.mm(self.weight).view(-1, self.step_dim)

        if self.bias:
            scores = scores + self.b

        unnormalised = torch.tanh(scores).exp()
        if mask is not None:
            unnormalised = unnormalised * mask

        # The epsilon keeps the division finite when a row is fully masked out.
        denom = unnormalised.sum(dim=1, keepdim=True) + 1e-10
        attn = unnormalised / denom

        return (x * attn.unsqueeze(-1)).sum(dim=1)

In [None]:
# build pytorch model
DROPOUT = 0.1
BATCH_SIZE = 16

class Attention_Net(nn.Module):
    """Embedding -> bidirectional LSTM -> attention pooling -> linear -> softmax.

    ``forward`` returns class *probabilities* (each row sums to 1), not logits,
    so any loss applied to the output must expect probabilities.

    The layer sizes come from the module-level constants ``VOCAB_SIZE``,
    ``EMBED_DIM``, ``LSTM_DIM`` and ``MAX_SEQ_LEN``.
    """

    def __init__(self):
        super(Attention_Net, self).__init__()

        # define architecture
        self.embedding = nn.Embedding(VOCAB_SIZE, EMBED_DIM)

        self.lstm = nn.LSTM(EMBED_DIM, LSTM_DIM, bidirectional=True, batch_first=True)

        # attention layer (the bidirectional LSTM doubles the feature width)
        self.attention_layer = Attention(LSTM_DIM * 2, MAX_SEQ_LEN)

        self.linear = nn.Linear(LSTM_DIM * 2, 2) # change here to 1 or 2 depending on loss

        # Explicit dim: for the 2-D (batch, classes) output this matches what the
        # deprecated implicit-dim form selected, without the warning.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of token-id rows to per-class probabilities.

        Args:
            x: Integer tensor of token ids with ``MAX_SEQ_LEN`` ids per example.

        Returns:
            Tensor of shape ``(batch, 2)`` holding softmax probabilities.
        """
        h_embedding = self.embedding(x)
        # Infer the batch dimension instead of hard-coding BATCH_SIZE, so partial
        # or differently sized batches flow through unchanged.
        h_embedding = h_embedding.reshape(-1, MAX_SEQ_LEN, h_embedding.size(-1))
        h_lstm, _ = self.lstm(h_embedding)
        h_lstm_atten = self.attention_layer(h_lstm)
        out = self.linear(h_lstm_atten)
        softmax_out = self.softmax(out)
        return softmax_out
            
        

In [None]:
class SteamDataset(data.Dataset):
    """Map-style dataset over a DataFrame of encoded reviews.

    Features are every column of ``data`` whose name starts with ``"encoded"``;
    the label is the ``'recommendation'`` column cast to ``int64``.

    Args:
        data: DataFrame holding the ``encoded*`` columns and ``'recommendation'``.
        device: Where the tensors are placed. Defaults to CUDA when it is
            available and to the CPU otherwise.
    """

    def __init__(self, data, device=None):
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.data = data
        # Read the column names from the frame that was passed in, not from a
        # notebook-level global.
        text_cols = [col for col in data.columns if col.startswith("encoded")]
        self.train = torch.tensor(data[text_cols].values).to(device)

        labels = np.asarray(data['recommendation'].tolist())
        # reshape(-1) keeps the labels 1-D even for a single-row frame, where a
        # squeeze() would collapse them to a 0-d tensor that cannot be indexed.
        self.one_hot_labels = torch.tensor(labels).reshape(-1).long().to(device)

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the row at position ``index``."""
        X = self.train[index]
        Y = self.one_hot_labels[index]
        return X, Y

In [None]:
# First 80% of the rows form the training split.
train_num = int(0.8 * len(df))
steam_dataset = SteamDataset(df[:train_num])
# shuffle=True: SGD should see the rows in a fresh order every epoch instead of
# replaying the CSV order. drop_last keeps every batch at exactly BATCH_SIZE.
steam_data_loader = data.DataLoader(
    steam_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)
steam_data_loader

In [None]:
# num_negative = len(df) - num_positive
# print ('positive examples = ', num_positive)
# print ('negative examples = ', num_negative)

# weights = torch.tensor([1/(num_negative / len(df)), 1 / (num_positive / len(df))]).cuda()
# weights

In [None]:
attention_model = Attention_Net().cuda()
# loss_function = nn.CrossEntropyLoss(weight=weights)


def loss_function(probs, target):
    """Cross-entropy for a model that already outputs softmax probabilities.

    ``Attention_Net.forward`` ends with a softmax, whereas ``nn.CrossEntropyLoss``
    expects raw logits and applies log-softmax itself; feeding it probabilities
    applies softmax twice and flattens the gradients. Taking the log of the
    probabilities and using NLL gives the intended cross-entropy.

    Args:
        probs: ``(batch, classes)`` tensor of class probabilities.
        target: ``(batch,)`` tensor of integer class indices.

    Returns:
        Scalar tensor: mean negative log-likelihood over the batch.
    """
    # Clamp so a probability that underflowed to 0 cannot produce log(0) = -inf.
    return F.nll_loss(torch.log(probs.clamp_min(1e-12)), target)


optimizer = optim.SGD(attention_model.parameters(), lr=0.1)

In [None]:
# def weighted_binary_cross_entropy(output, target, weights=None):
        
#     if weights is not None:
#         assert len(weights) == 2
        
#         loss = weights[1] * (target * torch.log(output)) + \
#                weights[0] * ((1 - target) * torch.log(1 - output))
#     else:
#         loss = target * torch.log(output) + (1 - target) * torch.log(1 - output)

#     return torch.neg(torch.mean(loss))

In [None]:
# training loop
EPOCHS = 100
start = time.time()

for i in range(EPOCHS):
    second_start = time.time()
    # Plain Python float: accumulating the loss tensor itself would keep every
    # iteration's autograd graph alive and grow memory over the epoch.
    running_loss = 0.0
    attention_model.train()

    with tqdm(total=len(steam_data_loader), file=sys.stdout) as pbar:
        for train_X, train_Y in steam_data_loader:
            attention_model.zero_grad()
            pred_y = attention_model(train_X)

            # loss = weighted_binary_cross_entropy(pred_y, train_Y, weights)

            loss = loss_function(pred_y, train_Y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            pbar.set_description('Epoch {} | summed loss = {}'.format(i+1, round(running_loss)))
            pbar.update(1)
            # Kept from the original; presumably a workaround for stale notebook
            # progress bars -- TODO confirm it is still needed.
            tqdm._instances.clear()

    # Each batch loss is already a mean over the batch, so the epoch average is
    # taken per batch, not per sample.
    num_batches = max(len(steam_data_loader), 1)
    print ('Epoch {} | took {} seconds | summed loss: {} | avg loss: {}'
                   .format(i+1, time.time() - second_start, running_loss, running_loss / num_batches))

print ("Took {} seconds".format(time.time() - start))

In [None]:
# Held-out split: every row after the training cut-off.
steam_eval_dataset = SteamDataset(df[train_num:])
# Use BATCH_SIZE rather than a literal 16 so the evaluation batches always match
# the batch size used everywhere else in the notebook.
steam_eval_data_loader = data.DataLoader(
    steam_eval_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    drop_last=True,
)

In [None]:
# evaluate
attention_model.eval()
correct = 0
evaluated = 0      # samples actually scored (the loader may drop a trailing partial batch)
eval_loss = 0.0
eval_batches = 0
with torch.no_grad():
    for test_X, test_Y in steam_eval_data_loader:
        preds = attention_model(test_X)
        # Column 0 is the class-0 probability: >= 0.5 predicts label 0, else label 1.
        predicted = (preds[:, 0] < 0.5).long()
        correct += (predicted == test_Y).sum().item()
        evaluated += test_Y.size(0)

        eval_loss += loss_function(preds, test_Y).item()
        eval_batches += 1

# Divide by what was actually evaluated: samples for accuracy, batches for the
# loss (each batch loss is already a per-batch mean).
print ("Eval accuracy: {}".format(correct / max(evaluated, 1)))
print ("Eval summed loss: {} | avg loss: {}".format(eval_loss, eval_loss / max(eval_batches, 1)))

In [None]:
# NOTE(review): the file name says "epoch20" while the training cell sets
# EPOCHS = 100 -- confirm which one is intended.
PATH = 'models/word200_date3-31_epoch20.pt'
# Create the target directory first so a missing folder cannot discard the
# trained weights at the very last step.
_save_dir = os.path.dirname(PATH)
if _save_dir:
    os.makedirs(_save_dir, exist_ok=True)
torch.save(attention_model.state_dict(), PATH)