In [22]:
import numpy as np
from numpy import savetxt
import pandas as pd
import matplotlib.pyplot as plt
import time

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [23]:
df = pd.read_csv("data/cleaned_steam_data_3-29.csv", encoding='utf8', index_col=0)

In [24]:
df.head()

Unnamed: 0,funny,helpful,hour_played,is_early_access_review,recommendation,review,title_ACE COMBAT™ 7: SKIES UNKNOWN,title_ARK: Survival Evolved,title_ASTRONEER,title_Battlefleet Gothic: Armada 2,...,encoded_185,encoded_186,encoded_187,encoded_188,encoded_189,encoded_190,encoded_191,encoded_192,encoded_193,encoded_194
0,2,4,578,0,1,&gt Played as German Reich&gt Declare war on B...,0,0,0,0,...,33572,15335,18484,25557,18484,12429,8541,25557,14122,23610
1,0,0,184,0,1,yes.,0,0,0,0,...,0,0,0,0,0,0,0,0,0,30552
2,0,0,892,0,1,Very good game although a bit overpriced in my...,0,0,0,0,...,14122,39490,27271,39764,28167,8875,1494,40182,6951,39745
7,295,219,71,0,1,I have never been told to kill myself more tha...,0,0,0,0,...,0,0,0,0,0,34155,14517,19363,40032,14122
9,380,271,414,0,1,if you think cs go is toxic try this game,0,0,0,0,...,0,0,0,0,10818,2776,9421,19118,3218,14122


In [25]:
title_columns = [x for x in df.columns.tolist() if x.startswith('title_')]
drop_cols = ['funny', 'is_early_access_review', 'recommendation', 'review', 'cleaned_reviews', 'hour_played', 'Year', 'Month', 'Day']
drop_cols += title_columns

df.drop(drop_cols, axis=1, inplace=True)
df.head()

Unnamed: 0,helpful,encoded_1,encoded_2,encoded_3,encoded_4,encoded_5,encoded_6,encoded_7,encoded_8,encoded_9,...,encoded_185,encoded_186,encoded_187,encoded_188,encoded_189,encoded_190,encoded_191,encoded_192,encoded_193,encoded_194
0,4,0,0,0,0,0,0,0,0,0,...,33572,15335,18484,25557,18484,12429,8541,25557,14122,23610
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,30552
2,0,0,0,0,0,0,0,0,0,0,...,14122,39490,27271,39764,28167,8875,1494,40182,6951,39745
7,219,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,34155,14517,19363,40032,14122
9,271,0,0,0,0,0,0,0,0,0,...,0,0,0,0,10818,2776,9421,19118,3218,14122


In [26]:
text_columns = [x for x in df.columns.tolist() if x.startswith("encoded")]
len(text_columns)

194

In [27]:
text_df = df[text_columns]
text_df.head()

Unnamed: 0,encoded_1,encoded_2,encoded_3,encoded_4,encoded_5,encoded_6,encoded_7,encoded_8,encoded_9,encoded_10,...,encoded_185,encoded_186,encoded_187,encoded_188,encoded_189,encoded_190,encoded_191,encoded_192,encoded_193,encoded_194
0,0,0,0,0,0,0,0,0,0,0,...,33572,15335,18484,25557,18484,12429,8541,25557,14122,23610
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,30552
2,0,0,0,0,0,0,0,0,0,0,...,14122,39490,27271,39764,28167,8875,1494,40182,6951,39745
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,34155,14517,19363,40032,14122
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,10818,2776,9421,19118,3218,14122


In [62]:
MAX_SEQ_LEN = len(text_columns)
VOCAB_SIZE = 41248 # should ideally just transport this from prev
EMBED_DIM = 128
LSTM_DIM = 64

In [63]:
class Attention(nn.Module):
    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        
        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0
        
        weight = torch.zeros(feature_dim, 1)
        nn.init.kaiming_uniform_(weight)
        self.weight = nn.Parameter(weight)
        
        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))
    
    def forward(self, x, mask=None):
        feature_dim = self.feature_dim 
        step_dim = self.step_dim

        eij = torch.mm(
            x.contiguous().view(-1, feature_dim), 
            self.weight
        ).view(-1, step_dim)
        
        if self.bias:
            eij = eij + self.b
            
        eij = torch.tanh(eij)
        a = torch.exp(eij)
        
        if mask is not None:
            a = a * mask

        a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)

        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)

In [64]:
# build pytorch model
DROPOUT = 0.1

class Attention_Net(nn.Module):
    def __init__(self):
        super(Attention_Net, self).__init__()
        
        # define architecture
        self.embedding = nn.Embedding(VOCAB_SIZE, EMBED_DIM)
#         self.embedding_dropout = nn.Dropout2d(DROPOUT) # take this out potentially
        
        self.lstm = nn.LSTM(EMBED_DIM, LSTM_DIM, bidirectional=True, batch_first=True)
        
        # attention layer
        self.attention_layer = Attention(LSTM_DIM * 2, MAX_SEQ_LEN)
        
        self.linear = nn.Linear(LSTM_DIM * 2, 1)
        
        self.out = nn.Softplus() # turn outputs to positive
        
    def forward(self, x):
        h_embedding = self.embedding(x)
        h_embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0)).view(1, MAX_SEQ_LEN, -1)
        h_lstm, _ = self.lstm(h_embedding)
        h_lstm_atten = self.attention_layer(h_lstm)
        out = self.linear(h_lstm_atten)
        out = self.out(out)
        return out
            
        

In [65]:
train_X = torch.tensor(text_df.values).cuda()
train_Y = torch.tensor([[x] for x in df.helpful.values]).float().cuda()

print(train_X.size())
print(train_Y.size())

torch.Size([405669, 194])
torch.Size([405669, 1])


In [66]:
attention_model = Attention_Net().cuda()
# loss_function = nn.MSELoss()
loss_function = nn.SmoothL1Loss()
optimizer = optim.SGD(attention_model.parameters(), lr=0.01)

In [67]:
# training loop
EPOCHS = 1
checkpoint_num = 20000
start = time.time()
second_start = time.time()

running_loss = 0
for i in range(EPOCHS):
    for idx, train_x_example in enumerate(train_X):
        if (idx+1) % checkpoint_num == 0:
            print ('For {} train examples | took {} seconds | loss: {}'.format(checkpoint_num, time.time() - second_start, running_loss))
            second_start = time.time()
            running_loss = 0
        attention_model.zero_grad()
        pred_y = attention_model(train_x_example)
#         print ("pred_y = {}, actual = {}".format(pred_y, train_Y[idx]))
        loss = loss_function(pred_y, train_Y[idx])
        loss.backward()
        optimizer.step()
        running_loss += loss
#         print ("loss = ", loss)
#         print ("\n")
print ("Took {} seconds".format(time.time() - start))
        

For 20000 train examples | took 281.82173919677734 seconds | loss: 2468.292724609375
For 20000 train examples | took 443.24078702926636 seconds | loss: 69635.6875
For 20000 train examples | took 571.2022876739502 seconds | loss: 4876.20361328125
For 20000 train examples | took 512.2792339324951 seconds | loss: 152.99459838867188
For 20000 train examples | took 642.9711706638336 seconds | loss: 0.7331940531730652
For 20000 train examples | took 514.7550427913666 seconds | loss: 47318.80078125
For 20000 train examples | took 519.5337619781494 seconds | loss: 9440.6396484375
For 20000 train examples | took 491.88903188705444 seconds | loss: 8760.83984375
For 20000 train examples | took 267.6431818008423 seconds | loss: 3124.83935546875
For 20000 train examples | took 265.54890060424805 seconds | loss: 0.8994258642196655
For 20000 train examples | took 265.5103850364685 seconds | loss: 23339.740234375
For 20000 train examples | took 265.6467182636261 seconds | loss: 8496.2177734375
For 200

In [153]:
# evaluate
with torch.no_grad():
    
    preds = attention_model(train_X[0])
    print ('predicted helpful = ', preds)
    print ('actual helpful = ', train_Y[0])

predicted helpful =  tensor([[0.6429]])
actual helpful =  tensor([4.])


In [49]:
torch.cuda.is_available()

True

In [51]:
torch.cuda.current_device()

0

In [50]:
torch.cuda.get_device_name(0)

'GeForce GTX 1060 with Max-Q Design'

In [56]:
# Returns the current GPU memory usage by 
# tensors in bytes for a given device
torch.cuda.memory_allocated()


631242752

In [57]:
# Returns the current GPU memory managed by the
# caching allocator in bytes for a given device
torch.cuda.memory_cached()

631242752

In [59]:
next(attention_model.parameters()).is_cuda

True