In [8]:
%load_ext autoreload
%autoreload 2

import datasets
import torch.nn as nn
import data_process as dp
import torchtext
import torch
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
moves_df = datasets.load_sql_to_df("SELECT * FROM english_annotated_moves", "./chess.db")
moves_df.head(5)

Unnamed: 0,index,position,move,comment,halfmove_number,game_id,sentiment,auto_sentiment
0,0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,e2e4,This is my first gameknot game against someone...,1,44,2,0
1,1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,b1c3,"I've been playing the Vienna Gambit as white, ...",3,44,-1,0
2,2,rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...,f8c5,Minor disappointment.,4,44,-1,0
3,3,r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP4/PPP2PP...,f2f4,"My idea here is to expand on the kingside, dri...",9,44,-1,0
4,4,r1bqk2r/ppp2ppp/3p1n2/n1b1pP2/2B1P3/2NP4/PPP3P...,d1f3,"Maybe this isn't the greatest plan, since with...",13,44,0,0


In [3]:
glove_embbedings = torchtext.vocab.GloVe(name="6B", dim=50)
datasets.add_padding_vector_to_embeddings(glove_embbedings)

In [4]:
moves_df = dp.prepare_data_for_sentiment_analysis_training(moves_df, glove_embbedings.stoi)
moves_df

Unnamed: 0,comment,sentiment
0,"[maybe, this, be, not, the, great, plan, since...",0
1,"[?, too, slow, maybe, .]",0
2,"[?, this, allow, a, combination, by, white, .,...",0
3,"[!, !, brilliant, move, it, give, black, the, ...",1
4,"[!, !, this, move, be, really, fantastic, ., m...",1
...,...,...
19864,"[in, hindsight, be, need, to, get, my, king, t...",0
19865,"[keep, black, alive]",0
19866,"[time, remain, minute, .]",0
19867,"[black, be, win, .]",1


In [5]:
moves_df[:20]

Unnamed: 0,comment,sentiment
0,"[maybe, this, be, not, the, great, plan, since...",0
1,"[?, too, slow, maybe, .]",0
2,"[?, this, allow, a, combination, by, white, .,...",0
3,"[!, !, brilliant, move, it, give, black, the, ...",1
4,"[!, !, this, move, be, really, fantastic, ., m...",1
5,"[?, ?, what, !, do, black, do, not, see, the, ...",0
6,"[!, wow, white, do, not, care, his, pawn, do, ...",1
7,"[?, last, mistake, may, be, be, well, make, wh...",0
8,"[!, stop, king]",1
9,"[?, why, not, simply, recapture, ?]",0


In [9]:
dataset = datasets.PretrainedEmbeddingsIndicesDataset(moves_df[:20], glove_embbedings, random_state=1)

In [10]:
for x,y in dataset:
    print(x, y)

torch.Size([3]) torch.Size([])
torch.Size([4]) torch.Size([])
torch.Size([5]) torch.Size([])
torch.Size([6]) torch.Size([])
torch.Size([11]) torch.Size([])
torch.Size([11]) torch.Size([])
torch.Size([12]) torch.Size([])
torch.Size([12]) torch.Size([])
torch.Size([13]) torch.Size([])
torch.Size([14]) torch.Size([])
torch.Size([14]) torch.Size([])
torch.Size([15]) torch.Size([])
torch.Size([16]) torch.Size([])
torch.Size([18]) torch.Size([])
torch.Size([22]) torch.Size([])
torch.Size([35]) torch.Size([])
torch.Size([39]) torch.Size([])
torch.Size([53]) torch.Size([])
torch.Size([54]) torch.Size([])
torch.Size([54]) torch.Size([])


In [9]:
def create_embedding_layer(weights_matrix, non_trainable=False):
    num_embeddings, embedding_dim = weights_matrix.size()
    emb_layer = nn.Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': weights_matrix})
    if non_trainable:
        emb_layer.weight.requires_grad = False

    return emb_layer


In [12]:
class SentimentAnalysisLSTM(nn.Module):
    def __init__(self, embeddings: torchtext.vocab.Vectors, hidden_dim, num_layers = 2, dropout = 0.1):
        super().__init__()
        
        self.embedding = create_embedding_layer(embeddings.vectors, non_trainable=True)

        self.lstm = nn.LSTM(embeddings.dim, hidden_dim, num_layers=num_layers, dropout=dropout, batch_first=True)

        self.fc = nn.Linear(hidden_dim, 1)
        
        
    def forward(self, x):
        # print(f"x.shape: {x.shape}")

        embedded = self.embedding(x)

        # print(f"embedded.shape: {embedded.shape}")

        lstm_output, _ = self.lstm(embedded)

        # print(f"lstm_output.shape: {lstm_output.shape}")
        
        last_lstm_output = lstm_output[:, -1, :]

        # print(f"last_lstm_output.shape: {last_lstm_output.shape}")

        output = self.fc(last_lstm_output)

        # print(f"output.shape: {output.shape}")

        return output


In [13]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [14]:
model = SentimentAnalysisLSTM(glove_embbedings, 128)
model.to(device)
model

SentimentAnalysisLSTM(
  (embedding): Embedding(400001, 50)
  (lstm): LSTM(50, 128, num_layers=2, batch_first=True, dropout=0.1)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)

In [20]:
train_df, val_df = train_test_split(moves_df, test_size=0.05)
display(train_df)
display(val_df)

Unnamed: 0,comment,sentiment
12274,"[well, would, have, be, now, white, have, a, w...",0
14583,"[seem, to, allow, a, glimmer, of, hope]",0
17695,"[yes, and, now, black, play, what, note, a, mo...",1
10366,"[and, now, be, start, to, worry, ..., surely, ...",1
3204,"[!, good, move, ., foolishly, dismiss, the, po...",1
...,...,...
19553,"[have, say, that, decide, to, go, after, his, ...",0
8039,"[?, ?, black, prefer, to, keep, on, harass, my...",0
15332,"[threaten, a, lot, of, thing, include, follow,...",1
3298,"[?, white, can, play, and, black, have, no, ti...",0


Unnamed: 0,comment,sentiment
893,"[!, a, good, move, which, be, which, will, cos...",1
15496,"[now, it, become, clear, just, how, much, cont...",1
18259,"[well, ., although, white, win, a, pawn, witho...",1
9774,"[!, !, knight, fork, and, sacrifice, and, prot...",1
18523,"[lose, the, track, at, once, would, have, win,...",0
...,...,...
2129,"[!, !, get, meh, da, queenie]",1
5743,"[!, white, seizes, control, of, the, square, a...",1
7499,"[?, very, bad, mistake, .]",0
12303,"[lose, a, minor, piece]",0


In [2]:
a = [("12344414", 0), ("abc", 1), ("12345", 0)]
a.sort(key=lambda x: len(x[0]))
a

[('abc', 1), ('12345', 0), ('12344414', 0)]