In [19]:
%reload_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

from common.training import *
from common.utils import load_sql_to_df, save_to_sql, plot_history

import datasets
import data_process as dp
from models import SentimentAnalysisLSTM, SentimentAnalysisGRU, SentimentAnalysisRNN

import numpy as np
import pandas as pd
import sqlite3 as db
import torchtext
import torch
import torch.utils
import matplotlib.pyplot as plt
from langdetect import detect_langs, detect
from tqdm import tqdm_notebook

In [20]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cuda:0'

In [21]:
#reproducibility
# torch.manual_seed(0)
# random.seed(0)
# np.random.seed(0)

## Load data

In [22]:
important_columns = ["position", "move", "comment", "sentiment"]
gameknot_moves_df = load_sql_to_df("SELECT * FROM english_annotated_moves", "../../chess.db")[important_columns]
gameknot_moves_df

Unnamed: 0,position,move,comment,sentiment
0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,e2e4,This is my first gameknot game against someone...,2
1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,b1c3,"I've been playing the Vienna Gambit as white, ...",-1
2,rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...,f8c5,Minor disappointment.,-1
3,r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP4/PPP2PP...,f2f4,"My idea here is to expand on the kingside, dri...",-1
4,r1bqk2r/ppp2ppp/3p1n2/n1b1pP2/2B1P3/2NP4/PPP3P...,d1f3,"Maybe this isn't the greatest plan, since with...",0
...,...,...,...,...
326634,rnb5/p2p1P1N/7p/1pR5/6P1/4k3/2P1B3/3Q1RK1 b - ...,b5b4,Pawn,-1
326635,rnb5/p2p1P1N/7p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 w -...,f7f8q,Check mate in two moves,-1
326636,rnb2Q2/p2p3N/7p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 b -...,d7d6,Pawn,-1
326637,rnb2Q2/p6N/3p3p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 w -...,f8e8,Check,-1


In [23]:
chessbase_moves_df = load_sql_to_df("SELECT * FROM chessbase_moves_with_comments_2", "../../chess.db")
chessbase_moves_df = chessbase_moves_df[chessbase_moves_df.is_english == 1][important_columns]
chessbase_moves_df

Unnamed: 0,position,move,comment,sentiment
9,r1bq1k1r/ppppnBpp/8/6B1/3P4/1Q3N2/P4PPP/b4RK1 ...,f3e5,\n[%csl Gf7],-1
10,r1bq1k1r/ppppnBpp/8/4N1B1/3P4/1Q6/P4PPP/b4RK1 ...,a1d4,"Black tries to capture as much as possible, an...",-1
11,r1bq1k1r/ppppnBpp/8/4N1B1/3b4/1Q6/P4PPP/5RK1 w...,f7g6,[%csl Gf7] aha! If the king has not yet castle...,-1
13,r1bq1k1r/ppp1n1pp/6B1/3pN1B1/3b4/1Q6/P4PPP/5RK...,b3f3,"No mate, but..",-1
14,r2qk2r/ppp1n1pp/4BB2/3p4/8/5Q2/P4PPP/5RK1 w - ...,f6g7,"Powerful bishops, discovered by Greco. This\ns...",-1
...,...,...,...,...
1633567,rn1q1rk1/1b3ppp/1p1ppn2/p1p5/1PP5/P1Q1PN2/1B1P...,b4b5,It is probably a good idea for White to comple...,-1
1633568,r4rk1/1b1nqppp/1p1p1n2/pPp1p3/2P5/P1QPPN2/1B2B...,h1g1,White does not absolutely have to play with g4...,-1
1633571,rn1qk2r/1bpp1ppp/1p2pn2/p7/1PP5/P1Q5/1B1PPPPP/...,f2f3,"A somewhat unusual move, but specifically in t...",-1
1633580,rnbq1rk1/pp1pnppp/4p3/2pP4/2P1P3/2P2N2/P4PPP/R...,d7d6,leads to a complicated position with chances f...,-1


In [24]:
moves_df = pd.concat((gameknot_moves_df, chessbase_moves_df), axis=0)
# moves_df = gameknot_moves_df
moves_df

Unnamed: 0,position,move,comment,sentiment
0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,e2e4,This is my first gameknot game against someone...,2
1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,b1c3,"I've been playing the Vienna Gambit as white, ...",-1
2,rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...,f8c5,Minor disappointment.,-1
3,r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP4/PPP2PP...,f2f4,"My idea here is to expand on the kingside, dri...",-1
4,r1bqk2r/ppp2ppp/3p1n2/n1b1pP2/2B1P3/2NP4/PPP3P...,d1f3,"Maybe this isn't the greatest plan, since with...",0
...,...,...,...,...
1633567,rn1q1rk1/1b3ppp/1p1ppn2/p1p5/1PP5/P1Q1PN2/1B1P...,b4b5,It is probably a good idea for White to comple...,-1
1633568,r4rk1/1b1nqppp/1p1p1n2/pPp1p3/2P5/P1QPPN2/1B2B...,h1g1,White does not absolutely have to play with g4...,-1
1633571,rn1qk2r/1bpp1ppp/1p2pn2/p7/1PP5/P1Q5/1B1PPPPP/...,f2f3,"A somewhat unusual move, but specifically in t...",-1
1633580,rnbq1rk1/pp1pnppp/4p3/2pP4/2P1P3/2P2N2/P4PPP/R...,d7d6,leads to a complicated position with chances f...,-1


## Preprocessing

In [25]:
glove_50 = torchtext.vocab.GloVe(name="6B", dim=50, cache="../../.vector_cache/")
glove_100 = torchtext.vocab.GloVe(name="6B", dim=100, cache="../../.vector_cache/")
glove_200 = torchtext.vocab.GloVe(name="6B", dim=200, cache="../../.vector_cache/")
glove_300 = torchtext.vocab.GloVe(name="6B", dim=300, cache="../../.vector_cache/")

datasets.add_padding_vector_to_embeddings(glove_50)
datasets.add_padding_vector_to_embeddings(glove_100)
datasets.add_padding_vector_to_embeddings(glove_200)
datasets.add_padding_vector_to_embeddings(glove_300)

In [26]:
glove_embbedings = glove_50

In [27]:
comments_df = dp.prepare_data_for_sentiment_analysis_training(moves_df, glove_embbedings.stoi, max_len=120)
comments_df

Unnamed: 0,comment,sentiment,preprocessed_comment
87141,i missed this,1,"[miss, this]"
84976,[ csl,1,"[[, csl]"
49619,a.david kallai france,1,"[kallai, france]"
12339,[ csl,1,"[[, csl]"
22178,[ cal,1,"[[, cal]"
...,...,...,...
127555,not surprisingly sue attacks the n. but it doe...,0,"[not, surprisingly, sue, attack, the, n., but,..."
123246,in this position he is attacking and threateni...,0,"[in, this, position, he, be, attack, and, thre..."
4294,?? as mentioned before i was getting low on ti...,0,"[?, ?, as, mention, before, be, get, low, on, ..."
94968,the key idea is to keep the rook on the same r...,1,"[the, key, idea, be, to, keep, the, rook, on, ..."


In [28]:
train_df, test_df = dp.df_train_test_split(comments_df, comment_col='preprocessed_comment', test_size=0.05)
display(train_df)
display(test_df)

Unnamed: 0,comment,sentiment,preprocessed_comment
19263,[ csl,1,"[[, csl]"
93159,with compensation,1,"[with, compensation]"
19837,the end,1,"[the, end]"
71780,[ csl,1,"[[, csl]"
75799,for instance,1,"[for, instance]"
...,...,...,...
128236,i don't know why erika played this but best wa...,0,"[do, not, know, why, erika, play, this, but, g..."
94968,the key idea is to keep the rook on the same r...,1,"[the, key, idea, be, to, keep, the, rook, on, ..."
8680,? this is where i start to go wrong. come to t...,0,"[?, this, be, where, start, to, go, wrong, ., ..."
124285,i should have thought better then this. i am c...,0,"[should, have, think, well, then, this, ., be,..."


Unnamed: 0,comment,sentiment,preprocessed_comment
112400,the only,1,"[the, only]"
18152,ulybin iskusnyh russia ch,1,"[russia, ch]"
29184,[ csl,1,"[[, csl]"
25930,[ csl,1,"[[, csl]"
79308,for instance,1,"[for, instance]"
...,...,...,...
118806,adding tension the black position can't handle...,0,"[add, tension, the, black, position, can, not,..."
7486,? this variation is much better for black i th...,0,"[?, this, variation, be, much, well, for, blac..."
8757,? instead of continuing to develop ted advance...,0,"[?, instead, of, continue, to, develop, te, ad..."
7226,! right in the nick of time. i realise that i ...,1,"[!, right, in, the, nick, of, time, ., realise..."


In [29]:
train_dataset = datasets.PretrainedEmbeddingsIndicesDataset(train_df, glove_embbedings, comment_col='preprocessed_comment')
test_dataset = datasets.PretrainedEmbeddingsIndicesDataset(test_df, glove_embbedings,  comment_col='preprocessed_comment')

## Comments' length

In [30]:
lengths = [len(x) for x,_ in train_dataset]
plt.hist(lengths, bins=30)
plt.show()

  plt.show()


In [43]:
lengths = [len(x) for x,_ in test_dataset]
plt.hist(lengths, bins=30)
plt.show()

  plt.show()


## Training

In [44]:
batch_size = 512

train_loader = datasets.DataLoaderPadding(dataset=train_dataset, batch_size=batch_size, shuffle = True)
test_loader = datasets.DataLoaderPadding(dataset=test_dataset, batch_size=batch_size)

In [45]:
model = SentimentAnalysisLSTM(embeddings=glove_50, hidden_dim=128, num_layers=2, bidirectional=True, dropout=0.3)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model

SentimentAnalysisLSTM(
  (embedding): Embedding(400001, 50)
  (lstm): LSTM(50, 128, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [46]:
trainer = Trainer(model=model, train_dataLoader=train_loader, val_dataLoader=test_loader, optimizer=optimizer)

In [47]:
trainer.train(60, verbose=True)

Epoch 1/60
Train loss: 0.449957, accuracy: 79.01%
Val loss:   0.386670, accuracy: 82.81%
-----------------------------
Epoch 2/60
Train loss: 0.360188, accuracy: 84.02%
Val loss:   0.347701, accuracy: 85.09%
-----------------------------
Epoch 3/60
Train loss: 0.332136, accuracy: 85.67%
Val loss:   0.327435, accuracy: 86.46%
-----------------------------
Epoch 4/60
Train loss: 0.311705, accuracy: 86.74%
Val loss:   0.313871, accuracy: 87.10%
-----------------------------
Epoch 5/60
Train loss: 0.295748, accuracy: 87.47%
Val loss:   0.310094, accuracy: 86.92%
-----------------------------
Epoch 6/60
Train loss: 0.281292, accuracy: 88.21%
Val loss:   0.294272, accuracy: 87.81%
-----------------------------
Epoch 7/60
Train loss: 0.271621, accuracy: 88.62%
Val loss:   0.287481, accuracy: 88.18%
-----------------------------
Epoch 8/60
Train loss: 0.259307, accuracy: 89.19%
Val loss:   0.280887, accuracy: 88.35%
-----------------------------
Epoch 9/60
Train loss: 0.249615, accuracy: 89.76

In [49]:
trainer.plot_history()

In [50]:
best_model = trainer.best_model()


Loading best params on validation set (epoch 15, accuracy: 88.99%)



## Testing

In [51]:
test_model(test_loader, best_model)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)

In [39]:
test_model(train_loader, model_2)

Test Error: Accuracy: 93.46%, Avg loss: 0.163211


In [40]:
boundary = 0.1

In [42]:
test_high_confidence(test_loader, model_3, low_boundary=boundary)

High confidence samples: 1356/1931 = 70.22%
Accuracy for high confidence samples: 97.1%


In [43]:
all_moves_df = load_sql_to_df("SELECT * FROM english_annotated_moves", "../../chess.db",)[important_columns]
all_moves_df

Unnamed: 0,position,move,comment,sentiment
0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,e2e4,This is my first gameknot game against someone...,2
1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,b1c3,"I've been playing the Vienna Gambit as white, ...",-1
2,rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...,f8c5,Minor disappointment.,-1
3,r1bqk2r/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP4/PPP2PP...,f2f4,"My idea here is to expand on the kingside, dri...",-1
4,r1bqk2r/ppp2ppp/3p1n2/n1b1pP2/2B1P3/2NP4/PPP3P...,d1f3,"Maybe this isn't the greatest plan, since with...",0
...,...,...,...,...
326634,rnb5/p2p1P1N/7p/1pR5/6P1/4k3/2P1B3/3Q1RK1 b - ...,b5b4,Pawn,-1
326635,rnb5/p2p1P1N/7p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 w -...,f7f8q,Check mate in two moves,-1
326636,rnb2Q2/p2p3N/7p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 b -...,d7d6,Pawn,-1
326637,rnb2Q2/p6N/3p3p/2R5/1p4P1/4k3/2P1B3/3Q1RK1 w -...,f8e8,Check,-1


In [45]:
moves_df_with_preprocessed_comments = dp.prepare_data_for_sentiment_analysis_prediction(all_moves_df, glove_embbedings.stoi, max_len=150)
moves_df_with_preprocessed_comments

Unnamed: 0,position,move,comment,sentiment,preprocessed_comment
63267,2kr1b1r/ppp2ppp/2n5/4pb2/2P5/P1NP1PqP/1P4P1/R1...,e1d2,Forced.,-1,"[force, .]"
4415,r1b3k1/5p1r/p7/q1n1Q2p/2p1P1p1/6P1/PP2N1BP/R4R...,c8b7,Prevents Qe8,-1,"[prevent, qe8]"
302202,2k5/p1p2pp1/2p1p1r1/7p/4P3/P4P2/1PPr3K/R4R2 w ...,h2h3,a role,-1,"[a, role]"
221065,r1b2r2/p2n3k/1p4pp/2pp4/3P2Q1/P3P3/5PPP/2R3KR ...,d4c5,trading down,-1,"[trade, down]"
165029,r2q4/2p2k2/P1p2pp1/2P5/3p3R/6Q1/P4PPK/8 b - - ...,f7g7,prevents it,-1,"[prevent, it]"
...,...,...,...,...,...
237207,rnbqkb1r/pp2pppp/5n2/3p4/3P4/5N2/PPP2PPP/RNBQK...,f1b5,"So, I said, and, that's a good thing because t...",-1,"[so, ,, say, ,, and, ,, that, be, a, good, thi..."
294455,r2qk2r/5pp1/p3pn1p/1p1pBb2/2pP4/bPP1P2P/P2NBPP...,b3c4,"good move , white cant avoid blacks threat Bb2...",-1,"[good, move, ,, white, can, not, avoid, black,..."
277372,rnbqkbnr/ppp2ppp/3p4/4p3/3PP3/5N2/PPP2PPP/RNBQ...,f7f6,"But, she, however, does not. The most common m...",-1,"[but, ,, she, ,, however, ,, do, not, ., the, ..."
264663,2k4r/p2p3p/6pR/6p1/P3Pr2/2P2P2/B1P3PP/5K2 b - ...,f4h4,"This is why my remaining R, as noted earlier, ...",-1,"[this, be, why, my, remain, r, ,, as, note, ea..."


In [46]:
moves_df_with_preprocessed_comments = moves_df_with_preprocessed_comments.reset_index()
moves_df_with_preprocessed_comments

Unnamed: 0,index,position,move,comment,sentiment,preprocessed_comment
0,63267,2kr1b1r/ppp2ppp/2n5/4pb2/2P5/P1NP1PqP/1P4P1/R1...,e1d2,Forced.,-1,"[force, .]"
1,4415,r1b3k1/5p1r/p7/q1n1Q2p/2p1P1p1/6P1/PP2N1BP/R4R...,c8b7,Prevents Qe8,-1,"[prevent, qe8]"
2,302202,2k5/p1p2pp1/2p1p1r1/7p/4P3/P4P2/1PPr3K/R4R2 w ...,h2h3,a role,-1,"[a, role]"
3,221065,r1b2r2/p2n3k/1p4pp/2pp4/3P2Q1/P3P3/5PPP/2R3KR ...,d4c5,trading down,-1,"[trade, down]"
4,165029,r2q4/2p2k2/P1p2pp1/2P5/3p3R/6Q1/P4PPK/8 b - - ...,f7g7,prevents it,-1,"[prevent, it]"
...,...,...,...,...,...,...
315743,237207,rnbqkb1r/pp2pppp/5n2/3p4/3P4/5N2/PPP2PPP/RNBQK...,f1b5,"So, I said, and, that's a good thing because t...",-1,"[so, ,, say, ,, and, ,, that, be, a, good, thi..."
315744,294455,r2qk2r/5pp1/p3pn1p/1p1pBb2/2pP4/bPP1P2P/P2NBPP...,b3c4,"good move , white cant avoid blacks threat Bb2...",-1,"[good, move, ,, white, can, not, avoid, black,..."
315745,277372,rnbqkbnr/ppp2ppp/3p4/4p3/3PP3/5N2/PPP2PPP/RNBQ...,f7f6,"But, she, however, does not. The most common m...",-1,"[but, ,, she, ,, however, ,, do, not, ., the, ..."
315746,264663,2k4r/p2p3p/6pR/6p1/P3Pr2/2P2P2/B1P3PP/5K2 b - ...,f4h4,"This is why my remaining R, as noted earlier, ...",-1,"[this, be, why, my, remain, r, ,, as, note, ea..."


In [52]:
unlabeled_dataset = datasets.PretrainedEmbeddingsIndicesDataset(moves_df_with_preprocessed_comments, glove_embbedings, comment_col="preprocessed_comment")
unlabeled_loader = datasets.DataLoaderPadding(dataset=unlabeled_dataset, batch_size = 128)
predictions = predict(unlabeled_loader, model_2, low_boundary=0.4)
moves_df_with_preprocessed_comments.loc[:, "sentiment"] = predictions.numpy().astype(np.int32)
moves_df_with_preprocessed_comments

Unnamed: 0,index,position,move,comment,sentiment,preprocessed_comment
0,63267,2kr1b1r/ppp2ppp/2n5/4pb2/2P5/P1NP1PqP/1P4P1/R1...,e1d2,Forced.,1,"[force, .]"
1,4415,r1b3k1/5p1r/p7/q1n1Q2p/2p1P1p1/6P1/PP2N1BP/R4R...,c8b7,Prevents Qe8,1,"[prevent, qe8]"
2,302202,2k5/p1p2pp1/2p1p1r1/7p/4P3/P4P2/1PPr3K/R4R2 w ...,h2h3,a role,-1,"[a, role]"
3,221065,r1b2r2/p2n3k/1p4pp/2pp4/3P2Q1/P3P3/5PPP/2R3KR ...,d4c5,trading down,1,"[trade, down]"
4,165029,r2q4/2p2k2/P1p2pp1/2P5/3p3R/6Q1/P4PPK/8 b - - ...,f7g7,prevents it,1,"[prevent, it]"
...,...,...,...,...,...,...
315743,237207,rnbqkb1r/pp2pppp/5n2/3p4/3P4/5N2/PPP2PPP/RNBQK...,f1b5,"So, I said, and, that's a good thing because t...",0,"[so, ,, say, ,, and, ,, that, be, a, good, thi..."
315744,294455,r2qk2r/5pp1/p3pn1p/1p1pBb2/2pP4/bPP1P2P/P2NBPP...,b3c4,"good move , white cant avoid blacks threat Bb2...",1,"[good, move, ,, white, can, not, avoid, black,..."
315745,277372,rnbqkbnr/ppp2ppp/3p4/4p3/3PP3/5N2/PPP2PPP/RNBQ...,f7f6,"But, she, however, does not. The most common m...",0,"[but, ,, she, ,, however, ,, do, not, ., the, ..."
315746,264663,2k4r/p2p3p/6pR/6p1/P3Pr2/2P2P2/B1P3PP/5K2 b - ...,f4h4,"This is why my remaining R, as noted earlier, ...",0,"[this, be, why, my, remain, r, ,, as, note, ea..."


In [53]:
classified_comments = moves_df_with_preprocessed_comments[moves_df_with_preprocessed_comments.sentiment.isin([0,1])].reset_index()
classified_comments

Unnamed: 0,level_0,index,position,move,comment,sentiment,preprocessed_comment
0,0,63267,2kr1b1r/ppp2ppp/2n5/4pb2/2P5/P1NP1PqP/1P4P1/R1...,e1d2,Forced.,1,"[force, .]"
1,1,4415,r1b3k1/5p1r/p7/q1n1Q2p/2p1P1p1/6P1/PP2N1BP/R4R...,c8b7,Prevents Qe8,1,"[prevent, qe8]"
2,3,221065,r1b2r2/p2n3k/1p4pp/2pp4/3P2Q1/P3P3/5PPP/2R3KR ...,d4c5,trading down,1,"[trade, down]"
3,4,165029,r2q4/2p2k2/P1p2pp1/2P5/3p3R/6Q1/P4PPK/8 b - - ...,f7g7,prevents it,1,"[prevent, it]"
4,5,19167,r1bqkbnr/p1p2ppp/3p4/2p1p3/2P1P3/5N2/PP1P1PPP/...,d2d3,facing off,1,"[face, off]"
...,...,...,...,...,...,...,...
276886,315743,237207,rnbqkb1r/pp2pppp/5n2/3p4/3P4/5N2/PPP2PPP/RNBQK...,f1b5,"So, I said, and, that's a good thing because t...",0,"[so, ,, say, ,, and, ,, that, be, a, good, thi..."
276887,315744,294455,r2qk2r/5pp1/p3pn1p/1p1pBb2/2pP4/bPP1P2P/P2NBPP...,b3c4,"good move , white cant avoid blacks threat Bb2...",1,"[good, move, ,, white, can, not, avoid, black,..."
276888,315745,277372,rnbqkbnr/ppp2ppp/3p4/4p3/3PP3/5N2/PPP2PPP/RNBQ...,f7f6,"But, she, however, does not. The most common m...",0,"[but, ,, she, ,, however, ,, do, not, ., the, ..."
276889,315746,264663,2k4r/p2p3p/6pR/6p1/P3Pr2/2P2P2/B1P3PP/5K2 b - ...,f4h4,"This is why my remaining R, as noted earlier, ...",0,"[this, be, why, my, remain, r, ,, as, note, ea..."


In [54]:
con = db.connect("../../chess.db")
classified_comments.sentiment =  pd.to_numeric(classified_comments.sentiment)
classified_comments_to_save = classified_comments[['position', 'move', 'comment', 'sentiment']]
classified_comments_to_save.to_sql('classified_moves', con, if_exists='replace')

276891

In [58]:
angelfire_moves = load_sql_to_df("SELECT * FROM angelfire_moves", "../../chess.db")[['position', 'move', 'comment']]
angelfire_moves['sentiment'] = -1
angelfire_moves

Unnamed: 0,position,move,comment,sentiment
0,r1bq1rk1/2p1bppp/p1n2n2/1p1pp3/4P3/1BP2N2/PP1P...,d2d3,"An excellent reply, avoiding the complications...",-1
1,r3r1k1/1b2bppp/p1n2n2/1ppqp1B1/8/2PP1N2/PPB1QP...,g5h4,threatening to win the e-pawn with Bg3 as well...,-1
2,r3r1k1/1b2bppp/p1n2n2/1ppqp3/7B/2PP1N2/PPB1QPP...,a8d8,Apparently Black still stands very well. His p...,-1
3,3rr1k1/1b2bppp/p1n2n2/1ppqp3/7B/2PP1N2/PPB1QPP...,c2b3,This deep moves demonstrates otherwise,-1
4,4r1k1/1b2bppp/p4n2/1pp1R3/7B/1BPr4/PP3PPP/R4NK...,c5c4,"The Black pieces are\nawkardly ties up, while ...",-1
...,...,...,...,...
27500,2Q2b1k/1p3q2/p6p/5n2/3p1r2/5NR1/PP3K2/7R w - -...,g3h3,"White defends very\nactively, with his major p...",-1
27501,2Q2b1k/1p3q2/p6p/5n2/3p1r2/5N1R/PP3K2/7R b - -...,d4d3,"A decoy, to tempt the White Queen\naway from t...",-1
27502,5b1k/1p6/p6p/5n2/5r2/3Q1N1R/P3K3/q6R b - - 3 43,a1a2,And this brings about a very unusual middlegam...,-1
27503,5b1k/1p6/p6p/5n2/5r2/3Q3R/q2NK3/7R b - - 1 44,f4d4,Consistent chess. Black hammers away at the\nw...,-1


In [59]:
angelfire_moves_preprocessed = dp.prepare_data_for_sentiment_analysis_prediction(angelfire_moves, glove_embbedings.stoi, max_len=150)
angelfire_moves_preprocessed

Unnamed: 0,position,move,comment,sentiment,preprocessed_comment
25957,3qkb1r/3b3p/prnp2p1/3Npp2/2N1n3/R1P5/1P3PPP/3Q...,c4b6,according to Fritz7,-1,"[accord, to]"
11058,2rr2k1/pp1q1pb1/5pp1/3P1b1p/2nP4/1QN2BNP/PP3PP...,c4d6,Blockades.,-1,"[blockade, .]"
4863,1r1n3R/ppk1qpQ1/2bppNp1/2p5/2P2P2/2PP2P1/P2KP1...,c7b6,22...Bxg2 23.Re8,-1,"[22, ...]"
13104,rnbqkb1r/pp2pppp/3p1n2/8/3NP3/2N5/PPP2PPP/R1BQ...,a7a6,Covers b5,-1,"[cover, b5]"
1973,2r1k2r/1bqp1ppp/1p2pn2/p7/2P5/P1Q1PNP1/1P3PBP/...,d7d5,well timed,-1,"[well, time]"
...,...,...,...,...,...
25658,4r3/6kp/3p2p1/2q1p1b1/8/5BP1/Q4PKP/1R6 b - - 8 36,e5e4,At the end Polgar could force an entry to Blac...,-1,"[at, the, end, polgar, could, force, an, entry..."
22580,rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBN...,g1f3,"This could be just a move-order trick, or a si...",-1,"[this, could, be, just, a, move, -, order, tri..."
9418,r1bq1rk1/ppp3pp/2np1n2/5p2/2PPpN2/2P1P3/P4PPP/...,h2h4,Kramnik mentioned that all the moves up to her...,-1,"[kramnik, mention, that, all, the, move, up, t..."
1723,3r1qk1/1p1n2p1/r1p2p1p/p2Pp3/P1P1P3/3Q1P2/5BPP...,c6c5,ACT III) T\nhe move finishes the first stage ...,-1,"[act, iii, ), t, he, move, finish, the, first,..."


In [61]:
unlabeled_dataset = datasets.PretrainedEmbeddingsIndicesDataset(angelfire_moves_preprocessed, glove_embbedings, comment_col="preprocessed_comment")
unlabeled_loader = datasets.DataLoaderPadding(dataset=unlabeled_dataset, batch_size = 128)
predictions = predict(unlabeled_loader, model_2, low_boundary=0.4)
angelfire_moves_preprocessed.loc[:, "sentiment"] = predictions.numpy().astype(np.int32)
angelfire_moves_preprocessed

Unnamed: 0,position,move,comment,sentiment,preprocessed_comment
25957,3qkb1r/3b3p/prnp2p1/3Npp2/2N1n3/R1P5/1P3PPP/3Q...,c4b6,according to Fritz7,1,"[accord, to]"
11058,2rr2k1/pp1q1pb1/5pp1/3P1b1p/2nP4/1QN2BNP/PP3PP...,c4d6,Blockades.,1,"[blockade, .]"
4863,1r1n3R/ppk1qpQ1/2bppNp1/2p5/2P2P2/2PP2P1/P2KP1...,c7b6,22...Bxg2 23.Re8,0,"[22, ...]"
13104,rnbqkb1r/pp2pppp/3p1n2/8/3NP3/2N5/PPP2PPP/R1BQ...,a7a6,Covers b5,1,"[cover, b5]"
1973,2r1k2r/1bqp1ppp/1p2pn2/p7/2P5/P1Q1PNP1/1P3PBP/...,d7d5,well timed,1,"[well, time]"
...,...,...,...,...,...
25658,4r3/6kp/3p2p1/2q1p1b1/8/5BP1/Q4PKP/1R6 b - - 8 36,e5e4,At the end Polgar could force an entry to Blac...,0,"[at, the, end, polgar, could, force, an, entry..."
22580,rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBN...,g1f3,"This could be just a move-order trick, or a si...",0,"[this, could, be, just, a, move, -, order, tri..."
9418,r1bq1rk1/ppp3pp/2np1n2/5p2/2PPpN2/2P1P3/P4PPP/...,h2h4,Kramnik mentioned that all the moves up to her...,0,"[kramnik, mention, that, all, the, move, up, t..."
1723,3r1qk1/1p1n2p1/r1p2p1p/p2Pp3/P1P1P3/3Q1P2/5BPP...,c6c5,ACT III) T\nhe move finishes the first stage ...,1,"[act, iii, ), t, he, move, finish, the, first,..."


In [64]:
angelfire_moves.dtypes

position                object
move                    object
comment                 object
sentiment                int64
preprocessed_comment    object
dtype: object

In [66]:
classified_comments_angelfire = angelfire_moves_preprocessed[angelfire_moves_preprocessed.sentiment.isin([0,1])].reset_index()
classified_comments_angelfire

Unnamed: 0,index,position,move,comment,sentiment,preprocessed_comment
0,25957,3qkb1r/3b3p/prnp2p1/3Npp2/2N1n3/R1P5/1P3PPP/3Q...,c4b6,according to Fritz7,1,"[accord, to]"
1,11058,2rr2k1/pp1q1pb1/5pp1/3P1b1p/2nP4/1QN2BNP/PP3PP...,c4d6,Blockades.,1,"[blockade, .]"
2,4863,1r1n3R/ppk1qpQ1/2bppNp1/2p5/2P2P2/2PP2P1/P2KP1...,c7b6,22...Bxg2 23.Re8,0,"[22, ...]"
3,13104,rnbqkb1r/pp2pppp/3p1n2/8/3NP3/2N5/PPP2PPP/R1BQ...,a7a6,Covers b5,1,"[cover, b5]"
4,1973,2r1k2r/1bqp1ppp/1p2pn2/p7/2P5/P1Q1PNP1/1P3PBP/...,d7d5,well timed,1,"[well, time]"
...,...,...,...,...,...,...
24450,25658,4r3/6kp/3p2p1/2q1p1b1/8/5BP1/Q4PKP/1R6 b - - 8 36,e5e4,At the end Polgar could force an entry to Blac...,0,"[at, the, end, polgar, could, force, an, entry..."
24451,22580,rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBN...,g1f3,"This could be just a move-order trick, or a si...",0,"[this, could, be, just, a, move, -, order, tri..."
24452,9418,r1bq1rk1/ppp3pp/2np1n2/5p2/2PPpN2/2P1P3/P4PPP/...,h2h4,Kramnik mentioned that all the moves up to her...,0,"[kramnik, mention, that, all, the, move, up, t..."
24453,1723,3r1qk1/1p1n2p1/r1p2p1p/p2Pp3/P1P1P3/3Q1P2/5BPP...,c6c5,ACT III) T\nhe move finishes the first stage ...,1,"[act, iii, ), t, he, move, finish, the, first,..."


In [67]:
con = db.connect("../../chess.db")
classified_comments.sentiment =  pd.to_numeric(classified_comments_angelfire.sentiment)
classified_comments_to_save = classified_comments_angelfire[['position', 'move', 'comment', 'sentiment']]
classified_comments_to_save.to_sql('classified_moves', con, if_exists='append')

24455