In [1]:
!pip install aitextgen
!pip install chess

Collecting aitextgen
  Downloading aitextgen-0.5.2.tar.gz (572 kB)
[K     |████████████████████████████████| 572 kB 291 kB/s eta 0:00:01
Collecting fire>=0.3.0
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 3.4 MB/s eta 0:00:011
Building wheels for collected packages: aitextgen, fire
  Building wheel for aitextgen (setup.py) ... [?25ldone
[?25h  Created wheel for aitextgen: filename=aitextgen-0.5.2-py3-none-any.whl size=575905 sha256=1fe3ddee3555049e02bca8c305ed67fba14764227ebf2d29030ebdb4691130f3
  Stored in directory: /root/.cache/pip/wheels/83/e2/74/46c887b0989a51a7acee0c09551a3ae9d34b939fb4bea404a0
  Building wheel for fire (setup.py) ... [?25ldone
[?25h  Created wheel for fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115943 sha256=068e812a99cdc2f0f5ee31e805f7320ce5ddefefbdd12f03d650873ce3733b1f
  Stored in directory: /root/.cache/pip/wheels/8a/67/fb/2e8a12fa16661b9d5af1f654bd199366799740a85c64981226
Successfully built aitextgen 

In [2]:
import os
import numpy as np
import pandas as pd

# GPT
from aitextgen import aitextgen

import datasets
import transformers
import pandas as pd
from datasets import Dataset

#Tokenizer
from transformers import RobertaTokenizerFast

#Encoder-Decoder Model
from transformers import EncoderDecoderModel

from transformers import Seq2SeqTrainer
from transformers import Seq2SeqTrainingArguments
from dataclasses import dataclass, field
from typing import Optional

from transformers import RobertaTokenizer, RobertaForCausalLM, RobertaConfig
import torch

# evaluation
from sklearn.metrics import mean_squared_error, accuracy_score

In [3]:
# Set TOKENIZERS_PARALLELISM to "false" for this process before any tokenizer is loaded.
# NOTE(review): presumably this turns off the Hugging Face tokenizers library's
# internal parallelism (and the related fork warning) — confirm against its docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

## Load Trained Models for Chess Task 2

### Helper functions

In [15]:
def is_float(element) -> bool:
    """Return True when ``float(element)`` succeeds, False otherwise.

    Used to tell numeric evaluations (e.g. "1.0", "-2.0") apart from
    symbolic ones (e.g. "#1", "0-1"). Note that ``float()`` also accepts
    strings such as "nan" and "inf", so those count as floats here.

    Args:
        element: Any object; typically a string token.

    Returns:
        bool: whether the value can be converted with ``float()``.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number, e.g. "#1".
        # TypeError: an object float() cannot accept at all, e.g. None or a list.
        return False
    return True

def get_metrics(labels, predictions):
    """Score predictions against labels, split by whether both parse as floats.

    Each (label, prediction) pair falls in one of three buckets:
      * both parse as floats  -> contributes to the mean squared error;
      * neither parses        -> contributes to exact-match accuracy;
      * only one parses       -> counted as a type mismatch ("diff").

    Args:
        labels: Sequence of true evaluations.
        predictions: Sequence of generated evaluations, same length as ``labels``.

    Returns:
        tuple: ``(mse, accuracy, type_match)`` where ``mse`` / ``accuracy`` are
        -1 when their bucket is empty, and ``type_match`` maps "float",
        "string" and "diff" to the fraction of pairs in each bucket.

    Raises:
        ValueError: if ``labels`` and ``predictions`` differ in length.
    """
    total = len(labels)
    if len(predictions) != total:
        # A silent length mismatch would skew every fraction below.
        raise ValueError(
            f"labels and predictions must have the same length "
            f"(got {total} and {len(predictions)})"
        )
    if total == 0:
        # Nothing to score; avoid dividing by zero when normalising.
        return -1, -1, {"float": 0.0, "string": 0.0, "diff": 0.0}

    type_match = {"float": 0, "string": 0, "diff": 0}
    float_true, float_pred = [], []
    string_true, string_pred = [], []
    for label, prediction in zip(labels, predictions):
        label_is_float = is_float(label)
        if is_float(prediction) != label_is_float:
            type_match["diff"] += 1
        elif label_is_float:
            type_match["float"] += 1
            float_true.append(float(label))
            float_pred.append(float(prediction))
        else:
            type_match["string"] += 1
            string_true.append(label)
            string_pred.append(prediction)

    # -1 is the "not computed" sentinel for an empty bucket.
    mse = mean_squared_error(float_true, float_pred) if float_true else -1
    accuracy = accuracy_score(string_true, string_pred) if string_true else -1

    # Convert raw counts into fractions of all evaluated pairs.
    for bucket in type_match:
        type_match[bucket] /= total
    return mse, accuracy, type_match
    
def get_pred_token_encdec(labels):
    """Extract the evaluation token from each encoder-decoder output string.

    For every string, the first whitespace-separated token is taken and its
    first two characters are dropped.
    NOTE(review): the two dropped characters are presumably a fixed prefix
    emitted by the decoder — confirm against real model outputs.

    Args:
        labels: Sequence of decoded output strings.

    Returns:
        numpy.ndarray: object array with one extracted token per input; an
        empty or whitespace-only input yields "" instead of raising.
    """
    new_label = np.empty(len(labels), dtype=object)
    for i, text in enumerate(labels):
        tokens = text.split()
        # A decode that produced no visible text has no first token to slice.
        new_label[i] = tokens[0][2:] if tokens else ""
    return new_label

def get_pred_token_gpt(labels):
    """Extract the evaluation token from each GPT generation.

    The last whitespace-separated token of every string is taken as the
    predicted evaluation.

    Args:
        labels: Sequence of generated text strings.

    Returns:
        numpy.ndarray: object array with one token per input; an empty or
        whitespace-only input yields "" instead of raising.
    """
    new_label = np.empty(len(labels), dtype=object)
    for i, text in enumerate(labels):
        tokens = text.split()
        # Guard against generations with no tokens at all.
        new_label[i] = tokens[-1] if tokens else ""
    return new_label

def pred_enc_dec(model, tokenizer, data):
    """Generate one sampled prediction per prompt with the encoder-decoder model.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode(text, return_tensors=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        data: Sequence of prompt strings.

    Returns:
        numpy.ndarray: object array holding the decoded text for each prompt.
    """
    preds = np.zeros(len(data), dtype=object)
    for i, prompt in enumerate(data):
        input_ids = tokenizer.encode(prompt, return_tensors='pt')
        sample_outputs = model.generate(
            input_ids,
            do_sample=True,
            max_length=5,
            top_k=100,
            top_p=0.7,
            # Only the first sample is ever used, so request exactly one
            # instead of generating three and discarding two.
            num_return_sequences=1,
        )
        preds[i] = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    return preds
        
def pred_gpt(model, data):
    """Generate one text per prompt with the GPT model.

    Args:
        model: Object exposing ``generate_one(**kwargs)``.
        data: Sequence of prompt strings.

    Returns:
        numpy.ndarray: object array with whatever ``generate_one`` returned
        for each prompt, in input order.
    """
    # Sampling settings shared by every prompt.
    sampling = dict(max_length=512, top_k=100, top_p=0.9, temperature=0.9)
    generated = np.zeros(len(data), dtype=object)
    for idx, prompt in enumerate(data):
        generated[idx] = model.generate_one(prompt=prompt, **sampling)
    return generated

def save_preds(prompts, preds, filename="out.csv"):
    """Write prompts and their predictions to a two-column CSV file.

    Args:
        prompts: Sequence of prompt values, one per prediction.
        preds: Sequence of predictions.
        filename: Destination CSV path; columns are "prompt" and "prediction".
    """
    table = np.zeros((len(preds), 2), dtype=object)
    for column, values in enumerate((prompts, preds)):
        table[:, column] = values
    frame = pd.DataFrame(data=table, columns=["prompt", "prediction"])
    frame.to_csv(filename, index=False)

def save_metrics(filename, mse, accuracy, type_match):
    """Write a plain-text metrics report to ``filename``.

    The report starts with a fixed explanation of the three scoring cases,
    followed by two separator rules and the actual numbers.

    Args:
        filename: Path of the text file to (over)write.
        mse: Mean squared error value to report.
        accuracy: Accuracy value to report.
        type_match: Mapping with "float", "string" and "diff" entries.
    """
    explanation = (
        "We have three cases. The first case the generated evaluation has the same type as the true evaluation\n",
        "In the case they are both real values (e.g. 1.0 and -2.0) we compute the MSE\n",
        "In the case they are both strings (#1 or 0-1) we compute the accuracy (exact match)\n",
        "The type mismatch is the fraction of times the types are the different for the generated evaluation and the true one\n",
        "Note: -1 for MSE or accuracy means that it was not computed\n",
    )
    rule = "-"*100
    with open(filename, "w") as report:
        for sentence in explanation:
            report.write(sentence)
        # Two identical separator lines between the explanation and the numbers.
        for _ in range(2):
            report.write(rule)
            report.write("\n")
        report.write(f"Matched floats: {type_match['float']}\n")
        report.write(f"Mean Squared Error: {mse}\n\n")
        report.write(f"Matched string: {type_match['string']}\n")
        report.write(f"Accuracy: {accuracy}\n\n")
        # The report intentionally ends without a trailing newline.
        report.write(f"Type mismatch: {type_match['diff']}")

### Load data

In [6]:
# Read only the prompt ('input') and ground-truth ('target') columns.
df = pd.read_csv(
    '../input/chess-evaluation-dataset/enc_dec_chess_evaluation.csv',
    usecols=['input', 'target'],
    low_memory=True,
)
print('Num Examples: ', len(df))
print('Null Values\n', df.isna().sum())

# Draw a fixed-seed sample as the test set and keep it as a plain array;
# change random_state to draw a different sample.
test_size = 1_000
test_dataset = (
    df.sample(n=test_size, random_state=5)
    .reset_index(drop=True)
    .to_numpy()
)
test_dataset[0]

Num Examples:  12746073
Null Values
 input     0
target    0
dtype: int64


array(['chess eval: Nf3 d5 e4 dxe4 Ng5 Nf6 d3 exd3 Bxd3 h6 Nxf7 Kxf7 Bg6+ Kxg6 Qxd8 Bf5 Qxc7 Nbd7 O-O e6 Nc3 Bc5 Qg3+ Kf7 Be3 Bxe3',
       '7.0'], dtype=object)

In [17]:
# Evaluate on the first `tmp` rows of the sample only:
# column 0 holds the prompt, column 1 the true evaluation.
tmp = 100
test_prompt = test_dataset[:tmp, 0]
test_label = test_dataset[:tmp, 1]

### Load GPT model, generate, evaluate

In [10]:
# Fine-tuned GPT checkpoint and its tokenizer file
# (add to_gpu=True to the arguments to run generation on a GPU).
gpt_model_files = {
    "model_folder": "../input/gpt-finetuned-model/chess_task2_gpt_finetune_model_v1/chess_task2_gpt_finetune_model_v1",
    "tokenizer_file": "../input/gpt-finetuned-model/chess_task2_gpt_finetune_model_v1/chess_task2_gpt_finetune_model_v1/aitextgen.tokenizer.json",
}
ai = aitextgen(**gpt_model_files)

In [18]:
# Generate one GPT completion per test prompt and persist the raw text.
gpt_preds = pred_gpt(model=ai, data=test_prompt)
save_preds(prompts=test_prompt, preds=gpt_preds, filename="gpt_preds.csv")

In [19]:
# Reduce each GPT generation to its predicted evaluation token.
gpt_preds_tokens = get_pred_token_gpt(labels=gpt_preds)

In [20]:
# Score the GPT evaluation tokens against the true evaluations.
gpt_mse, gpt_accuracy, gpt_type_match = get_metrics(
    labels=test_label,
    predictions=gpt_preds_tokens,
)

In [21]:
# Persist the GPT metrics report as plain text.
save_metrics(
    filename="gpt_metrics.txt",
    mse=gpt_mse,
    accuracy=gpt_accuracy,
    type_match=gpt_type_match,
)

### Load Encoder Decoder model, generate, evaluate

In [23]:
# The tokenizer and the fine-tuned encoder-decoder are loaded from the same directory.
enc_dec_model_dir = '../input/chessevaluationmodel/FinetunedModel'
tokenizer = RobertaTokenizer.from_pretrained(enc_dec_model_dir)
enc_dec_model = EncoderDecoderModel.from_pretrained(enc_dec_model_dir)

Didn't find file ../input/chessevaluationmodel/FinetunedModel/added_tokens.json. We won't load it.
loading file ../input/chessevaluationmodel/FinetunedModel/vocab.json
loading file ../input/chessevaluationmodel/FinetunedModel/merges.txt
loading file None
loading file ../input/chessevaluationmodel/FinetunedModel/special_tokens_map.json
loading file ../input/chessevaluationmodel/FinetunedModel/tokenizer_config.json
loading file ../input/chessevaluationmodel/FinetunedModel/tokenizer.json


In [54]:
# Generate one encoder-decoder output per test prompt and persist the raw text.
enc_dec_preds = pred_enc_dec(model=enc_dec_model, tokenizer=tokenizer, data=test_prompt)
save_preds(prompts=test_prompt, preds=enc_dec_preds, filename="enc_dec_preds.csv")

In [55]:
# Reduce each encoder-decoder output to its predicted evaluation token.
encdec_preds_tokens = get_pred_token_encdec(labels=enc_dec_preds)

In [56]:
# Score the encoder-decoder evaluation tokens against the true evaluations.
encdec_mse, encdec_accuracy, encdec_type_match = get_metrics(
    labels=test_label,
    predictions=encdec_preds_tokens,
)

In [57]:
# Persist the encoder-decoder metrics report as plain text.
save_metrics(
    filename="encdec_metrics.txt",
    mse=encdec_mse,
    accuracy=encdec_accuracy,
    type_match=encdec_type_match,
)

## Others