In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Standard GPT-2: `tokenizer` and `model`.

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Fall back to EOS as the padding token when the tokenizer defines none,
# so that calling the tokenizer with `padding=True` does not fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# `trust_remote_code` is only honoured by the Auto* classes; on a concrete
# class such as GPT2LMHeadModel it is ignored (with a warning), so it is
# not passed here.
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()


# Fine-tuned GPT-2: `tokenizer1` and `model1`.

# Forward slashes point at the same local directory on Windows and also
# work on POSIX systems, unlike a hard-coded backslash separator.
FINETUNED_MODEL_DIR = "Model90K/model"

tokenizer1 = GPT2Tokenizer.from_pretrained(FINETUNED_MODEL_DIR)
if tokenizer1.pad_token is None:
    tokenizer1.pad_token = tokenizer1.eos_token

model1 = GPT2LMHeadModel.from_pretrained(FINETUNED_MODEL_DIR)
model1.eval()

  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:

def generate_text(model, tokenizer, prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0.95):
    """Sample a continuation of ``prompt`` and return only the newly generated text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``; must be callable and provide
            ``decode`` and ``eos_token_id``.
        prompt: Text to condition the generation on.
        max_new_tokens: Maximum number of tokens generated after the prompt.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_k: Top-k sampling cutoff forwarded to ``model.generate``.
        top_p: Nucleus sampling cutoff forwarded to ``model.generate``.

    Returns:
        The decoded continuation (prompt excluded), stripped of leading and
        trailing whitespace.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)

    # Keep the inputs on the same device as the model so the call also works
    # when the model has been moved off the CPU.
    device = model.device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)
    prompt_length = input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            # Bound the continuation directly instead of computing a total
            # `max_length` from the prompt length.
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt by token position rather than by the character length of
    # the decoded prompt: decoding may normalise whitespace across the
    # prompt/continuation boundary, which would misalign a character slice.
    new_token_ids = outputs[0][prompt_length:]
    new_text = tokenizer.decode(new_token_ids, skip_special_tokens=True)

    return new_text.strip()

In [3]:
import chess
def get_fen_from_uci_sequence(uci_sequence, starting_fen=None):
    """Replay a whitespace-separated sequence of UCI moves and report the result.

    Args:
        uci_sequence: String of UCI moves separated by whitespace.
        starting_fen: Optional FEN of the position to start from; when it is
            None (or empty) the standard starting position is used.

    Returns:
        A ``(final_fen, board)`` tuple with the FEN string and the
        ``chess.Board`` reached after the last move that was applied.

    Replay stops at the first token that is not valid UCI or not legal in the
    current position; a message is printed and the position reached so far is
    still printed and returned.
    """
    board = chess.Board(starting_fen) if starting_fen else chess.Board()

    for token in uci_sequence.strip().split():
        try:
            candidate = chess.Move.from_uci(token)

            # Guard clause: stop replaying as soon as a move is illegal.
            if candidate not in board.legal_moves:
                print(f"Moves {token} not legal!")
                print("Moves sequence was interrupted.")
                break

            board.push(candidate)
        except ValueError:
            print(f"Moves {token} not in UCI format!")
            print("Moves sequence was interrupted.")
            break

    # Print the board in its current state.
    print("\nState of the board:")
    print(board)

    # Return the final FEN together with the board object.
    final_fen = board.fen()
    print(f"\nFEN: {final_fen}")

    return final_fen, board

Selection of the History of the Game from the Start

In [15]:
# Short game history (13 half-moves in UCI notation) used in the prompts below.
movesHistory="c2c4 c7c5 b1a3 g8f6 d2d3 d7d6 a3b5 g7g6 c1h6 f8g7 h6g7 c8g4 b5d4"

# No starting FEN is passed, so the moves are replayed from the initial position.
FEN,board=get_fen_from_uci_sequence(movesHistory)


State of the board:
r n . q k . . r
p p . . p p B p
. . . p . n p .
. . p . . . . .
. . P N . . b .
. . . P . . . .
P P . . P P P P
R . . Q K B N R

FEN: rn1qk2r/pp2ppBp/3p1np1/2p5/2PN2b1/3P4/PP2PPPP/R2QKBNR b KQkq - 2 7


### ZeroShot

In [None]:
# Zero-shot prompt: instructions plus the short move history, with no worked examples.
prompt1 = f"""You are a chess assistant. Given a sequence of UCI moves, suggest two reasonable next moves for the side to play.
Remeber the first moves of the sequence I gave is played by white, the second by black and so on.

Moves so far:
{movesHistory}

Your suggestions:"""

In [None]:
# Zero-shot prompt on the standard model; the fourth argument (50) is max_new_tokens.
stdGPT2Response=generate_text(model,tokenizer,prompt1,50)
print(f"Standard GPT2 response:\n{stdGPT2Response}\n")

# Same prompt and token budget on the fine-tuned model, for comparison.
fineTuneGPT2Response=generate_text(model1,tokenizer1,prompt1,50)
print(f"Fine-tuned GPT2 response:\n{fineTuneGPT2Response}\n")

Standard GPT2 response:
Move #1:

4. d4 dxe4 5. Nf2

Move #2:

5. Bc4

Move #3:

6. b5 e6 7. B

Fine-tuned GPT2 response:
g4d1 d1c2 d6d5 c4d5 f6d5 g7e5 d8c7 e5d7 e7e6 d5e6 d7e8 e6f7 e8



The part of the prompt asking for two possible moves was completely ignored: the fine-tuned model simply starts playing moves for both players.
The sentence "Remeber the first moves of the sequence I gave is played by white, the second by black and so on" in the prompt is crucial for the fine-tuned model; without it, the response often starts with a move for the wrong player.

The difference between the two models is clear: standard GPT-2 does not know what the UCI format is and outputs seemingly random characters as a move, whereas the fine-tuned model no longer seems to understand the request for two suggestions.

Sometimes the fine-tuned model starts its response with a single character followed by a space and then a sequence of UCI moves; if that single character is ignored, the move that follows is usually legal.

In [20]:
# Longer game history in UCI notation, used to test the models on a longer prompt.
movesHistoryLong="d2d4 d7d5 c2c4 e7e6 b1c3 c7c5 c4d5 e6d5 g1f3 g8f6 c1g5 c5d4 f3d4 f8e7 e2e3 b8c6 f1e2 e8g8 e1g1 a7a6 a1c1 f8e8 e2f3 c8e6 c3e2 c6e5 e2f4 e5f3 d1f3 e6g4 f3g3 d8d7 g5f6 e7f6 h2h3 g4f5 d4f5 d7f5 c1c5 a8d8 f1d1 f6b2 c5d5 d8d5 f4d5 f5c2 d5e7 g8f8 d1d7 c2c5 e7d5 b2e5 f2f4 e5d6 g3g5 e8e3 d5e3 c5e3 g1f1 e3d3 f1f2 d3d4 f2f3 d4d1 f3f2 d1d2 f2f1 d2d1 f1f2 d1d2 f2f1 d2d3 f1f2 d3d2 f2f1 d2d3 f1f2 d3d2 f2f1 d2d3 f1f2 d3d4 f2f3 d4d1 f3f2"

# Replayed from the initial position; note this overwrites the earlier FEN and board.
FEN,board=get_fen_from_uci_sequence(movesHistoryLong)


State of the board:
. . . . . k . .
. p . R . p p p
p . . b . . . .
. . . . . . Q .
. . . . . P . .
. . . . . . . P
P . . . . K P .
. . . q . . . .

FEN: 5k2/1p1R1ppp/p2b4/6Q1/5P2/7P/P4KP1/3q4 b - - 27 43


In [18]:
# Zero-shot prompt with the same wording as prompt1, but using the long move history.
promptL = f"""You are a chess assistant. Given a sequence of UCI moves, suggest two reasonable next moves for the side to play.
Remeber the first moves of the sequence I gave is played by white, the second by black and so on.

Moves so far:
{movesHistoryLong}

Your suggestions:"""

In [19]:
# Long-history prompt on the standard model; the fourth argument (50) is max_new_tokens.
stdGPT2Response=generate_text(model,tokenizer,promptL,50)
print(f"Standard GPT2 response:\n{stdGPT2Response}\n")

# Same prompt and token budget on the fine-tuned model, for comparison.
fineTuneGPT2Response=generate_text(model1,tokenizer1,promptL,50)
print(f"Fine-tuned GPT2 response:\n{fineTuneGPT2Response}\n")

Standard GPT2 response:
This is the most reasonable move, given the sequence I gave.

The last move is played by white, the second by black and so on.

Remeber the first moves of the sequence I gave is played by white

Fine-tuned GPT2 response:
1 f2d3 f1 f1f2 d3d2 f2f3 d1f2 d3d4 f3d2 f2 d7d5 f2 f2f3 d3d2 f1 f



Even with a history that is not huge compared to those in the dataset, the fine-tuned model also starts to break down: it proposes illegal moves, sometimes moves a piece as if it were a different one, and starts emitting two-character "UCI moves". This is probably caused by the short context window GPT-2 can handle.

### MultiShot

In [12]:


# Multi-shot prompt: two worked examples followed by the short move history.
# Trailing spaces inside the string are part of the prompt and are kept as-is.
prompt2=f"""
You are a chess assistant. Given a sequence of UCI moves, suggest two plausible next moves for the player to play.
Remeber the first moves of the sequence I gave is played by white, the second by black and so on.
    
Example 1  
Moves so far:  
d2d4 g8f6 c2c4 e7e6 g1f3 d7d5 b1c3 f8e7 c1g5 h7h6  
Suggestions: g5h4 e2e3  

Example 2  
Moves so far:  
e2e4 e7e5 g1f3 b8c6 f1b5 a7a6 b5a4 g8f6 e1g1 f8e7 f1e1 b7b5 a4b3  
Suggestions: c2c3 h2h3  

Now it's your turn:

Moves so far:  
{movesHistory}  
Suggestions: """

In [14]:
# Multi-shot prompt on the standard model; the fourth argument (50) is max_new_tokens.
stdGPT2Response=generate_text(model,tokenizer,prompt2,50)
print(f"Standard GPT2 response:\n{stdGPT2Response}\n")

# Same prompt and token budget on the fine-tuned model, for comparison.
fineTuneGPT2Response=generate_text(model1,tokenizer1,prompt2,50)
print(f"Fine-tuned GPT2 response:\n{fineTuneGPT2Response}\n")

Standard GPT2 response:
Example 3  

Moves so far:  

e2e4 e7e5  

Ditto.

Moves so far:  

e2e4 e7e

Fine-tuned GPT2 response:
1c1 d8e8 g7f5 f5e4 d3e2 e2a6 a2a4 e4f5 g4f3 g6f5 e7d7 a6b7 b7c



The problems seen with standard GPT-2 stay the same; moreover, what it outputs is not coherent with movesHistory: "c2c4 c7c5 b1a3 g8f6 d2d3 d7d6 a3b5 g7g6 c1h6 f8g7 h6g7 c8g4 b5d4"

The fine-tuned model does not improve in any way here, and more often starts the sequence with a three-character token that does not represent any move in UCI format.

### Conclusion

For the chess bot we will use the zero-shot method, as it seems to be more consistent.