In [1]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import pandas as pd

In [2]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## dataset

In [3]:
dataset = load_dataset("csv", data_files="./filtered_data/data.csv")
dataset

DatasetDict({
    train: Dataset({
        features: ['moves', 'explanation'],
        num_rows: 500
    })
})

In [4]:
def format_instruction(sample):
    return f"""You are a chess expert. Explain the rationale behind the last move from the given chess moves in Algebraic notation - 
        {sample["moves"]}

        ### Response:
        {sample["explanation"]}
    """

In [5]:
sample = dataset["train"][0]
print(format_instruction(sample))

You are a chess expert. Explain the rationale behind the last move from the given chess moves in Algebraic notation - 
        e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h5 Ne4 Nc6 Be3 Qe7 Qd2 Bh6 Bxh6 Nxh6 Nf6+ Kd8 Bh7 Nf5 Bxf5 exf5 c3 h4 Qg5 g3 fxg3 hxg3 Qxg3 Qf8 Rxf5 Ne7 Rg5 Ng6 Nd2 Qh6 Rh5 Qg7 Qg4 Bc8 Rxh8+ Qxh8 Rf1 d6 Qg5 Qh4 Qe3 Bb7 e6

        ### Response:
        ### Game Analysis

**Move Pair 1:** 
- **White: e4**: White opens with the most common 1.e4 move, aiming for central control and freeing the bishop and queen.
- **Black: e6**: Black responds with the French Defense, a solid choice, intending to challenge the center with d5 later.

**Move Pair 2:** 
- **White: d4**: White aims to further control the center and allows for the development of the bishop.
- **Black: b6**: Black prepares to fianchetto the light-squared bishop, potentially targeting the e4 pawn.

**Move Pair 3:**
- **White: e5**: White advances the pawn, gaining more space in the center and setting up an a

## Load model

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [7]:
model_id = "mistralai/Mistral-7B-v0.1"

# load model 
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config, 
    use_cache=False, 
    device_map="auto"
)
model.config.pretraining_tp = 1 #parallel GPU

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## LoRA

In [8]:
'''
lora_alpha - scaling factor applied to the low-rank matrices. It helps in balancing the contribution of the low-rank update to the original weights. 
Higher values of lora_alpha can increase the influence of the low-rank updates. It's a form of regularization to ensure the model doesn't deviate too much from the original weights.

bias - "none", "all", or "lora_only".
need more research on this.

'''
peft_config = LoraConfig(
    r=32,
    lora_alpha=64, 
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

## Training

In [9]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [10]:
from trl import SFTTrainer
from transformers import TrainingArguments

model_args = TrainingArguments(
    output_dir="mistral-7b",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit", # apparently more efficient for 32 bit GPUs
    logging_steps=20,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=True,
    tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    peft_config=peft_config,
    max_seq_length=2048,
    tokenizer=tokenizer,
    packing=True,
    formatting_func=format_instruction,
    args=model_args,
)

trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Generating train split: 0 examples [00:00, ? examples/s]



Step,Training Loss
20,0.9764
40,0.8897
60,0.845
80,0.7839
100,0.7202
120,0.7515
140,0.7373
160,0.6493
180,0.602
200,0.6008




TrainOutput(global_step=219, training_loss=0.7448688533208142, metrics={'train_runtime': 3923.4628, 'train_samples_per_second': 0.444, 'train_steps_per_second': 0.056, 'total_flos': 1.5411741419844403e+17, 'train_loss': 0.7448688533208142, 'epoch': 3.0})

## test

In [14]:
test_input = "Explain the rationale behind the last move from the given chess moves in Algebraic notation -"
test_moves = "d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4"
test_prompt = f""" {test_input}
        ### Input:
        {test_moves}

        ### Response:

    """

In [16]:
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")

In [17]:
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


In [18]:
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

 Explain the rationale behind the last move from the given chess moves in Algebraic notation -
        ### Input:
        d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4

        ### Response:

    1. **White: d4, Black: d5**
        - **White:** Opens the center and aims for control.
        - **Black:** Mirrors White’s move to contest the center.
    2. **White: c4, Black: c6**
        - **White:** Initiates the Queen’s Gambit, offering a pawn to challenge Black's central pawns.
        - **Black:** Supports the pawn on d5 and prepares to challenge White’s pawn structure.
    3. **White: cxd5, Black: e6**
        - **White:** Exchanges the pawn to gain space and undermine Black's center.
        - **Black:** Recaptures the pawn and opens lines for the bishop and queen.
    4. **White: dxe6, Black: fxe6**
        - **White:** Further weakens Black’s pawn structure and opens lines for development.
        - **Black:** Recaptures the pawn, opening lines for the queen and maintaining 

## saving

In [19]:
trainer.save_model()



In [20]:
from huggingface_hub import login
login("")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/s448780/.cache/huggingface/token
Login successful


In [21]:
hf_repo = "adnaan525/mistral_ACIS"

In [22]:
trainer.model.push_to_hub("mistral_7b_ACIS")
tokenizer.push_to_hub("mistral_7b_ACIS")

adapter_model.safetensors:   0%|          | 0.00/865M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/adnaan525/mistral_7b_ACIS/commit/9455cdcbb7a79a5e2c3cb9fb65f4994e6e458640', commit_message='Upload tokenizer', commit_description='', oid='9455cdcbb7a79a5e2c3cb9fb65f4994e6e458640', pr_url=None, pr_revision=None, pr_num=None)

## testing different prompt

In [None]:
test_input = "Assume you are a chess master. can you suggest what should be the next move? "
test_moves = "d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4"
test_prompt = f""" {test_input}
        ### Input:
        {test_moves}

        ### Response:

    """
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
#  Assume you are a chess master. can you suggest what should be the next move? 
#         ### Input:
#         d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4

#         ### Response:

#      ### Game Analysis

# 1. **White: d4, Black: d5**
#    - **White's Move**: Opening with the Queen's Pawn to control the center.
#    - **Black's Move**: Responds symmetrically to contest the center.

# 2. **White: c4, Black: c6**
#    - **White's Move**: Prepares the Queen's Gambit to challenge Black's center.
#    - **Black's Move**: Prepares to fianchetto the King's Bishop and supports the center.

# 3. **White: cxd5, Black: e6**
#    - **White's Move**: Accepts the gambit and challenges Black's center pawns.
#    - **Black's Move**: Prepares to recapture the gambit pawn and solidifies the center.

# 4. **White: dxe6, Black: fxe6**
#    - **White's Move**: Exchanges the gambit pawn to weaken Black's pawn structure.
#    - **Black's Move**: Recaptures the pawn, opening the f-file for the Rook.

# 5. **White: Nf3, Black: Bb4+**
#    - **White's Move**: Develops a knight, preparing to castle.
#    - **Black's Move**: Pins the knight to the king, creating pressure.

# 6. **White: Nc3, Black: Ba5**
#    - **White's Move**: Defends the knight and further develops.
#    - **Black's Move**: Moves the bishop, maintaining the pin and potential pressure on the d4 pawn.

# 7. **White: Bf4**
#    - **White's Move**: Develops the bishop, aiming to control the e5 square and prepare for further attack.

# ### Summary of Strategy Used and Winner

# #### Strategies:
# - **White's Strategy**: Early central control, aggressive gambit play to weaken Black's pawn structure, and rapid development.
# - **Black's Strategy**: Fianchetto to control the center, counter-gambit to challenge White’s aggression, and maintain pressure through pins and potential threats.

# #### Winner:
# Determining the winner requires further moves and strategic depth. However, at this stage, both players have developed pieces and controlled the center, and the game is open for continued development. The winner would depend on subsequent moves and tactics.