In [5]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login, login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
hf_token = os.getenv("hf_token")

In [6]:
login(hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/s448780/.cache/huggingface/token
Login successful


## Load model

In [30]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [31]:
model_id = "mistralai/Mistral-7B-v0.1"

# load model 
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config, 
    use_cache=False, 
    device_map="auto"
)
model.config.pretraining_tp = 1 #parallel GPU

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## dataset

In [16]:
df = pd.read_csv("filtered_data/data.csv")
df["instruction"] = "Assume you are a chess master, explain the strategy used by each player based on the provided chess moves."
df.head()

Unnamed: 0,moves,explanation,instruction
0,e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h...,### Game Analysis\n\n**Move Pair 1:** \n- **Wh...,"Assume you are a chess master, explain the str..."
1,e4 e6 d4 d5 Nd2 dxe4 Nxe4 Nf6 Bd3 Nxe4 Bxe4 Nc...,"### Game Analysis\n1. **White: e4, Black: e6**...","Assume you are a chess master, explain the str..."
2,e4 e6 b3 d5 f3 Nc6 Bb2 dxe4 fxe4 Qh4+ Ke2 Qxe4...,"### Game Analysis\n\n1. **White: e4, Black: e6...","Assume you are a chess master, explain the str..."
3,e4 e5 Nf3 Nc6 d4 Bb4+ c3 Bd6 Bb5 Qf6 O-O exd4 ...,"### Game Analysis:\n\n1. **White: e4, Black: e...","Assume you are a chess master, explain the str..."
4,d4 Nf6 c4 e6 Nf3 d5 c5 b6 b4 a5 Ba3 axb4 Bxb4 ...,"### Game Analysis\n\n1. **d4, Nf6**\n - **Wh...","Assume you are a chess master, explain the str..."


In [17]:
df.to_csv("./filtered_data/data.csv", index = False)

In [18]:
dataset = load_dataset("csv", data_files="./filtered_data/data.csv")
dataset

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['moves', 'explanation', 'instruction'],
        num_rows: 500
    })
})

In [19]:
# def format_instruction(sample):
#     return f"""You are a chess expert. Explain the rationale behind the last move from the given chess moves in Algebraic notation - 
#         {sample["moves"]}

#         ### Response:
#         {sample["explanation"]}
#     """

In [20]:
# sample = dataset["train"][0]
# print(format_instruction(sample))

In [50]:
def create_text_row(instruction, moves, explanation):
    return f"""<s>[INST] {instruction} Here are the chess moves in Algenraic Notaion - {moves} [/INST] \\n {explanation} </s>"""

In [51]:
EOS_TOKEN = tokenizer.eos_token
def formatting_func(df):
    instructions = df["instruction"]
    inputs      = df["moves"]
    outputs     = df["explanation"]
    texts = []
    for instruction, inp, output in zip(instructions, inputs, outputs):
        text = create_text_row(instruction, inp, output) # eos token coming from create_text_row function
        #print(text)
        texts.append(text)
    return {"text" : texts}

In [52]:
dataset = dataset.map(formatting_func, batched = True)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [53]:
dataset

DatasetDict({
    train: Dataset({
        features: ['moves', 'explanation', 'instruction', 'text'],
        num_rows: 500
    })
})

In [55]:
dataset["train"]["text"][0]

"<s>[INST] Assume you are a chess master, explain the strategy used by each player based on the provided chess moves. Here are the chess moves in Algenraic Notaion - e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h5 Ne4 Nc6 Be3 Qe7 Qd2 Bh6 Bxh6 Nxh6 Nf6+ Kd8 Bh7 Nf5 Bxf5 exf5 c3 h4 Qg5 g3 fxg3 hxg3 Qxg3 Qf8 Rxf5 Ne7 Rg5 Ng6 Nd2 Qh6 Rh5 Qg7 Qg4 Bc8 Rxh8+ Qxh8 Rf1 d6 Qg5 Qh4 Qe3 Bb7 e6 [/INST] \\n ### Game Analysis\n\n**Move Pair 1:** \n- **White: e4**: White opens with the most common 1.e4 move, aiming for central control and freeing the bishop and queen.\n- **Black: e6**: Black responds with the French Defense, a solid choice, intending to challenge the center with d5 later.\n\n**Move Pair 2:** \n- **White: d4**: White aims to further control the center and allows for the development of the bishop.\n- **Black: b6**: Black prepares to fianchetto the light-squared bishop, potentially targeting the e4 pawn.\n\n**Move Pair 3:**\n- **White: e5**: White advances the pawn, gaining more space in

## LoRA

In [56]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [57]:
'''
lora_alpha - scaling factor applied to the low-rank matrices. It helps in balancing the contribution of the low-rank update to the original weights. 
Higher values of lora_alpha can increase the influence of the low-rank updates. It's a form of regularization to ensure the model doesn't deviate too much from the original weights.

bias - "none", "all", or "lora_only".
need more research on this.

'''
peft_config = LoraConfig(
    r=32,
    lora_alpha=16, 
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

## Training

In [58]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [61]:
from trl import SFTTrainer
from transformers import TrainingArguments

model_args = TrainingArguments(
    output_dir="mistral_7b",
    # num_train_epochs=3,
    max_steps=50,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit", # apparently more efficient for 32 bit GPUs
    logging_steps=20,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=True,
    tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    dataset_text_field = "text",
    peft_config=peft_config,
    max_seq_length=2048,
    tokenizer=tokenizer,
    packing=True,
    args=model_args,
)

trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Generating train split: 0 examples [00:00, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
20,1.0006
40,0.8808




TrainOutput(global_step=50, training_loss=0.9168302154541016, metrics={'train_runtime': 893.7981, 'train_samples_per_second': 0.448, 'train_steps_per_second': 0.056, 'total_flos': 3.53626333642752e+16, 'train_loss': 0.9168302154541016, 'epoch': 0.684931506849315})

## test

In [14]:
# test_input = "Explain the rationale behind the last move from the given chess moves in Algebraic notation -"
# test_moves = "d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4"
# test_prompt = f""" {test_input}
#         ### Input:
#         {test_moves}

#         ### Response:

#     """

In [86]:
test_prompt = '''
[INST] Assume you are a chess master, explain the strategy used by each player based on the provided chess moves. Here are the chess moves in Algenraic Notaion - e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h5 Ne4 Nc6 Be3 Qe7 Qd2 Bh6 Bxh6 Nxh6 Nf6+ Kd8 Bh7 Nf5 Bxf5 exf5 c3 h4 Qg5 g3 fxg3 hxg3 Qxg3 Qf8 Rxf5 Ne7 Rg5 Ng6 Nd2 Qh6 Rh5 Qg7 Qg4 Bc8 Rxh8+ Qxh8 Rf1 d6 Qg5 Qh4 Qe3 Bb7 e6 [/INST]
'''

In [87]:
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")

In [88]:
model.eval()
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



[INST] Assume you are a chess master, explain the strategy used by each player based on the provided chess moves. Here are the chess moves in Algenraic Notaion - e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h5 Ne4 Nc6 Be3 Qe7 Qd2 Bh6 Bxh6 Nxh6 Nf6+ Kd8 Bh7 Nf5 Bxf5 exf5 c3 h4 Qg5 g3 fxg3 hxg3 Qxg3 Qf8 Rxf5 Ne7 Rg5 Ng6 Nd2 Qh6 Rh5 Qg7 Qg4 Bc8 Rxh8+ Qxh8 Rf1 d6 Qg5 Qh4 Qe3 Bb7 e6 [/INST]

## Game analysis:

### Move Pair 1: White: e4, Black: e6
- **White:** e4 is the King's Pawn Opening, controlling the center and allowing the knight to develop quickly.
- **Black:** e6 supports the d5 square, allowing for flexibility in the opening.

### Move Pair 2: White: d4, Black: b6
- **White:** d4 challenges Black's control of the center.
- **Black:** b6 prepares to fianchetto the light-squared bishop, increasing control of the long diagonal.

### Move Pair 3: White: e5, Black: Bb7
- **White:** e5 continues to control the center and opens lines for the pieces.
- **Black:** Bb7 develops the bishop 

## testing base knowledge

In [91]:
test_prompt = '''
[INST]You will be given the name of a chess piece. Print how the chess piece can move. Do not add any game analysis. 
The chess piece  -
Knight.[/INST]
'''
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9)
    
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



[INST]You will be given the name of a chess piece. Print how the chess piece can move. Do not add any game analysis. 
The chess piece  -
Knight.[/INST]



- **Knight:**
  - **Legal Moves:**
    - Knight is a piece that moves in an "L" shape, jumping over other pieces. It can move to any square that is two squares away, in a straight line, and one square to the side.
    - Example move: b1-c3 or c4-d6.
  - **Tactical Uses:**
    - The knight is a versatile piece and can create pressure in different parts of the board.
    - It can reach squares inaccessible to other pieces.
    - Knights can control key squares and can be used in attacks or defenses.

**White: Kg1**
- **Kg1:** This is the king’s safety move. The king is trying to get out of check, and this is a natural response.

**Black: Rxe2+**
- **Rxe2+:** Black captures the pawn, creating a double check situation. The rook forces White to make an immediate decision regarding the king's safety.

**White: Kf1**
- **Kf1:** This is ano

In [84]:
test_prompt = '''
[INST]You will be given a list of Algebraic notation based chess moves. Your job is to predict the winner only. Do not print anything else -
d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4.[/INST]
'''
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9)
    
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



[INST]You will be given a list of Algebraic notation based chess moves. Your job is to predict the winner only. Do not print anything else -
d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4.[/INST]

**Pair 1:**
- **White: d4**: White opens with the Queen's Pawn Opening, aiming for control of the center and potential lateral pressure.
- **Black: d5**: Black mirrors the control of the center and opens up lines for both the Queen and the Bishop.

**Pair 2:**
- **White: c4**: White aggressively attacks Black's center with the English Opening, aiming for a strong pawn structure and central control.
- **Black: c6**: Black prepares for the Slav Defense, reinforcing the d5 pawn and preparing to develop pieces.

**Pair 3:**
- **White: cxd5**: White captures the pawn in the center, aiming to disrupt Black's pawn structure and gain tempo.
- **Black: e6**: Black recaptures with the pawn, maintaining central tension and opening lines for the Bishops.

**Pair 4:**
- **White: dxe6**: White capture

In [85]:
test_prompt ='''
[INST]Assume you are a geography expert. You will be provided with the name of a country. What's the capital of the country?",
Australia[/INST].
'''
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")

with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9)
    
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



[INST]Assume you are a geography expert. You will be provided with the name of a country. What's the capital of the country?",
Australia[/INST].

# Summary of the Country and Capital
- **Country**: Australia
- **Capital**: Canberra

# Discussion
### Country: Australia
- **Geography**: Australia is the largest country in Oceania, comprising the mainland of the Australian continent, the island of Tasmania, and numerous smaller islands.
- **Population**: Around 25 million people reside in Australia, with the majority concentrated in the cities of Sydney, Melbourne, Brisbane, and Perth.
- **Economy**: Australia has a diverse economy, including agriculture, mining, and services. It is known for its exports of minerals such as iron ore and coal, as well as for its agriculture, especially in the areas of wine and wheat.
- **Political System**: Australia is a federal parliamentary constitutional monarchy with a democratic political system.

### Capital: Canberra
- **Location**: Canberra is lo

## testing different prompt

In [92]:
test_input = "Assume you are a chess master. your job is to suggest the next move."
test_moves = "d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4"
test_prompt = f"[INST]{test_input} Here are the moves - {test_moves}[\INST]"
input_ids = tokenizer(test_prompt, return_tensors="pt", truncation=True).input_ids.to("cuda:0")
with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=2048,
        do_sample=True, 
        top_p=0.9,
        temperature=0.9
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST]Assume you are a chess master. your job is to suggest the next move. Here are the moves - d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4[\INST]

**1. White: d4, Black: d5**
    - *White* aims to control the center and open lines for the queen and bishop.
    - *Black* responds symmetrically to maintain balance.

**2. White: c4, Black: c6**
    - *White* continues to control the center, possibly preparing to launch a pawn storm on d5.
    - *Black* prepares to counter with the Dutch Defense, a flexible and complex pawn structure.

**3. White: cxd5, Black: e6**
    - *White* initiates pawn exchanges to disrupt Black's pawn structure.
    - *Black* opens lines for the bishop and prepares to castle.

**4. White: dxe6, Black: fxe6**
    - *White* continues to challenge Black's pawn structure.
    - *Black* recaptures with the pawn, maintaining central control.

**5. White: Nf3, Black: Bb4+**
    - *White* develops the knight to attack the e6 pawn and prepare for kingside castling.

## saving

In [None]:
#  Assume you are a chess master. can you suggest what should be the next move? 
#         ### Input:
#         d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4

#         ### Response:

#      ### Game Analysis

# 1. **White: d4, Black: d5**
#    - **White's Move**: Opening with the Queen's Pawn to control the center.
#    - **Black's Move**: Responds symmetrically to contest the center.

# 2. **White: c4, Black: c6**
#    - **White's Move**: Prepares the Queen's Gambit to challenge Black's center.
#    - **Black's Move**: Prepares to fianchetto the King's Bishop and supports the center.

# 3. **White: cxd5, Black: e6**
#    - **White's Move**: Accepts the gambit and challenges Black's center pawns.
#    - **Black's Move**: Prepares to recapture the gambit pawn and solidifies the center.

# 4. **White: dxe6, Black: fxe6**
#    - **White's Move**: Exchanges the gambit pawn to weaken Black's pawn structure.
#    - **Black's Move**: Recaptures the pawn, opening the f-file for the Rook.

# 5. **White: Nf3, Black: Bb4+**
#    - **White's Move**: Develops a knight, preparing to castle.
#    - **Black's Move**: Pins the knight to the king, creating pressure.

# 6. **White: Nc3, Black: Ba5**
#    - **White's Move**: Defends the knight and further develops.
#    - **Black's Move**: Moves the bishop, maintaining the pin and potential pressure on the d4 pawn.

# 7. **White: Bf4**
#    - **White's Move**: Develops the bishop, aiming to control the e5 square and prepare for further attack.

# ### Summary of Strategy Used and Winner

# #### Strategies:
# - **White's Strategy**: Early central control, aggressive gambit play to weaken Black's pawn structure, and rapid development.
# - **Black's Strategy**: Fianchetto to control the center, counter-gambit to challenge White’s aggression, and maintain pressure through pins and potential threats.

# #### Winner:
# Determining the winner requires further moves and strategic depth. However, at this stage, both players have developed pieces and controlled the center, and the game is open for continued development. The winner would depend on subsequent moves and tactics.

In [93]:
trainer.save_model()

