In [None]:
!pip install sentencepiece

In [1]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

In [2]:
from trl import SFTTrainer
from transformers import TrainingArguments

In [3]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
device = "cuda:0"

In [6]:
# model_id = "mistralai/Mistral-7B-Instruct-v0.2"
model_id = "microsoft/Orca-2-13b"
bnb_config = BitsAndBytesConfig(
    load_in_8bit = True
)

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map = device,
    trust_remote_code = True,
    quantization_config = bnb_config
)
# tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [8]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
        "microsoft/Orca-2-13b",
        use_fast=False,
    )

In [9]:
# system_message = "Act as a Chess Master who knows all legal moves of chess pieces. You will be given the position of a chess piece, your job is to find its legal moves relative to its current position"
# user_message = '''
# A Knight in chess, moves like L.
# So Knight is on g1. 
# The legal move can be computed by 2 moves forward from g1 so g3, then either left or right so f3 or h3.
# So legal moves for Knight at g1 are f3 or h3.
# What are the legal moves of a knight if it's on b1, show the reasoning and analysis like the previous example'''

# prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"


In [10]:
# inputs = tokenizer(prompt, return_tensors='pt')
# output_ids = model.generate(inputs["input_ids"],)
# answer = tokenizer.batch_decode(output_ids)[0]

# print(answer)

In [11]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [12]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32003, 5120, padding_idx=0)
    (layers): ModuleList(
      (0-39): 40 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
          (k_proj): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
          (v_proj): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
          (o_proj): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=5120, out_features=13824, bias=False)
          (up_proj): Linear8bitLt(in_features=5120, out_features=13824, bias=False)
          (down_proj): Linear8bitLt(in_features=13824, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()


In [13]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

## data

In [14]:
import pandas as pd

In [15]:
df = pd.read_csv("../data/generated_data_last_500.csv")
df = df[["moves", "explanation"]]
df.head()

Unnamed: 0,moves,explanation
0,e4 e6 d4 b6 e5 Bb7 Nf3 h6 Bd3 g5 O-O g4 Nfd2 h...,### Game Analysis\n\n**Move Pair 1:** \n- **Wh...
1,e4 e6 d4 d5 Nd2 dxe4 Nxe4 Nf6 Bd3 Nxe4 Bxe4 Nc...,"### Game Analysis\n1. **White: e4, Black: e6**..."
2,e4 e6 b3 d5 f3 Nc6 Bb2 dxe4 fxe4 Qh4+ Ke2 Qxe4...,"### Game Analysis\n\n1. **White: e4, Black: e6..."
3,e4 e5 Nf3 Nc6 d4 Bb4+ c3 Bd6 Bb5 Qf6 O-O exd4 ...,"### Game Analysis:\n\n1. **White: e4, Black: e..."
4,d4 Nf6 c4 e6 Nf3 d5 c5 b6 b4 a5 Ba3 axb4 Bxb4 ...,"### Game Analysis\n\n1. **d4, Nf6**\n - **Wh..."


In [16]:
!mkdir filtered_data

mkdir: cannot create directory ‘filtered_data’: File exists


In [17]:
df.to_csv("./filtered_data/data.csv", index= False)

In [18]:
from datasets import load_dataset
dataset = load_dataset("csv", data_files="./filtered_data/data.csv")

Generating train split: 0 examples [00:00, ? examples/s]

In [19]:
dataset

DatasetDict({
    train: Dataset({
        features: ['moves', 'explanation'],
        num_rows: 500
    })
})

In [20]:
def generate_prompt(data_point):
    user_prompt = "Based on the provided Algebraic chess moves, explain the rationaled behind the last move and the strategy being used by the player - "
    moves = data_point["moves"]
    explanation = data_point["explanation"]
    return f"<|im_start|>user\n{user_prompt, moves}<|im_end|>\n<|im_start|>assistant\n{explanation}<|im_end|>"


In [21]:
def generate_and_tokenize_prompt(data_point):
  full_prompt = generate_prompt(data_point)
  tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
  return tokenized_full_prompt

In [22]:
data = dataset["train"].shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

## finetune

In [23]:
training_args = transformers.TrainingArguments(
    auto_find_batch_size=True,
    num_train_epochs=1,
    learning_rate=2e-4,
    bf16=False,
    save_total_limit=4,
    logging_steps=10,
    output_dir="experiments",
    save_strategy='epoch',
)

In [24]:
torch.cuda.current_device()

0

In [25]:
trainer = transformers.Trainer(
    model=model.to("cuda:0"),
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()



Step,Training Loss
10,1.1699
20,0.7979
30,0.8892
40,0.8712
50,0.7715
60,0.8792
70,0.8816
80,0.7924
90,0.7801
100,0.8204




TrainOutput(global_step=500, training_loss=0.7715341873168945, metrics={'train_runtime': 3928.7001, 'train_samples_per_second': 0.127, 'train_steps_per_second': 0.127, 'total_flos': 9.502370422972416e+16, 'train_loss': 0.7715341873168945, 'epoch': 1.0})

In [26]:
model.save_pretrained("./orca_2")



## Testing model

In [3]:

model = AutoModelForCausalLM.from_pretrained("./orca_2/", device_map="auto", load_in_8bit=True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [4]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32003, 5120, padding_idx=0)
    (layers): ModuleList(
      (0-39): 40 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=5120, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=5120, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (k_proj): lora.Linear8bitLt(
            (base_layer): Linear8bitLt(in_features=5120, out_features=5120, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropo

In [5]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
        "microsoft/Orca-2-13b",
        use_fast=False,
    )

In [13]:
prompt = "From the following game provide in Algebraic Notation, who will be the winner -  "
moves = "d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5 Bf4"
final_prompt = f"<|im_start|>system\n Provide a concise answer only<im_end>\n<|im_start|>user\n{prompt, moves}<|im_end|>\n<|im_start|>assistant"
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [14]:
device = "cuda:0"

encoding = tokenizer(final_prompt, return_tensors="pt").to(device)
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))



<|im_start|> user
d4 d5 <|im_end|> 
 <|im_start|> assistant
### Game Analysis

1. **White: d4, Black: d5**
   - **White:** Opens with the Queen's Pawn Opening, aiming to control the center and open lines for the bishop and queen.
   - **Black:** Responds with the Queen's Pawn Opening, mirroring White's strategy and contesting the center.

2. **White: Nf3, Black: Nf6**
   - **White:** Develops the knight to a natural square, attacking the d5 pawn and preparing for kingside castling.
   - **Black:** Develops the knight to a natural square, defending the d5 pawn and preparing for kingside castling.

3. **White: c4, Black: e6**
   - **White:** Advances the c-pawn


In [12]:
tokenizer.model_max_length

4096