In [4]:
# !pip install -q -U fsspec
# !pip install -q -U langchain transformers bitsandbytes accelerate
# !pip install -q langchain-community langchain-core
# !pip install -q --no-deps peft
# !pip install -q -U datasets
# !pip install -q lightning

In [None]:
import torch

from huggingface_hub import login, whoami
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Seed PyTorch's global RNG so that torch-driven randomness is repeatable.
torch.random.manual_seed(42)

# No token is passed explicitly, so no credential is hardcoded in the notebook.
login(token=None)
# NOTE(review): whoami() is the last expression of the cell, so its full result
# (including the account e-mail address) is rendered and saved with the
# notebook -- clear or redact that output before sharing or committing.
whoami()

{'type': 'user',
 'id': '6810d54444d15f9412e1accf',
 'name': 'ruhrpott',
 'fullname': 'Robin Uhrich',
 'email': 'robin.uhrich@gmail.com',
 'emailVerified': True,
 'canPay': False,
 'periodEnd': None,
 'isPro': False,
 'avatarUrl': '/avatars/ab2b57649313549bd71715ed22339198.svg',
 'orgs': [],
 'auth': {'type': 'access_token',
  'accessToken': {'displayName': 'JupyterNotebook',
   'role': 'read',
   'createdAt': '2025-05-21T11:10:56.360Z'}}}

In [6]:
import torch
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [7]:
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# 4-bit NF4 quantisation with double quantisation for the frozen base weights.
# The compute dtype is bfloat16 so the de-quantised matmuls use the same 16-bit
# format as the `bf16-mixed` precision the Lightning Trainer runs with. The
# previous float16 setting mixed two different half-precision formats within
# one forward pass and is more prone to overflow.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the quantised model onto the GPU together with its tokenizer.
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
from peft import LoraConfig


# LoRA hyper-parameters, collected in a dict and unpacked into `LoraConfig`.
adapter_configs = dict(
    target_modules='all-linear',
    lora_alpha=16,
    lora_dropout=0.1,
    r=16,
    bias='none',
    task_type='CAUSAL_LM',
)

lora_configs = LoraConfig(**adapter_configs)

In [9]:
from peft import prepare_model_for_kbit_training, get_peft_model

# Run PEFT's k-bit training preparation on the quantised base model
# (see the PEFT documentation for what exactly it changes on the model).
prepared_model_4bit = prepare_model_for_kbit_training(model_4bit)
# Attach the LoRA adapters described by `lora_configs` to the prepared model.
qlora_model = get_peft_model(prepared_model_4bit, lora_configs)

In [10]:
# Alpaca-style prompt template with three positional `{}` slots. They are
# filled, in order, with the instruction, the input and the response by
# `alpaca_prompt.format(...)` in `formatting_prompts_func`.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

import numpy as np

# End-of-sequence string of the tokenizer; it is appended to every training
# text built in `formatting_prompts_func`.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples, max_moves=150):
    """Turn a batch of chess games into Alpaca-style training texts.

    For every game one position index ``i`` is sampled with
    ``np.random.choice`` from the first ``max_moves`` plies. The moves played
    before that position (``game[:i]``) become the prompt input and the move
    actually played there (``game[i]``) becomes the target response. For
    ``i == 0`` the prompt input is the piece placement of the starting
    position instead of a move list.

    Args:
        examples: Batch mapping with a ``"Moves"`` entry that holds one
            sequence of UCI move strings per game.
        max_moves: Exclusive upper bound for the sampled position index.
            Defaults to 150, the value that used to be hard-coded.

    Returns:
        dict: ``{"text": [...]}`` with one formatted prompt per game, each
        terminated by ``EOS_TOKEN``.

    Raises:
        ValueError: If a game contains no moves, because there is no target
            move to build an example from.
    """
    start_instruction = "Make the opening move in a chess game. Produce the next move in UCI format to win the game"
    inbetween_instructions = "You are given a set of chess moves in UCI format. Produce the next move in UCI format to win the game"
    texts = []
    for game in examples["Moves"]:
        if len(game) == 0:
            # np.random.choice(0) would raise an opaque ValueError; fail with
            # a message that names the actual problem instead.
            raise ValueError("Cannot build a training example from a game without moves")
        i = np.random.choice(min(len(game), max_moves))
        if i == 0:
            instruction = start_instruction
            inpt = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR"
        else:
            instruction = inbetween_instructions
            # Every move played so far. The former `game[:i - 1]` dropped the
            # most recent move, so the target move did not belong to the
            # position described by the prompt.
            inpt = " " + " ".join(game[:i]) + " "
        output = " " + game[i] + " "
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        texts.append(alpaca_prompt.format(instruction, inpt, output) + EOS_TOKEN)
    return {"text": texts}

from datasets import load_dataset
# Seed NumPy's global RNG before mapping: `formatting_prompts_func` picks the
# training position of every game with `np.random.choice`, and so far only
# torch was seeded, so the generated texts differed from run to run.
np.random.seed(42)
dataset = load_dataset("../data", data_files="chess_game_0001_10K.parquet", split="train")
dataset = dataset.map(formatting_prompts_func, batched = True)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [11]:
# Ask PyTorch to release cached, currently unused GPU memory.
torch.cuda.empty_cache()

In [12]:
# Display the first mapped example as a sanity check.
# NOTE(review): this renders the complete move list of the game; showing only
# dataset[0]["text"] would keep the saved output short.
dataset[0]

{'Moves': ['d2d4',
  'f7f5',
  'g2g3',
  'g7g6',
  'f1g2',
  'f8g7',
  'g1f3',
  'd7d6',
  'c2c3',
  'e7e6',
  'a2a4',
  'g8f6',
  'd1c2',
  'd8e7',
  'b1d2',
  'e6e5',
  'd4e5',
  'd6e5',
  'e2e4',
  'b8c6',
  'e1g1',
  'f5e4',
  'd2e4',
  'c8f5',
  'f3d2',
  'e8c8',
  'b2b4',
  'g7h6',
  'f1e1',
  'h6d2',
  'c1d2',
  'f6e4',
  'g2e4',
  'e7e6',
  'd2g5',
  'd8d6',
  'a1d1',
  'd6d1',
  'e1d1',
  'h7h6',
  'g5e3',
  'a7a5',
  'c2b1',
  'h6h5',
  'b4b5',
  'c6e7',
  'e3g5',
  'h8e8',
  'h2h4',
  'e6c4',
  'd1e1',
  'f5e4',
  'e1e4',
  'c4e6',
  'g5f4',
  'e6f5',
  'f4e5',
  'e7d5',
  'b1e1',
  'd5b6',
  'f2f4',
  'b6d7',
  'e1e2',
  'b7b6',
  'e4e3',
  'e8e7',
  'e3e4',
  'd7c5',
  'e4d4',
  'e7d7',
  'g1g2',
  'c8d8',
  'g2h2',
  'd8c8',
  'e2g2',
  'c8b8',
  'g2a2',
  'b8a7',
  'a2g2',
  'a7b8',
  'g2e2',
  'b8c8',
  'e2f3',
  'c8b8',
  'f3d1',
  'b8c8',
  'd1e2',
  'c8b8',
  'e2d1',
  'b8b7',
  'd4d7',
  'c5d7',
  'e5d4',
  'd7c5',
  'h2g2',
  'f5d5',
  'g2g1',
  'd5f5',
  'd4c5',
 

In [13]:
from torch.utils.data import DataLoader

# Reuse the EOS token as padding token so that `padding=True` in `collate` can
# pad batches (presumably the tokenizer ships without a pad token -- confirm).
tokenizer.pad_token = tokenizer.eos_token
def collate(mini_batch):
    """Tokenise a list of dataset samples into one padded batch plus labels.

    The ``'text'`` field of every sample is tokenised in a single call with
    padding enabled. The labels are a copy of the token ids in which every
    position with a zero attention mask is replaced by -100.

    Returns:
        tuple: ``(input_encodings, labels)``.
    """
    ignore_index = -100
    texts = [sample['text'] for sample in mini_batch]
    input_encodings = tokenizer(texts, return_tensors='pt', padding=True)

    # Work on a copy so the encodings handed to the model stay untouched.
    labels = input_encodings.input_ids.clone()
    padded_positions = ~input_encodings.attention_mask.bool()
    labels[padded_positions] = ignore_index
    return input_encodings, labels

# One sample per step, shuffled; `collate` tokenises every mini-batch.
data_loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    collate_fn=collate,
)

In [14]:
import lightning as L
from torch.optim import AdamW
import torch.nn.functional as F

class LightningWrapper(L.LightningModule):
    """Lightning module that fine-tunes a causal language model with next-token loss.

    Args:
        model: Model to train; its forward output must expose ``.logits``.
        tokeniser: Tokeniser belonging to ``model``. It is stored on the module
            but not used by the training logic of this class.
        lr: Learning rate handed to AdamW.
    """

    def __init__(self, model, tokeniser, lr=1.e-4):
        super().__init__()
        self._model = model
        self._tokeniser = tokeniser
        self._lr = lr

    def configure_optimizers(self):
        """Build an AdamW optimiser over the trainable parameters only."""
        # Frozen parameters never receive gradients, so they are not handed to
        # the optimiser in the first place.
        trainable_params = [p for p in self.parameters() if p.requires_grad]
        return AdamW(trainable_params, lr=self._lr)

    def forward(self, *args, **kwargs):
        """Delegate to the wrapped model and return its output unchanged."""
        # Call the module itself rather than `.forward` so that hooks
        # registered on the wrapped model are executed.
        return self._model(*args, **kwargs)

    def training_step(self, mini_batch, mini_batch_idx):
        """Compute the shifted next-token cross-entropy loss for one mini-batch.

        Args:
            mini_batch: Tuple ``(input_encodings, labels)``; the encodings are
                unpacked as keyword arguments of the model call.
            mini_batch_idx: Index of the mini-batch (unused).

        Returns:
            torch.Tensor: The loss returned by ``F.cross_entropy``.
        """
        # Unpack the encoding and the target labels
        input_encodings, labels = mini_batch
        # Run generic forward step
        output = self(**input_encodings)
        logits: torch.Tensor = output.logits
        # Position t predicts token t + 1: drop the last logit and the first label
        shifted_logits = logits[..., :-1, :].contiguous()
        shifted_labels = labels[..., 1:].contiguous()
        # Token-wise LM loss; labels equal to -100 are ignored
        loss: torch.Tensor = F.cross_entropy(
            shifted_logits.view(-1, shifted_logits.size(-1)),
            shifted_labels.view(-1),
            ignore_index=-100,
        )
        # Surface the loss in the progress bar and the logger
        self.log("train_loss", loss, prog_bar=True, batch_size=labels.size(0))

        return loss

# Wrap the QLoRA model for training with Lightning (default learning rate).
lightning_model = LightningWrapper(qlora_model, tokenizer)

In [None]:
# Configure the Lightning trainer. A pasted ConnectionError traceback that had
# ended up inside the argument list (and made the cell a syntax error) is removed.
trainer = L.Trainer(
    accumulate_grad_batches=32,  # Gradients of 32 mini-batches are accumulated per optimiser step
    precision='bf16-mixed',  # Mixed precision (bf16-mixed or 16-mixed)
    gradient_clip_val=1.0,  # Gradient clipping
    max_epochs=2,
)

Using bfloat16 Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(lightning_model, train_dataloaders=data_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type                 | Params | Mode 
--------------------------------------------------------
0 | _model | PeftModelForCausalLM | 760 M  | train
--------------------------------------------------------
11.3 M    Trainable params
749 M     Non-trainable params
760 M     Total params
3,042.189 Total estimated model params size (MB)
1122      Modules in train mode
215       Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]