In [1]:
import torch

# Pick the compute backend by preference: CUDA first, then Apple MPS, and
# finally plain CPU when no accelerator is reported as available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

In [2]:
# Identifier of the base checkpoint that the tokenizer and model cells load.
# The 9B variant is kept as a commented-out alternative; the 2B one is active.
#model_id = "cjvt/GaMS-9B"
model_id = "cjvt/GaMS-2B"

In [3]:
from datasets import Dataset

# Load the training dataset from a local directory.
# NOTE(review): the path is relative, so it resolves against the notebook's
# working directory — confirm the notebook is started from the expected folder.
train_dataset = Dataset.load_from_disk("../data/hf/dataset-tiny")

In [4]:
from transformers import BitsAndBytesConfig

# Quantization settings handed to `from_pretrained` when the model is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype requested for computation
    bnb_4bit_quant_type="nf4",              # 4-bit quantization data type
    load_in_4bit=True,                      # load the weights in 4-bit form
)

In [5]:
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the base checkpoint named by `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [6]:
# Field names looked up on every input record, in rendering order: three fixed
# tags first, then a Title/Content pair for each report section.
input_tags = ["A1", "B1", "C1"] + [
    f"{kind}{section}"
    for section in (
        "Pomembno",
        "Nesrece",
        "Zastoji",
        "Vreme",
        "Ovire",
        "DeloNaCesti",
        "Opozorila",
        "MednarodneInformacije",
        "Splosno",
    )
    for kind in ("Title", "Content")
]

In [7]:
def format_training_prompt(input_data):
    """Build the full training text: instruction, inputs and reference report.

    Args:
        input_data: Mapping with an ``'input'`` entry (rendered input
            sections) and a ``'target'`` entry (the reference report).

    Returns:
        The prompt string. It starts with a newline and ends with the target
        text followed by a single newline.
    """
    source_text = input_data['input']
    report_text = input_data['target']
    return (
        "\n"
        "Generate a traffic report from the following input data.\n"
        "\n"
        "## Inputs:\n"
        "\n"
        f"{source_text}\n"
        "\n"
        "## Traffic Report:\n"
        "\n"
        f"{report_text}\n"
    )

def format_inference_prompt(input_data):
    """Build the generation prompt: instruction and inputs, report left open.

    Args:
        input_data: Mapping with an ``'input'`` entry holding the rendered
            input sections. No other key is read.

    Returns:
        The prompt string. It starts with a newline and ends right after the
        ``## Traffic Report:`` heading line.
    """
    prompt_lines = [
        "",
        "Generate a traffic report from the following input data.",
        "",
        "## Inputs:",
        "",
        f"{input_data['input']}",
        "",
        "## Traffic Report:",
        "",  # trailing empty element yields the final newline
    ]
    return "\n".join(prompt_lines)


def format_single_input(input_item, i, tags=None):
    """Render one input record as a markdown-style section.

    Args:
        input_item: Mapping from tag name to its text, or ``None`` when the
            record has no value for that tag.
        i: Zero-based position of the record; the heading shows ``i + 1``.
        tags: Optional iterable of tag names to render, in order. When
            omitted, the module-level ``input_tags`` list is used.

    Returns:
        ``"### Input <i+1>:"`` followed by one ``"#### <tag>:"`` sub-section
        per tag whose value is not ``None``, with leading and trailing
        whitespace stripped from the whole result.

    Raises:
        KeyError: If a requested tag is missing from ``input_item``.
    """
    if tags is None:
        tags = input_tags

    # Collect the pieces and join once instead of re-building the string on
    # every iteration.
    pieces = [f"### Input {i+1}:\n"]
    for tag in tags:
        value = input_item[tag]
        if value is not None:
            pieces.append(f"\n#### {tag}:\n{value}\n")

    return "".join(pieces).strip()

def format_sample(example):
    """Convert a raw dataset row into the ``input``/``target`` text pair.

    Args:
        example: Row with an ``'inputs'`` sequence of input records and an
            ``'output'`` mapping that has a ``'content'`` entry.

    Returns:
        Dict with ``"input"`` (all rendered input sections) and ``"target"``
        (the value of ``example['output']['content']``).
    """
    # Every rendered section comes back stripped, so a blank line has to be
    # inserted between consecutive sections. Without it the next
    # "### Input N:" heading is glued onto the last line of the previous one.
    # Rows with a single input produce exactly the same text as before.
    inputs = "\n\n".join(
        format_single_input(input_item, i)
        for i, input_item in enumerate(example['inputs'])
    )

    return {
        "input": inputs,
        "target": example['output']['content']
    }

In [8]:
# Apply `format_sample` to every row, which adds the flat "input"/"target"
# text columns, and drop the original nested "inputs"/"output" columns.
ds = train_dataset.map(format_sample, remove_columns=["inputs", "output"])

Map:   0%|          | 0/183 [00:00<?, ? examples/s]

In [9]:
from transformers import AutoModelForCausalLM

# Load the base checkpoint with the 4-bit quantization config.
# `device_map="auto"` lets the loader place the quantized modules, so no
# explicit `.to("cuda")` follows: moving a bitsandbytes-quantized model after
# loading is redundant, hard-codes the device, and is rejected by some
# transformers versions.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    attn_implementation='eager',
    device_map="auto"
)

# Bare expression so the cell still displays the module tree.
model

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Gemma2ForCausalLM(
  (model): Gemma2Model(
    (embed_tokens): Embedding(256000, 2304, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma2DecoderLayer(
        (self_attn): Gemma2Attention(
          (q_proj): Linear4bit(in_features=2304, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2304, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=2304, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2304, bias=False)
        )
        (mlp): Gemma2MLP(
          (gate_proj): Linear4bit(in_features=2304, out_features=9216, bias=False)
          (up_proj): Linear4bit(in_features=2304, out_features=9216, bias=False)
          (down_proj): Linear4bit(in_features=9216, out_features=2304, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
        (post_attention_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
        (pre_

In [10]:
#aaa = ds.select(range(1))["txt"][0]

#print(train_dataset.select(range(1))[0]["inputs"][0]["B1"])

#print(ds.select(range(1))["input"][0])

#print(format_training_prompt({
#    "input": ds.select(range(1))["input"][0],
#    "target": ds.select(range(1))["target"][0],
#}))


In [11]:
def zero_shot():
    """Print a baseline generation of the base model for the first example.

    Reads the first formatted example from ``ds`` and its reference report
    from ``train_dataset``, generates up to 200 new tokens with ``model``, and
    prints the prompt, the reference report and the decoded generation
    (prompt included) separated by dashed lines.

    Returns:
        None.
    """
    sample = ds.select(range(1))["input"][0]
    target = train_dataset.select(range(1))[0]["output"]["content"]

    # Use the shared inference template so the baseline gets the same prompt
    # layout as the fine-tuned model. The previous inline triple-quoted string
    # carried four extra leading spaces on every line.
    prompt = format_inference_prompt({"input": sample})

    # Send the encoding to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    # Unpacking the encoding forwards the attention mask along with input_ids.
    generated = model.generate(**inputs, max_new_tokens=200)
    output = tokenizer.decode(generated[0], skip_special_tokens=True)

    dash_line = '-' * 99
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN REPORT:\n{target}\n')
    print(dash_line)
    print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

In [12]:
#zero_shot()

In [13]:
from peft import LoraConfig

# LoRA adapter settings: rank 8, alpha 32 and 5% dropout, applied to the four
# attention projection modules (q/k/v/o) for a causal language-modelling task.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
)

In [14]:
def formatting_prompts_func(example):
    """Formatting callback handed to the trainer.

    Args:
        example: Dataset row with ``'input'`` and ``'target'`` entries.

    Returns:
        The training text produced by ``format_training_prompt`` for the row.
    """
    training_text = format_training_prompt(example)
    return training_text

In [15]:
from trl import SFTTrainer, SFTConfig

# Supervised fine-tuning hyperparameters.
# Effective batch size per device is 2 * 2 = 4 (batch size x accumulation
# steps), which matches the 46 optimizer steps logged for 183 examples.
# NOTE(review): `fp16=True` is used here while the quantization config asks
# for `bnb_4bit_compute_dtype=torch.bfloat16` — confirm this mixed-precision
# combination is intended.
training_args = SFTConfig(output_dir="../outputs/gemma-2b-finetune",
    per_device_train_batch_size=2, # author note: "4 with 512" (presumably batch size 4 at max_length 512 — confirm)
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    logging_steps=10,
    num_train_epochs=1, # author note: 3 (alternative epoch count)
    fp16=True,
    save_strategy="epoch",
    max_length=1024, # author note: 1024 or 2048
    #tokenizer=tokenizer
)

# Create the trainer: it receives the quantized base model, the LoRA config
# and the formatting callback; no evaluation dataset is passed yet.
trainer = SFTTrainer(
    model=model,
    train_dataset=ds, # add eval dataset later
    peft_config=peft_config,
    formatting_func=formatting_prompts_func,
    args=training_args
)

Applying formatting function to train dataset:   0%|          | 0/183 [00:00<?, ? examples/s]

Converting train dataset to ChatML:   0%|          | 0/183 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/183 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/183 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/183 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [16]:
# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
trainer.train()

Step,Training Loss
10,0.5909
20,0.2889
30,0.2006
40,0.1916


TrainOutput(global_step=46, training_loss=0.29740623935409216, metrics={'train_runtime': 836.2324, 'train_samples_per_second': 0.219, 'train_steps_per_second': 0.055, 'total_flos': 2196961897319424.0, 'train_loss': 0.29740623935409216})

In [17]:
# Save model
#trainer.save_model("../models/nlpmaxxing-GaMS-9B-rtvslo-trfc")

In [18]:
from transformers import TextStreamer, AutoModelForCausalLM, AutoTokenizer

# Directory of the fine-tuned checkpoint, shared by the model and tokenizer.
# NOTE(review): the `trainer.save_model(...)` call for this path is commented
# out in this notebook, so the directory must already exist on disk. Its name
# says 9B while `model_id` selects the 2B checkpoint — confirm this is the
# intended model.
finetuned_model_path = "../models/nlpmaxxing-GaMS-9B-rtvslo-trfc"

finetuned_model = AutoModelForCausalLM.from_pretrained(finetuned_model_path)
# Use the backend chosen in the first cell instead of hard-coding "cuda".
finetuned_model.to(device)

# Rebinds the global `tokenizer` to the one stored with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(finetuned_model_path)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [19]:
def trained():
    """Print a generation of the fine-tuned model for the first example.

    Reads the first formatted example from ``ds``, generates up to 200 new
    tokens with ``finetuned_model``, and prints the prompt, the reference
    report and the decoded generation separated by dashed lines.

    Returns:
        None.
    """
    first_example = ds.select(range(1))
    target = first_example["target"][0]

    # The inference template only reads "input", so the reference report is
    # not passed to it.
    prompt = format_inference_prompt({"input": first_example["input"][0]})

    # Send the encoding to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors='pt').to(finetuned_model.device)

    # Start decoding a few tokens before the end of the prompt so the trailing
    # '## Traffic Report:' heading is part of the printed output.
    # NOTE(review): the offset of 5 tokens depends on how the tokenizer splits
    # the heading — confirm it still matches if the tokenizer or template changes.
    header_token_count = 5
    input_size = inputs.input_ids.shape[1] - header_token_count

    # Unpacking the encoding forwards the attention mask along with input_ids.
    generated_tokens = finetuned_model.generate(**inputs, max_new_tokens=200)

    output = tokenizer.decode(
        generated_tokens[0][input_size:],
        skip_special_tokens=True
    )

    dash_line = '-' * 99
    print(dash_line)
    print(f'INPUT PROMPT:\n{prompt}')
    print(dash_line)
    print(f'BASELINE HUMAN REPORT:\n{target}\n')
    print(dash_line)
    # This is the fine-tuned model, not the zero-shot baseline.
    print(f'MODEL GENERATION - FINE-TUNED:\n{output}')

In [20]:
from transformers import pipeline

#pipe = pipeline(model="../models/nlpmaxxing-GaMS-9B-rtvslo-trfc", return_full_text=False)

In [21]:
# Build the inference prompt for the first formatted example.
# `format_inference_prompt` only reads "input"; the "target" entry is passed
# along but not used by it.
prompt = format_inference_prompt({
    "input": ds.select(range(1))["input"][0],
    "target": ds.select(range(1))["target"][0],
})

#print(pipe(prompt))

In [22]:
# Generate with the fine-tuned model for the first example and print the
# prompt, the reference report and the model output.
trained()

---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Generate a traffic report from the following input data.

## Inputs:

### Input 1:

#### B1:
Vreme 

 Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost! 

 Omejitve za tovorna vozila 

 Po Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:- danes, 1. 1., od 8. do 22. ure;- v nedeljo, 2. 1., od 8. do 22. ure. 

 Od 30. decembra je v veljavi sprememba omejitve za tovorna vozila nad 7,5 ton. Več. 

 Dela 

 Na primorski avtocesti je ponovno odprt priključek Črni Kal v obe smeri.

#### ContentVreme:
Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost!

#### TitleDeloNaCesti:
Dela

#### ContentDeloNaCesti:
Na primorski avtocesti je ponovno odprt priključek Črni Kal v obe smeri.

#### TitleOpozorila:
Omejitve za tovorna vozila

#### ContentOpozorila:
Po Sloveniji velja med praznik