In [1]:
cat tools/requirements.txt

torch==2.5.1
transformers==4.48.2
peft==0.14.0
wandb==0.19.5
trl==0.12.0
bitsandbytes==0.45.1
accelerate==1.3.0
huggingface_hub==0.28.1
cut_cross_entropy==25.1.1
hf_transfer==0.1.9
protobuf==3.20.3
sentencepiece==0.2.0
unsloth==2025.1.8
unsloth_zoo==2025.1.5
xformers==0.0.29.post1

In [None]:
%pip install -r tools/requirements.txt

In [None]:
from tools.train_configs import get_gemma_2_9B_cot_configs
from tools.eval_configs import get_eval_config
from tools.train import run_train
from tools.eval import run_eval
from tools.prepare_data import hash_dataset, detect_duplicates
from huggingface_hub import login

In [None]:
login()

In [2]:
import json
with open("/workspace/repeat_determ/ukrainian_dedup_format_02012025.json", "r", encoding="utf-8") as fr:
    dataset = json.load(fr)

In [3]:
dataset.keys()

dict_keys(['train', 'val', 'test'])

In [4]:
hash_dataset(dataset)

'26cb90de3ba73d17a7144ce5bded0f51'

In [None]:
detect_duplicates(dataset)

In [6]:
train_params = get_gemma_2_9B_cot_configs()

In [7]:
train_params['run_name'] = "/workspace/repeat_determ/tools/train_models_23022025/gemma-2-it-cot-26cb90de3ba73d17a7144ce5bded0f51"
train_params['data_path'] = "/workspace/repeat_determ/ukrainian_dedup_format_02012025.json"

In [8]:
train_params

{'model_name': 'google/gemma-2-9b-it',
 'batch_size': 4,
 'gradient_accumulation': 4,
 'seed': 42,
 'load_in_4bit': True,
 'load_in_8bit': False,
 'lora_rank': 16,
 'lora_alpha': 16,
 'lora_dropout': 0.05,
 'learning_rate': 0.0003,
 'max_seq_len': 3072,
 'num_epochs': 4,
 'warmup_steps': 20,
 'chain_of_thought': True,
 'with_topic': True,
 'run_name': '/workspace/repeat_determ/tools/train_models_23022025/gemma-2-it-cot-26cb90de3ba73d17a7144ce5bded0f51',
 'data_path': '/workspace/repeat_determ/ukrainian_dedup_format_02012025.json'}

In [None]:
run_train(**train_params)

In [9]:
eval_params = get_eval_config(train_params)
eval_params["adapter_path"] = "/workspace/repeat_determ/tools/train_models_23022025/gemma-2-it-cot-26cb90de3ba73d17a7144ce5bded0f51_lr0.0003_ep4_msl3072_bs4_ga4_ws20_s42_fdTrue_1742013896.1715345/checkpoint-327"
eval_params["merge_type"] = "quantized"
eval_params["out_file_path"] = "/workspace/repeat_determ/val_preds/gemma-2-it-no-cot-0203/gemma-2-it-cot-ep3-step327.json"
#eval_params["sample_size"] = 10
eval_params["split"] = "test"
eval_params["experiment_link"] = "https://wandb.ai/nik-syromyatnikov-/huggingface/runs/kn6koj4u"
#eval_params["date_string"] = "07 Feb 2025"
eval_params

{'temperature': 0.0,
 'max_new_tokens': 2048,
 'do_sample': False,
 'rep_penalty': 1.0,
 'base_model_name': 'google/gemma-2-9b-it',
 'data_path': '/workspace/repeat_determ/ukrainian_dedup_format_02012025.json',
 'chain_of_thought': True,
 'with_topic': True,
 'load_in_4bit': True,
 'load_in_8bit': False,
 'max_seq_len': 3072,
 'date_string': None,
 'replace_map': None,
 'train_parameters': {'model_name': 'google/gemma-2-9b-it',
  'batch_size': 4,
  'gradient_accumulation': 4,
  'seed': 42,
  'load_in_4bit': True,
  'load_in_8bit': False,
  'lora_rank': 16,
  'lora_alpha': 16,
  'lora_dropout': 0.05,
  'learning_rate': 0.0003,
  'max_seq_len': 3072,
  'num_epochs': 4,
  'warmup_steps': 20,
  'chain_of_thought': True,
  'with_topic': True,
  'run_name': '/workspace/repeat_determ/tools/train_models_23022025/gemma-2-it-cot-26cb90de3ba73d17a7144ce5bded0f51',
  'data_path': '/workspace/repeat_determ/ukrainian_dedup_format_02012025.json'},
 'adapter_path': '/workspace/repeat_determ/tools/trai

In [None]:
result = run_eval(**eval_params)

In [22]:
eval_params = get_eval_config(train_params)
eval_params["adapter_path"] = "/workspace/repeat_determ/tools/train_models_23022025/llama-3.1-it-cot-26cb90de3ba73d17a7144ce5bded0f51_lr0.0003_ep4_msl3072_bs4_ga4_ws20_s42_fdTrue_1740440605.713917/checkpoint-327"
eval_params["merge_type"] = "full_precision"
eval_params["out_file_path"] = "/workspace/repeat_determ/val_preds/llama-3.1-it-cot/merged_full_prec_llama-3.1-it-cot-ep3-step327.json"
#eval_params["sample_size"] = 10
eval_params["split"] = "val"
eval_params["experiment_link"] = "https://wandb.ai/nik-syromyatnikov-/huggingface/runs/3txhfi0k"
#eval_params["date_string"] = "07 Feb 2025"
eval_params

{'temperature': 0.0,
 'max_new_tokens': 2048,
 'do_sample': False,
 'rep_penalty': 1.0,
 'base_model_name': 'meta-llama/Llama-3.1-8B-Instruct',
 'data_path': '/workspace/repeat_determ/ukrainian_dedup_format_02012025.json',
 'chain_of_thought': True,
 'load_in_4bit': True,
 'load_in_8bit': False,
 'max_seq_len': 3072,
 'date_string': None,
 'train_parameters': {'model_name': 'meta-llama/Llama-3.1-8B-Instruct',
  'batch_size': 4,
  'gradient_accumulation': 4,
  'seed': 42,
  'load_in_4bit': True,
  'load_in_8bit': False,
  'lora_rank': 16,
  'lora_alpha': 16,
  'lora_dropout': 0.05,
  'learning_rate': 0.0003,
  'max_seq_len': 3072,
  'num_epochs': 4,
  'warmup_steps': 20,
  'chain_of_thought': True,
  'run_name': '/workspace/repeat_determ/tools/train_models_23022025/llama-3.1-it-cot-26cb90de3ba73d17a7144ce5bded0f51',
  'data_path': '/workspace/repeat_determ/ukrainian_dedup_format_02012025.json'},
 'adapter_path': '/workspace/repeat_determ/tools/train_models_23022025/llama-3.1-it-cot-26cb

In [None]:
result = run_eval(**eval_params)