# Model Training Script

### Required Libraries

In [1]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
from Llama3SP import LlamaForSequenceClassification as LLAMA3SP
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoConfig,
    get_linear_schedule_with_warmup,
    XLNetTokenizer,
    BertTokenizer
)
from peft import (
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model,
)
from huggingface_hub import login
from dotenv import load_dotenv
from tokenizers import Tokenizer

import torch.nn as nn
import os
import pandas as pd
import numpy as np
import torch
import time
import gc

### Log in to the Hugging Face Hub with your access token so that gated pre-trained models (e.g. Llama 3.2 1B) can be downloaded

In [2]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
# os.getenv returns None when HUGGINGFACE_TOKEN is not set.
token = os.getenv("HUGGINGFACE_TOKEN")
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): confirm how login() behaves when token is None (it may prompt interactively).
login(token=token)

### Hyperparameters

In [3]:
# CUDA allocator / launch settings are read when CUDA is initialised, so they must be
# exported before the first call that can initialise it (set_per_process_memory_fraction
# below does). Setting them only after that point has no effect on this process.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# --- training hyperparameters ---
EPOCHS = 20
# fraction of the training-set size used as the batch size
BATCH_SIZE_RATIO = 0.3  # within proj: 0.3 / cross proj: 0.4
# every input is truncated / padded to this many tokens
SEQUENCE_LEN = 20
LEARNING_RATE = 5e-4
TOKENIZER = 'qwen'  # available: llama3, wordlevel, sentencepiece, wordpiece, gpt, deepseek, qwen
MODEL_NAME = 'qwen'  # available: llama3, llama3sp, deepseek, qwen
# Hugging Face checkpoint to load; alternatives that were used with the other settings:
# HF_MODEL_NAME = "meta-llama/Llama-3.2-1B"
# HF_MODEL_NAME = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B'
HF_MODEL_NAME = 'Qwen/Qwen3-1.7B'

# define device
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    # set up to release cache memory when possible
    torch.cuda.empty_cache()
    # set up more conservative memory limits
    torch.cuda.set_per_process_memory_fraction(0.8)  # Use only 80% of GPU memory

# define files to be used: directory holding the per-project CSV files
DATA_PATH = './sp_dataset/marked_data/'

### Configure dynamic memory allocation

In [4]:
# Ask the PyTorch CUDA caching allocator to use expandable segments.
# NOTE(review): this is presumably only honoured if it is set before CUDA is first
# initialised in this process - confirm this cell runs before any CUDA call.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
# Intended to make CUDA kernel launches synchronous (easier debugging, slower training).
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

### Helper Functions and Global State

In [5]:
# Result text accumulated for the current project; train_eval_test() appends to it,
# main() writes it to a results file and then resets it.
OUTPUT = ''
# The model currently being trained; assigned in main() and read by tokenization().
MODEL = None
# When True, data_processing() derives BATCH_SIZE from the training-set size.
DYNAMIC_BATCH = True
# Batch size used by prepare_dataloader(); stays None until data_processing() sets it.
BATCH_SIZE = None
# Selects the within-project split in data_processing(); set to True/False before main() runs.
WITHIN_PROJECT = None
# Test-set MAE / MdAE values; reset and appended to by train_eval_test().
MAE_RECORDS = []
MDAE_RECORDS = []


def optimize_memory():
    """Free unreferenced Python objects and release cached GPU memory.

    The garbage collector always runs (also on CPU-only machines). When CUDA is
    available, the blocks held by PyTorch's caching allocator are released afterwards.
    """
    # Collect first: memory owned by garbage tensors only becomes releasable once those
    # tensors have been destroyed, so empty_cache() has to run after the collection.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def data_processing(file_pair):
    """Load, split and tokenize the data of one experiment and build its dataloaders.

    Args:
        file_pair: dict with the keys 'train' and 'test', each a list of CSV base
            names (without extension) located under DATA_PATH.

    Returns:
        A tuple ``(file_pair, train_dataloader, val_dataloader, all_test_dataloader,
        test_file_names)``. In within-project mode the single test dataloader is the
        last 20% of the training file(s) and is named after ``file_pair['test'][0]``;
        otherwise there is one test dataloader per entry of ``file_pair['test']``.

    Side effects:
        Sets the global BATCH_SIZE when DYNAMIC_BATCH is true.
    """
    global BATCH_SIZE

    optimize_memory()

    # DataFrame.append was removed in pandas 2.0, so the per-file frames are collected
    # and concatenated once instead.
    train_frames = [prepare_dataframe(DATA_PATH + train_file_name + '.csv')
                    for train_file_name in file_pair['train']]
    if train_frames:
        train_data = pd.concat(train_frames)
    else:
        train_data = pd.DataFrame(columns=['text', 'label'])

    # data split
    if WITHIN_PROJECT:
        train_text, train_labels, val_text, val_labels, test_text, test_labels = within_project_split(train_data)
    else:
        train_text, train_labels, val_text, val_labels = train_val_split(train_data, 0.6)

    # define batch size dynamically based on training length: capped at 32 and never
    # below 1 (a batch size of 0 is rejected by DataLoader)
    if DYNAMIC_BATCH:
        BATCH_SIZE = max(1, min(int(len(train_text) * BATCH_SIZE_RATIO), 32))

    optimize_memory()

    def process_in_chunks(texts, chunk_size=1000):
        """Tokenize ``texts`` in slices of ``chunk_size`` rows to bound peak memory."""
        all_tokens = {'input_ids': []}
        for start in range(0, len(texts), chunk_size):
            chunk = texts[start:start + chunk_size].tolist()
            all_tokens['input_ids'].extend(tokenization(chunk)['input_ids'])
            optimize_memory()
        return all_tokens

    def build_dataloader(texts, labels, sampler_type):
        """Tokenize ``texts`` and wrap them, with integer labels, in a DataLoader."""
        seq = torch.tensor(process_in_chunks(texts)['input_ids'])
        y = torch.tensor(labels.tolist()).type(torch.LongTensor)
        return prepare_dataloader(seq, y, sampler_type=sampler_type)

    train_dataloader = build_dataloader(train_text, train_labels, 'random')
    val_dataloader = build_dataloader(val_text, val_labels, 'sequential')

    # prepare testing datasets
    all_test_dataloader = []
    test_file_names = []

    if WITHIN_PROJECT:
        # the held-out tail of the training file(s) is the only test set
        all_test_dataloader.append(build_dataloader(test_text, test_labels, 'sequential'))
        test_file_names.append(file_pair['test'][0])
        return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names

    for test_file_name in file_pair['test']:
        test_data = prepare_dataframe(DATA_PATH + test_file_name + '.csv')
        all_test_dataloader.append(build_dataloader(test_data['text'], test_data['label'], 'sequential'))
        test_file_names.append(test_file_name)
        optimize_memory()
    print('cross project data processing!')
    return file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names


def train_val_split(data, split_ratio):
    """Split ``data`` positionally into a training head and a validation tail.

    Args:
        data: frame with the columns 'text' and 'label'.
        split_ratio: fraction of the rows (from the start) that goes to training.

    Returns:
        ``(train_text, train_labels, val_text, val_labels)``.
    """
    print('cross project split!')
    cut = int(len(data) * split_ratio)
    texts, labels = data['text'], data['label']
    return texts[:cut], labels[:cut], texts[cut:], labels[cut:]


# Tokenizers already instantiated, keyed by (TOKENIZER, HF_MODEL_NAME). tokenization()
# is called once per chunk of text, so without this cache the same tokenizer would be
# rebuilt for every chunk.
_TOKENIZER_CACHE = {}


def _load_tokenizer(name):
    """Instantiate the tokenizer selected by ``name``.

    Raises:
        ValueError: if ``name`` is not one of the supported tokenizer names.
    """
    if name == 'wordpiece':
        print('using wordpiece tokenizer!')
        return BertTokenizer('all_tokenizers/sp_word_piece/vocab.txt')
    if name == 'wordlevel':
        print('using wordlevel tokenizer!')
        return Tokenizer.from_file('all_tokenizers/sp_word_level/wordlevel.json')

    if name == 'sentencepiece':
        print('using sentencepiece tokenizer!')
        tokenizer = XLNetTokenizer('all_tokenizers/sp_sentence_piece/spm_tokenizer.model', padding_side='right')
    elif name == 'llama3':
        print('using pretrained llama3 tokenizer')
        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME, add_prefix_space=True)
    elif name == 'deepseek':
        print('using pretrained DeepSeek tokenizer')
        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
    elif name == 'qwen':
        print('using pretrained Qwen tokenizer')
        tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
    else:
        raise ValueError(
            "unknown TOKENIZER %r; expected one of: wordpiece, sentencepiece, "
            "wordlevel, llama3, deepseek, qwen" % (name,)
        )

    # these tokenizers pad with their end-of-sequence token
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.pad_token = tokenizer.eos_token
    return tokenizer


def tokenization(text_list):
    """Encode ``text_list`` into fixed-length token id sequences.

    Every sequence is truncated / padded to SEQUENCE_LEN tokens. The tokenizer is
    chosen by the global TOKENIZER and is created (and announced on stdout) only on
    first use; later calls reuse the cached instance.

    Args:
        text_list: list of strings to encode.

    Returns:
        A mapping whose 'input_ids' entry holds one list of SEQUENCE_LEN ids per input.

    Raises:
        ValueError: if TOKENIZER names no supported tokenizer.

    Side effects:
        Except for the 'wordlevel' tokenizer, updates ``MODEL.config`` (pad token id,
        use_cache, pretraining_tp).
    """
    global TOKENIZER, SEQUENCE_LEN, MODEL

    cache_key = (TOKENIZER, HF_MODEL_NAME)
    tokenizer = _TOKENIZER_CACHE.get(cache_key)
    if tokenizer is None:
        tokenizer = _load_tokenizer(TOKENIZER)
        _TOKENIZER_CACHE[cache_key] = tokenizer

    if TOKENIZER == 'wordlevel':
        # the raw `tokenizers` object has no truncation/padding configured here, so
        # both are done by hand; 3 is the pad id used with this vocabulary
        encoded_sentences = {'input_ids': []}
        for sentence in text_list:
            ids = tokenizer.encode(sentence).ids[:SEQUENCE_LEN]
            ids = ids + [3] * (SEQUENCE_LEN - len(ids))
            encoded_sentences['input_ids'].append(ids)
        return encoded_sentences

    # update some model configs
    # must use .cache = False as below or it crashes from my experience
    MODEL.config.pad_token_id = tokenizer.pad_token_id
    MODEL.config.use_cache = False
    MODEL.config.pretraining_tp = 1
    return tokenizer.batch_encode_plus(text_list, truncation=True, max_length=SEQUENCE_LEN, padding='max_length')


def prepare_dataframe(file_name):
    """Read one project CSV and reduce it to the columns used for training.

    Missing cells are replaced by a single blank so that no null text reaches the
    tokenizer.

    Args:
        file_name: path of a CSV file with at least the columns 'title' and 'storypoint'.

    Returns:
        A frame with the columns 'text' (issue titles) and 'label' (story points).
    """
    raw = pd.read_csv(file_name).fillna(' ')
    return pd.DataFrame(data={'text': raw['title'].tolist(), 'label': raw['storypoint']})


def prepare_dataloader(seq, y, sampler_type):
    """Wrap token ids and labels in a DataLoader that uses the global BATCH_SIZE.

    Args:
        seq: tensor of token ids, one row per sample.
        y: tensor of labels with the same first dimension as ``seq``.
        sampler_type: 'random' (shuffled order) or 'sequential' (dataset order).

    Returns:
        The configured DataLoader.

    Raises:
        ValueError: if ``sampler_type`` is neither 'random' nor 'sequential'.
    """
    global BATCH_SIZE
    sampler_classes = {'random': RandomSampler, 'sequential': SequentialSampler}
    # validate up front: an unknown value used to surface as an UnboundLocalError
    if sampler_type not in sampler_classes:
        raise ValueError(
            "unknown sampler_type %r; expected 'random' or 'sequential'" % (sampler_type,)
        )
    tensor_dataset = TensorDataset(seq, y)
    sampler = sampler_classes[sampler_type](tensor_dataset)
    return DataLoader(tensor_dataset, sampler=sampler, batch_size=BATCH_SIZE)


def within_project_split(data):
    """Split ``data`` positionally into 60% train, 20% validation and 20% test.

    Args:
        data: frame with the columns 'text' and 'label'.

    Returns:
        ``(train_text, train_labels, val_text, val_labels, test_text, test_labels)``.
    """
    print('within project split!')
    n_rows = len(data)
    val_start = int(n_rows * 0.6)
    test_start = int(n_rows * 0.8)
    texts, labels = data['text'], data['label']
    return (
        texts[:val_start], labels[:val_start],
        texts[val_start:test_start], labels[val_start:test_start],
        texts[test_start:], labels[test_start:],
    )


def train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, model, test_file_names):
    global LEARNING_RATE, EPOCHS, MAE_RECORDS, MDAE_RECORDS, DEVICE
    optimizer = torch.optim.AdamW(MODEL.parameters(), lr=LEARNING_RATE)    
    # total number of training steps is [number of batches] x [number of epochs]
    total_steps = len(train_dataloader) * EPOCHS
    # create the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    print("Start training for ", file_pair, ".....")
    start_time = time.time()
    
    # tensorboard writer
    writer_path = 'tb/' + str(file_pair['train'][0]) + '_' + str(file_pair['test'][0])
    writer = SummaryWriter(writer_path)
    
    # vars for model selection
    min_eval_loss_epoch = [10000, 0]
    
    time_records = []
    MAE_RECORDS = []
    MDAE_RECORDS = []
    
    loss_fct = nn.L1Loss()
    for e in range(EPOCHS):
        # ---TRAINING---
        # clean GPU memory
        optimize_memory()
        print(">>> epoch ", e)
        # set model into train mode
        model.train()
        total_train_loss = 0
        for step, batch in enumerate(train_dataloader):            
            b_input_ids = batch[0].to(torch.long).to(DEVICE)
            b_labels = batch[1].to(torch.float).to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids, 
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_train_loss += loss.item()
            loss.backward() 
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # clean memory
            del step, batch, b_input_ids, b_labels, result, loss, logits
            optimize_memory()

        avg_train_loss = total_train_loss / len(train_dataloader)
        print(" Average training MAE loss: {0:.2f}".format(avg_train_loss))
        writer.add_scalar('loss/train', avg_train_loss, e)
        # clean memory
        del avg_train_loss, total_train_loss
        optimize_memory()
        
        time_records.append(time.time() - start_time)
        
        # ---EVAL---
        print("-")
        # set model into eval mode
        model.eval()
        total_eval_loss = 0
        for batch in val_dataloader:            
            b_input_ids = batch[0].to(torch.long).to(DEVICE)
            b_labels = batch[1].to(torch.float).to(DEVICE)
            model.zero_grad()
            result = model(b_input_ids, 
                           labels=b_labels,
                           return_dict=True)
            loss = result.loss
            logits = result.logits
            total_eval_loss += loss.item()  
            # clean memory
            del b_input_ids, b_labels, batch, result, loss, logits
            optimize_memory()
        avg_eval_loss = total_eval_loss / len(val_dataloader)
        print(" Average eval MAE loss: {0:.2f}".format(avg_eval_loss))
        
        if avg_eval_loss <= min_eval_loss_epoch[0]:
            min_eval_loss_epoch[0] = avg_eval_loss
            min_eval_loss_epoch[1] = e

        optimize_memory()
        
        writer.add_scalar('loss/eval', avg_eval_loss, e)
        # clean memory
        del avg_eval_loss, total_eval_loss
        optimize_memory()
        # save model state to dict
        torch.save(model.state_dict(), './models/' + 'epo_' + str(e))
        
        print("===============================")
        
        # testing on holdout data
        index = 0
        for test_dataloader in all_test_dataloader:
            test_file_name = test_file_names[index]
            index += 1
            testing_start_time = time.time()
            predictions = []
            true_labels = []
            for batch in test_dataloader:
                batch = tuple(t.to(DEVICE) for t in batch)
                b_input_ids, b_labels = batch
                with torch.no_grad():
                    logits = model(b_input_ids)
                logits = logits['logits'].detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()
                predictions.append(logits)
                true_labels.append(label_ids)
            # calculate errors
            distance_records = []
            for i in range(len(predictions)):
                for j in range(len(predictions[i])):
                    distance = abs(predictions[i][j] - true_labels[i][j])
                    distance_records.append(distance)

            ## MAE = mean value of all absolute errors (stored in distance_records)
            MAE = np.mean(np.array(distance_records)) 
            ## MdAE = median value of all absolute errors (stored in distance_records)
            MdAE = np.median(np.array(distance_records)) 

            MAE_RECORDS.append(MAE)
            MDAE_RECORDS.append(MdAE)
            
            global OUTPUT
            OUTPUT +=  'Epochs ' + str(e) + '\n'
            OUTPUT += 'MAE: ' + str(MAE) + '\n'
            OUTPUT += 'MdAE: ' + str(MdAE) + '\n\n'
            print('MAE: ', MAE)
            print('MdAE: ', MdAE)
    writer.flush()
    writer.close()
    
    # select model
    os.rename('models/epo_' + str(min_eval_loss_epoch[1]), 
              'models/' + str(file_pair['train'][0]) + '_' 
              + str(file_pair['test'][0]) + '_epo_' + str(min_eval_loss_epoch[1]))
    
    # del unwanted models
    for i in range(20):
        try:
            os.remove("models/epo_" + str(i))
        except:
            continue
            
    OUTPUT += 'MAE: ' + str(MAE_RECORDS[min_eval_loss_epoch[1]]) \
                + '  MdAE: ' + str(MDAE_RECORDS[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'training time: ' + str(time_records[min_eval_loss_epoch[1]]) + '\n'
    OUTPUT += 'Epochs: ' + str(min_eval_loss_epoch[1]) +'\n'
    global BATCH_SIZE
    OUTPUT += 'batch size: ' + str(BATCH_SIZE)
    print('all done for one project')

In [6]:
# Run configuration for this cell: within-project evaluation.
global WITHIN_PROJECT, BATCH_SIZE_RATIO
# True makes data_processing() split every project file 60/20/20 into train/val/test.
WITHIN_PROJECT = True
# Fraction of the training-set size used as batch size (data_processing() caps it at 32).
BATCH_SIZE_RATIO = 0.3

# One entry per experiment: CSV base names (resolved under DATA_PATH) to train and test on.
# In within-project mode only the 'train' files are loaded; 'test'[0] is only used to
# name the outputs.
TRAIN_TEST_FILE_PAIRS = [
                        {'train': ['appceleratorstudio'], 'test': ['appceleratorstudio']},
                        {'train': ['aptanastudio'], 'test': ['aptanastudio']},
                        {'train': ['bamboo'], 'test': ['bamboo']},
                        {'train': ['clover'], 'test': ['clover']},
                        {'train': ['datamanagement'], 'test': ['datamanagement']},
                        {'train': ['duracloud'], 'test': ['duracloud']},
                        {'train': ['jirasoftware'], 'test': ['jirasoftware']},
                        {'train': ['mesos'], 'test': ['mesos']},
                        {'train': ['moodle'], 'test': ['moodle']},
                        {'train': ['mule'], 'test': ['mule']},
                        {'train': ['mulestudio'], 'test': ['mulestudio']},
                        {'train': ['springxd'], 'test': ['springxd']},
                        {'train': ['talenddataquality'], 'test': ['talenddataquality']},
                        {'train': ['talendesb'], 'test': ['talendesb']},
                        {'train': ['titanium'], 'test': ['titanium']},
                        {'train': ['usergrid'], 'test': ['usergrid']},
                        ]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME, HF_MODEL_NAME

    # Load LLama model with 4 bit quantization as specified in bits and bytes and prepare model for peft training
    # Quantization Config (for QLORA)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16, # Changed to float16 for lower memory usage
    )
    # Lora Config
    lora_config = LoraConfig(
        r=8, # Reduced from 16 to 8 for lower memory usage
        lora_alpha=16,
        target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
        lora_dropout=0.1,
        bias='none',
        task_type='SEQ_CLS'
    )

    for file in TRAIN_TEST_FILE_PAIRS:
        optimize_memory()

        # Config for Lama3 model
        config = AutoConfig.from_pretrained(HF_MODEL_NAME, num_labels=1)
        if MODEL_NAME == 'llama3':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'llama3sp':
            MODEL = LLAMA3SP.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'deepseek':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'qwen':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        # prepare_model_for_kbit_training() function to preprocess the quantized model for training.
        MODEL = prepare_model_for_kbit_training(MODEL)
        # get_peft_model prepares a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model
        MODEL = get_peft_model(MODEL, lora_config)

        # additional memory optimizations
        MODEL.gradient_checkpointing_enable()  # Reduce memory usage during training
        MODEL.enable_input_require_grads()

        if TOKENIZER == 'wordlevel':
            MODEL.config.pad_token_id = 3
        elif TOKENIZER == 'sentencepiece':
            MODEL.config.pad_token_id = 0
        elif TOKENIZER == 'wordpiece':
            MODEL.config.pad_token_id = 0
        
        MODEL.cuda()

        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        optimize_memory()
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""


# Run all configured experiments when this file is executed as a script / notebook cell.
if __name__ == "__main__":
    main()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['appceleratorstudio'], 'test': ['appceleratorstudio']} .....
>>> epoch  0
 Average training MAE loss: 18.92
-
 Average eval MAE loss: 4.23
MAE:  1.5512632
MdAE:  1.3481796
>>> epoch  1
 Average training MAE loss: 12.13
-
 Average eval MAE loss: 12.15
MAE:  2.6329756
MdAE:  2.3386428
>>> epoch  2
 Average training MAE loss: 7.94
-
 Average eval MAE loss: 11.59
MAE:  2.699457
MdAE:  2.414291
>>> epoch  3
 Average training MAE loss: 5.17
-
 Average eval MAE loss: 9.57
MAE:  2.4589581
MdAE:  2.2552195
>>> epoch  4
 Average training MAE loss: 3.55
-
 Average eval MAE loss: 10.43
MAE:  2.5690935
MdAE:  2.3332527
>>> epoch  5
 Average training MAE loss: 2.41
-
 Average eval MAE loss: 7.08
MAE:  2.056425
MdAE:  1.7325592
>>> epoch  6
 Average training MAE loss: 2.31
-
 Average eval MAE loss: 5.80
MAE:  1.8765509
MdA

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['aptanastudio'], 'test': ['aptanastudio']} .....
>>> epoch  0
 Average training MAE loss: 41.75
-
 Average eval MAE loss: 35.50
MAE:  3.364114
MdAE:  2.1809506
>>> epoch  1
 Average training MAE loss: 28.55
-
 Average eval MAE loss: 29.84
MAE:  3.927094
MdAE:  3.0819707
>>> epoch  2
 Average training MAE loss: 16.12
-
 Average eval MAE loss: 33.18
MAE:  4.0182986
MdAE:  3.2475832
>>> epoch  3
 Average training MAE loss: 8.79
-
 Average eval MAE loss: 36.86
MAE:  3.7999246
MdAE:  2.8548074
>>> epoch  4
 Average training MAE loss: 4.96
-
 Average eval MAE loss: 34.69
MAE:  4.178519
MdAE:  3.4144874
>>> epoch  5
 Average training MAE loss: 4.04
-
 Average eval MAE loss: 35.23
MAE:  3.712961
MdAE:  2.8498752
>>> epoch  6
 Average training MAE loss: 2.57
-
 Average eval MAE loss: 33.52
MAE:  3.844384
MdAE:  3.0900888
>>> epoch  7
 Average train

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['bamboo'], 'test': ['bamboo']} .....
>>> epoch  0
 Average training MAE loss: 8.10
-
 Average eval MAE loss: 3.62
MAE:  1.689991
MdAE:  1.5455232
>>> epoch  1
 Average training MAE loss: 4.86
-
 Average eval MAE loss: 3.25
MAE:  1.5603386
MdAE:  1.3236208
>>> epoch  2
 Average training MAE loss: 2.73
-
 Average eval MAE loss: 2.06
MAE:  1.1810333
MdAE:  0.94262314
>>> epoch  3
 Average training MAE loss: 1.05
-
 Average eval MAE loss: 2.31
MAE:  1.1823512
MdAE:  0.92555666
>>> epoch  4
 Average training MAE loss: 1.65
-
 Average eval MAE loss: 2.95
MAE:  1.3102189
MdAE:  1.0478227
>>> epoch  5
 Average training MAE loss: 0.60
-
 Average eval MAE loss: 2.26
MAE:  1.1314809
MdAE:  0.9572675
>>> epoch  6
 Average training MAE loss: 0.59
-
 Average eval MAE loss: 1.96
MAE:  1.1032604
MdAE:  0.9220009
>>> epoch  7
 Average training MAE loss: 0.

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['clover'], 'test': ['clover']} .....
>>> epoch  0
 Average training MAE loss: 57.58
-
 Average eval MAE loss: 31.64
MAE:  5.496256
MdAE:  4.4075813
>>> epoch  1
 Average training MAE loss: 29.38
-
 Average eval MAE loss: 21.87
MAE:  3.8125405
MdAE:  1.917032
>>> epoch  2
 Average training MAE loss: 19.34
-
 Average eval MAE loss: 34.91
MAE:  5.5954533
MdAE:  4.2779965
>>> epoch  3
 Average training MAE loss: 9.98
-
 Average eval MAE loss: 23.96
MAE:  4.0238843
MdAE:  2.1857357
>>> epoch  4
 Average training MAE loss: 3.28
-
 Average eval MAE loss: 23.64
MAE:  4.1501102
MdAE:  2.1706867
>>> epoch  5
 Average training MAE loss: 2.75
-
 Average eval MAE loss: 27.01
MAE:  4.76079
MdAE:  3.3562832
>>> epoch  6
 Average training MAE loss: 2.37
-
 Average eval MAE loss: 23.97
MAE:  4.370663
MdAE:  2.7399158
>>> epoch  7
 Average training MAE loss

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['datamanagement'], 'test': ['datamanagement']} .....
>>> epoch  0
 Average training MAE loss: 230.20
-
 Average eval MAE loss: 179.87
MAE:  9.184349
MdAE:  7.2387123
>>> epoch  1
 Average training MAE loss: 153.41
-
 Average eval MAE loss: 157.36
MAE:  6.7086096
MdAE:  4.193161
>>> epoch  2
 Average training MAE loss: 111.37
-
 Average eval MAE loss: 162.96
MAE:  7.6410913
MdAE:  5.050146
>>> epoch  3
 Average training MAE loss: 83.22
-
 Average eval MAE loss: 200.14
MAE:  6.993478
MdAE:  3.4593217
>>> epoch  4
 Average training MAE loss: 65.18
-
 Average eval MAE loss: 172.21
MAE:  5.812704
MdAE:  2.500825
>>> epoch  5
 Average training MAE loss: 46.35
-
 Average eval MAE loss: 165.18
MAE:  6.608508
MdAE:  3.3993607
>>> epoch  6
 Average training MAE loss: 36.04
-
 Average ev

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['duracloud'], 'test': ['duracloud']} .....
>>> epoch  0
 Average training MAE loss: 10.83
-
 Average eval MAE loss: 1.82
MAE:  1.0864797
MdAE:  1.001685
>>> epoch  1
 Average training MAE loss: 3.96
-
 Average eval MAE loss: 1.75
MAE:  1.1094776
MdAE:  0.94037944
>>> epoch  2
 Average training MAE loss: 1.86
-
 Average eval MAE loss: 1.72
MAE:  1.1561805
MdAE:  0.94638956
>>> epoch  3
 Average training MAE loss: 1.02
-
 Average eval MAE loss: 1.97
MAE:  1.1160784
MdAE:  0.924518
>>> epoch  4
 Average training MAE loss: 0.85
-
 Average eval MAE loss: 1.97
MAE:  1.2421608
MdAE:  0.9990084
>>> epoch  5
 Average training MAE loss: 0.62
-
 Average eval MAE loss: 1.80
MAE:  1.0950247
MdAE:  0.83456296
>>> epoch  6
 Average training MAE loss: 0.48
-
 Average eval MAE loss: 2.00
MAE:  1.176124
MdAE:  0.9104851
>>> epoch  7
 Average training MAE lo

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['jirasoftware'], 'test': ['jirasoftware']} .....
>>> epoch  0
 Average training MAE loss: 25.57
-
 Average eval MAE loss: 20.31
MAE:  3.331905
MdAE:  2.9716892
>>> epoch  1
 Average training MAE loss: 11.83
-
 Average eval MAE loss: 8.41
MAE:  2.018072
MdAE:  1.3974485
>>> epoch  2
 Average training MAE loss: 5.65
-
 Average eval MAE loss: 12.04
MAE:  2.3102365
MdAE:  1.6827383
>>> epoch  3
 Average training MAE loss: 2.89
-
 Average eval MAE loss: 8.91
MAE:  2.0530078
MdAE:  1.6691737
>>> epoch  4
 Average training MAE loss: 2.07
-
 Average eval MAE loss: 9.65
MAE:  2.1932287
MdAE:  1.8365936
>>> epoch  5
 Average training MAE loss: 1.12
-
 Average eval MAE loss: 13.11
MAE:  2.4435115
MdAE:  2.3060722
>>> epoch  6
 Average training MAE loss: 2.37
-
 Average eval MAE loss: 8.58
MAE:  1.9691124
MdAE:  1.223979
>>> epoch  7
 Average training

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['mesos'], 'test': ['mesos']} .....
>>> epoch  0
 Average training MAE loss: 7.46
-
 Average eval MAE loss: 6.49
MAE:  1.9931749
MdAE:  1.8402349
>>> epoch  1
 Average training MAE loss: 5.56
-
 Average eval MAE loss: 4.45
MAE:  1.5542097
MdAE:  1.362766
>>> epoch  2
 Average training MAE loss: 3.44
-
 Average eval MAE loss: 4.26
MAE:  1.4155613
MdAE:  1.0332493
>>> epoch  3
 Average training MAE loss: 2.10
-
 Average eval MAE loss: 4.38
MAE:  1.433119
MdAE:  1.1728595
>>> epoch  4
 Average training MAE loss: 1.54
-
 Average eval MAE loss: 4.26
MAE:  1.3974905
MdAE:  1.0901616
>>> epoch  5
 Average training MAE loss: 1.36
-
 Average eval MAE loss: 5.20
MAE:  1.6077414
MdAE:  1.3160386
>>> epoch  6
 Average training MAE loss: 1.02
-
 Average eval MAE loss: 4.34
MAE:  1.3738711
MdAE:  1.0683196
>>> epoch  7
 Av

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['moodle'], 'test': ['moodle']} .....
>>> epoch  0
 Average training MAE loss: 571.79
-
 Average eval MAE loss: 573.52
MAE:  12.598942
MdAE:  13.75243
>>> epoch  1
 Average training MAE loss: 445.73
-
 Average eval MAE loss: 516.86
MAE:  11.779709
MdAE:  11.292009
>>> epoch  2
 Average training MAE loss: 293.29
-
 Average eval MAE loss: 443.59
MAE:  12.859807
MdAE:  9.830744
>>> epoch  3
 Average training MAE loss: 138.43
-
 Average eval MAE loss: 460.76
MAE:  10.142724
MdAE:  8.125706
>>> epoch  4
 Average training MAE loss: 68.22
-
 Average eval MAE loss: 419.50
MAE:  14.187761
MdAE:  12.099871
>>> epoch  5
 Average training MAE loss: 53.60
-
 Average eval MAE loss: 453.85
MAE:  14.844414
MdAE:  12.949636
>>> epoch  6
 Average training MAE loss: 32.11
-
 Average eval MAE loss: 446.51
MAE:  15.07636
MdAE:  12.175335
>>> epoch  7
 Average t

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['mule'], 'test': ['mule']} .....
>>> epoch  0
 Average training MAE loss: 15.10
-
 Average eval MAE loss: 9.55
MAE:  2.7322893
MdAE:  2.6263456
>>> epoch  1
 Average training MAE loss: 9.06
-
 Average eval MAE loss: 9.84
MAE:  2.5648608
MdAE:  2.3239179
>>> epoch  2
 Average training MAE loss: 5.03
-
 Average eval MAE loss: 12.85
MAE:  3.1131797
MdAE:  2.8565195
>>> epoch  3
 Average training MAE loss: 2.61
-
 Average eval MAE loss: 11.05
MAE:  2.6314797
MdAE:  2.257971
>>> epoch  4
 Average training MAE loss: 1.91
-
 Average eval MAE loss: 11.86
MAE:  2.8244817
MdAE:  2.637557
>>> epoch  5
 Average training MAE loss: 1.30
-
 Average eval MAE loss: 11.38
MAE:  2.741017
MdAE:  2.559682
>>> epoch  6
 Average training MAE loss: 0.70
-
 Average eval MAE loss: 11.42
MAE:  2.8554783
MdAE:  2.5979981
>>> epoch  7
 Average training MAE loss: 0.82


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['mulestudio'], 'test': ['mulestudio']} .....
>>> epoch  0
 Average training MAE loss: 29.08
-
 Average eval MAE loss: 58.22
MAE:  3.7876222
MdAE:  2.814314
>>> epoch  1
 Average training MAE loss: 17.22
-
 Average eval MAE loss: 58.61
MAE:  3.7621157
MdAE:  2.908661
>>> epoch  2
 Average training MAE loss: 9.32
-
 Average eval MAE loss: 59.57
MAE:  3.887725
MdAE:  2.758008
>>> epoch  3
 Average training MAE loss: 3.92
-
 Average eval MAE loss: 54.79
MAE:  3.9521832
MdAE:  3.171423
>>> epoch  4
 Average training MAE loss: 3.22
-
 Average eval MAE loss: 58.05
MAE:  3.7948647
MdAE:  2.7362623
>>> epoch  5
 Average training MAE loss: 3.05
-
 Average eval MAE loss: 63.04
MAE:  3.9938076
MdAE:  2.820116
>>> epoch  6
 Average training MAE loss: 2.47
-
 Average eval MAE loss: 59.14
MAE:  3.9162436
MdAE:  2.8746734
>>> epoch  7
 Average training MA

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['springxd'], 'test': ['springxd']} .....
>>> epoch  0
 Average training MAE loss: 12.68
-
 Average eval MAE loss: 5.11
MAE:  2.0789132
MdAE:  1.877987
>>> epoch  1
 Average training MAE loss: 9.24
-
 Average eval MAE loss: 8.59
MAE:  2.711019
MdAE:  2.531328
>>> epoch  2
 Average training MAE loss: 5.97
-
 Average eval MAE loss: 7.72
MAE:  2.3767464
MdAE:  2.0808008
>>> epoch  3
 Average training MAE loss: 3.52
-
 Average eval MAE loss: 5.57
MAE:  1.9125504
MdAE:  1.4966627
>>> epoch  4
 Average training MAE loss: 2.60
-
 Average eval MAE loss: 8.24
MAE:  2.3805702
MdAE:  1.8280075
>>> epoch  5
 Average training MAE loss: 1.89
-
 Average eval MAE loss: 6.37
MAE:  2.0439177
MdAE:  1.5189184
>>> epoch  6
 Average training MAE loss: 1.52
-
 Average eval MAE loss: 6.52
MAE:  2.085

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['talenddataquality'], 'test': ['talenddataquality']} .....
>>> epoch  0
 Average training MAE loss: 31.92
-
 Average eval MAE loss: 26.76
MAE:  4.2486567
MdAE:  4.464247
>>> epoch  1
 Average training MAE loss: 19.69
-
 Average eval MAE loss: 30.52
MAE:  5.2487473
MdAE:  4.9714565
>>> epoch  2
 Average training MAE loss: 12.02
-
 Average eval MAE loss: 25.79
MAE:  3.8518445
MdAE:  3.7937565
>>> epoch  3
 Average training MAE loss: 6.62
-
 Average eval MAE loss: 26.64
MAE:  4.5436387
MdAE:  4.3907194
>>> epoch  4
 Average training MAE loss: 4.14
-
 Average eval MAE loss: 25.88
MAE:  4.4124956
MdAE:  4.193137
>>> epoch  5
 Average training MAE loss: 2.86
-
 Average eval MAE loss: 27.02
MAE:  4.5354385
MdAE:  4.3904114
>>> epoch  6
 Average training MAE loss: 2.42
-
 Average eval MAE loss: 24.19
MAE:  3.9337409
MdAE:  3.801639
>>> epoch  7
 A

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['talendesb'], 'test': ['talendesb']} .....
>>> epoch  0
 Average training MAE loss: 3.14
-
 Average eval MAE loss: 2.53
MAE:  0.9476132
MdAE:  0.6733115
>>> epoch  1
 Average training MAE loss: 1.55
-
 Average eval MAE loss: 2.69
MAE:  0.98689026
MdAE:  0.70084685
>>> epoch  2
 Average training MAE loss: 1.01
-
 Average eval MAE loss: 2.81
MAE:  1.0082505
MdAE:  0.69129074
>>> epoch  3
 Average training MAE loss: 0.52
-
 Average eval MAE loss: 2.89
MAE:  0.9756031
MdAE:  0.6125437
>>> epoch  4
 Average training MAE loss: 0.33
-
 Average eval MAE loss: 2.78
MAE:  0.9786424
MdAE:  0.7171731
>>> epoch  5
 Average training MAE loss: 0.39
-
 Average eval MAE loss: 2.72
MAE:  0.9490034
MdAE:  0.68289924
>>> epoch  6
 Average training MAE loss: 0.31
-
 Average eval MAE loss: 2.98
MAE:  1.0647031
MdAE:  0.76908696
>>> epoch  7
 Average training MA

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['titanium'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 35.24
-
 Average eval MAE loss: 15.98
MAE:  3.0147767
MdAE:  2.5986547
>>> epoch  1
 Average training MAE loss: 21.38
-
 Average eval MAE loss: 14.99
MAE:  2.7407074
MdAE:  2.1813853
>>> epoch  2
 Average training MAE loss: 14.45
-
 Average eval MAE loss: 22.17
MAE:  3.5770543
MdAE:  3.1264205
>>> epoch  3
 Average training MAE loss: 9.84
-
 Average eval MAE loss: 18.45
MAE:  2.7557676
MdAE:  2.0875545
>>> epoch  4
 Average training MAE loss: 5.42
-
 Average eval MAE loss: 19.56
MAE:  2.9054227
MdAE:  2.2184887
>>> epoch  5
 Average training MAE loss: 3.81
-
 Average eval MAE loss: 16.87
MAE:  2.7108357
MdAE:  2.06039
>>> epoch  6
 Average training MAE loss: 3.47
-
 Average eval MAE loss: 17.36
MAE:  2.7333567
MdAE:  2.049952
>

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


within project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
Start training for  {'train': ['usergrid'], 'test': ['usergrid']} .....
>>> epoch  0
 Average training MAE loss: 10.88
-
 Average eval MAE loss: 2.63
MAE:  1.4812906
MdAE:  1.2713976
>>> epoch  1
 Average training MAE loss: 1.49
-
 Average eval MAE loss: 2.56
MAE:  1.4864702
MdAE:  1.1901793
>>> epoch  2
 Average training MAE loss: 1.20
-
 Average eval MAE loss: 2.65
MAE:  1.4564929
MdAE:  1.1896538
>>> epoch  3
 Average training MAE loss: 0.59
-
 Average eval MAE loss: 2.58
MAE:  1.4871763
MdAE:  1.2894163
>>> epoch  4
 Average training MAE loss: 0.85
-
 Average eval MAE loss: 2.51
MAE:  1.4898107
MdAE:  1.2936151
>>> epoch  5
 Average training MAE loss: 0.21
-
 Average eval MAE loss: 2.43
MAE:  1.4644948
MdAE:  1.2801867
>>> epoch  6
 Average training MAE loss: 0.18
-
 Average eval MAE loss: 2.65
MAE:  1.493079
MdAE:  1.2983723
>>> epoch  7
 Average training MAE loss:

### Cross Project Training Script - Within Repository

In [7]:
# Cross-project run settings (train and test projects from the same repository).
# The hyperparameter cell recommends a batch size ratio of 0.4 for cross-project runs.
WITHIN_PROJECT = False
BATCH_SIZE_RATIO = 0.4

# Each entry names the project to train on and the project to test on.
TRAIN_TEST_FILE_PAIRS = [
    {'train': [train_project], 'test': [test_project]}
    for train_project, test_project in (
        ('mesos', 'usergrid'),
        ('usergrid', 'mesos'),
        ('appceleratorstudio', 'aptanastudio'),
        ('appceleratorstudio', 'titanium'),
        ('titanium', 'appceleratorstudio'),
        ('aptanastudio', 'titanium'),
        ('mule', 'mulestudio'),
        ('mulestudio', 'mule'),
    )
]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME, HF_MODEL_NAME
    
    # Load LLama model with 4 bit quantization as specified in bits and bytes and prepare model for peft training
    # Quantization Config (for QLORA)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16, # Changed to float16 for lower memory usage
    )
    # Lora Config
    lora_config = LoraConfig(
        r=8, # Reduced from 16 to 8 for lower memory usage
        lora_alpha=16,
        target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
        lora_dropout=0.1,
        bias='none',
        task_type='SEQ_CLS'
    )

    for file in TRAIN_TEST_FILE_PAIRS:
        optimize_memory()

        # Config for Lama3 model
        config = AutoConfig.from_pretrained(HF_MODEL_NAME, num_labels=1)
        if MODEL_NAME == 'llama3':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'llama3sp':
            MODEL = LLAMA3SP.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'deepseek':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'qwen':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        # prepare_model_for_kbit_training() function to preprocess the quantized model for training.
        MODEL = prepare_model_for_kbit_training(MODEL)
        # get_peft_model prepares a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model
        MODEL = get_peft_model(MODEL, lora_config)

        # additional memory optimizations
        MODEL.gradient_checkpointing_enable()  # Reduce memory usage during training
        MODEL.enable_input_require_grads()

        if TOKENIZER == 'wordlevel':
            MODEL.config.pad_token_id = 3
        elif TOKENIZER == 'sentencepiece':
            MODEL.config.pad_token_id = 0
        elif TOKENIZER == 'wordpiece':
            MODEL.config.pad_token_id = 0

        MODEL.cuda()

        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL
        optimize_memory()
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""

                
# Entry point: start the cross-project (within-repository) runs when this
# cell/script is executed directly.
if __name__ == "__main__":
    main()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['mesos'], 'test': ['usergrid']} .....
>>> epoch  0
 Average training MAE loss: 8.72
-
 Average eval MAE loss: 3.25
MAE:  1.2458876
MdAE:  0.9863001
>>> epoch  1
 Average training MAE loss: 5.26
-
 Average eval MAE loss: 4.30
MAE:  1.5236213
MdAE:  1.1998624
>>> epoch  2
 Average training MAE loss: 2.85
-
 Average eval MAE loss: 3.69
MAE:  1.2884929
MdAE:  0.98581505
>>> epoch  3
 Average training MAE loss: 2.19
-
 Average eval MAE loss: 5.23
MAE:  1.5182556
MdAE:  1.1842268
>>> epoch  4
 Average training MAE loss: 2.49
-
 Average eval MAE loss: 4.79
MAE:  1.4671437
MdAE:  1.1367322
>>> epoch  5
 Average training MAE loss: 1.27
-
 Average eval MAE loss: 3.71
MAE:  1.4234837
MdAE:  1.1407456
>>> epoch  6
 Average training MAE loss: 0.83
-
 Average eval MAE loss: 3.85
MAE:  1.39662

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['usergrid'], 'test': ['mesos']} .....
>>> epoch  0
 Average training MAE loss: 11.28
-
 Average eval MAE loss: 3.10
MAE:  1.7041667
MdAE:  1.198221
>>> epoch  1
 Average training MAE loss: 1.47
-
 Average eval MAE loss: 2.68
MAE:  1.6804981
MdAE:  1.3477235
>>> epoch  2
 Average training MAE loss: 1.30
-
 Average eval MAE loss: 2.55
MAE:  1.6504104
MdAE:  1.3013399
>>> epoch  3
 Average training MAE loss: 0.54
-
 Average eval MAE loss: 2.69
MAE:  1.6573416
MdAE:  1.2434778
>>> epoch  4
 Average training MAE loss: 0.23
-
 Average eval MAE loss: 2.69
MAE:  1.6681812
MdAE:  1.3330007
>>> epoch  5
 Average training MAE loss: 0.18
-
 Average eval MAE loss: 2.66
MAE:  1.7107381
MdAE:  1.4061103
>>> epoch  6
 Average training MAE loss: 0.13
-
 Average eval MAE loss: 2.81
MAE:  1.730685

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['appceleratorstudio'], 'test': ['aptanastudio']} .....
>>> epoch  0
 Average training MAE loss: 16.39
-
 Average eval MAE loss: 4.00
MAE:  4.630319
MdAE:  3.4143634
>>> epoch  1
 Average training MAE loss: 13.13
-
 Average eval MAE loss: 7.09
MAE:  3.9511602
MdAE:  2.7448945
>>> epoch  2
 Average training MAE loss: 8.39
-
 Average eval MAE loss: 5.40
MAE:  4.1327276
MdAE:  2.9493499
>>> epoch  3
 Average training MAE loss: 4.93
-
 Average eval MAE loss: 6.62
MAE:  4.0519123
MdAE:  2.6906977
>>> epoch  4
 Average training MAE loss: 3.10
-
 Average eval MAE loss: 6.15
MAE:  4.345239
MdAE:  3.1425018
>>> epoch  5
 Average training MAE loss: 2.28
-
 Average eval MAE loss: 7.88
MAE:  3.9523427
MdAE:  2.6837535
>>> epoch  6
 Average training MAE loss: 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['appceleratorstudio'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 19.69
-
 Average eval MAE loss: 5.73
MAE:  3.5479658
MdAE:  2.5609531
>>> epoch  1
 Average training MAE loss: 13.52
-
 Average eval MAE loss: 4.72
MAE:  3.2817395
MdAE:  2.2235017
>>> epoch  2
 Average training MAE loss: 9.45
-
 Average eval MAE loss: 5.21
MAE:  3.4180686
MdAE:  2.271946
>>> epoch  3
 Average training MAE loss: 7.08
-
 Average eval MAE loss: 6.63
MAE:  3.4712348
MdAE:  2.2989922
>>> epoch  4
 Average training MAE loss: 5.00
-
 Average eval MAE loss: 6.43
MAE:  3.5035808
MdAE:  2.5060797
>>> epoch  5
 Average training MAE loss: 3.67
-
 Average eval MAE loss: 5.35
MAE:  3.458964

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['titanium'], 'test': ['appceleratorstudio']} .....
>>> epoch  0
 Average training MAE loss: 31.40
-
 Average eval MAE loss: 15.80
MAE:  2.637276
MdAE:  2.082324
>>> epoch  1
 Average training MAE loss: 21.50
-
 Average eval MAE loss: 15.24
MAE:  2.587139
MdAE:  1.9294453
>>> epoch  2
 Average training MAE loss: 13.26
-
 Average eval MAE loss: 21.07
MAE:  2.7973397
MdAE:  2.067708
>>> epoch  3
 Average training MAE loss: 8.38
-
 Average eval MAE loss: 15.37
MAE:  2.6518965
MdAE:  2.015575
>>> epoch  4
 Average training MAE loss: 5.46
-
 Average eval MAE loss: 17.63
MAE:  2.6345232
MdAE:  1.954257
>>> epoch  5
 Average training MAE loss: 4.85
-
 Average eval MAE loss: 18.81
MAE:  2.7492201
MdAE:  2.0748968
>>> epoch  

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['aptanastudio'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 39.36
-
 Average eval MAE loss: 34.41
MAE:  3.5362182
MdAE:  2.5565157
>>> epoch  1
 Average training MAE loss: 29.20
-
 Average eval MAE loss: 34.14
MAE:  3.6454935
MdAE:  2.806098
>>> epoch  2
 Average training MAE loss: 18.89
-
 Average eval MAE loss: 34.21
MAE:  3.7638392
MdAE:  2.8902502
>>> epoch  3
 Average training MAE loss: 8.76
-
 Average eval MAE loss: 34.41
MAE:  4.611096
MdAE:  4.0659943
>>> epoch  4
 Average training MAE loss: 5.60
-
 Average eval MAE loss: 34.59
MAE:  3.9136565
MdAE:  3.0465822
>>> epoch  5
 Average training MAE loss: 4.86
-
 Average eval MAE loss: 32.91
MAE:  4.532384
MdAE:  3.9551806
>>> epoch  6
 Average training MAE loss: 2.29


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['mule'], 'test': ['mulestudio']} .....
>>> epoch  0
 Average training MAE loss: 13.81
-
 Average eval MAE loss: 10.22
MAE:  3.5305417
MdAE:  2.5355532
>>> epoch  1
 Average training MAE loss: 8.69
-
 Average eval MAE loss: 10.11
MAE:  3.6041436
MdAE:  2.3004165
>>> epoch  2
 Average training MAE loss: 5.84
-
 Average eval MAE loss: 10.64
MAE:  3.7072618
MdAE:  2.288129
>>> epoch  3
 Average training MAE loss: 3.15
-
 Average eval MAE loss: 10.55
MAE:  3.6083584
MdAE:  2.5387995
>>> epoch  4
 Average training MAE loss: 1.59
-
 Average eval MAE loss: 10.44
MAE:  3.6716852
MdAE:  2.5071654
>>> epoch  5
 Average training MAE loss: 1.40
-
 Average eval MAE loss: 11.63
MAE:  3.7419276
MdAE:  2.6820593
>>> epoch  6
 Average training MAE loss: 1.28
-
 Average eval MAE loss: 10.40
MAE:  3.5965905
MdAE:  2.5059829
>>> e

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['mulestudio'], 'test': ['mule']} .....
>>> epoch  0
 Average training MAE loss: 26.45
-
 Average eval MAE loss: 45.33
MAE:  2.7474277
MdAE:  2.4203367
>>> epoch  1
 Average training MAE loss: 16.94
-
 Average eval MAE loss: 38.08
MAE:  3.0882294
MdAE:  2.6277776
>>> epoch  2
 Average training MAE loss: 9.90
-
 Average eval MAE loss: 44.77
MAE:  3.0148852
MdAE:  2.315507
>>> epoch  3
 Average training MAE loss: 5.64
-
 Average eval MAE loss: 44.39
MAE:  2.7244713
MdAE:  2.2782896
>>> epoch  4
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 40.51
MAE:  2.982663
MdAE:  2.4238725
>>> epoch  5
 Average training MAE loss: 3.72
-
 Average eval MAE loss: 38.16
MAE:  2.9816957
MdAE:  2.614366
>>> epoch  6
 Average training MAE loss: 1.90
-
 Average eval MAE loss: 38.75
MAE:  2.926465
MdAE:  2.4452267
>>> epo

### Cross Project Training Script - Cross Repository

In [8]:
# Cross-project run settings (train and test projects from different repositories).
# The hyperparameter cell recommends a batch size ratio of 0.4 for cross-project runs.
WITHIN_PROJECT = False
BATCH_SIZE_RATIO = 0.4

# Each entry names the project to train on and the project to test on.
TRAIN_TEST_FILE_PAIRS = [
    {'train': [train_project], 'test': [test_project]}
    for train_project, test_project in (
        ('clover', 'usergrid'),
        ('talendesb', 'mesos'),
        ('talenddataquality', 'aptanastudio'),
        ('mule', 'titanium'),
        ('talenddataquality', 'appceleratorstudio'),
        ('mulestudio', 'titanium'),
        ('appceleratorstudio', 'mulestudio'),
        ('appceleratorstudio', 'mule'),
    )
]


def main():
    global TRAIN_TEST_FILE_PAIRS, MODEL, TOKENIZER, MODEL_NAME, HF_MODEL_NAME
    
    # Load LLama model with 4 bit quantization as specified in bits and bytes and prepare model for peft training
    # Quantization Config (for QLORA)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,  # Cambiado a float16 para menor uso de memoria
    )
    # Lora Config
    lora_config = LoraConfig(
        r=8, # Reduced from 16 to 8 for lower memory usage
        lora_alpha=16,
        target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
        lora_dropout=0.1,
        bias='none',
        task_type='SEQ_CLS'
    )

    for file in TRAIN_TEST_FILE_PAIRS:
        optimize_memory()

        # Config for Lama3 model
        config = AutoConfig.from_pretrained(HF_MODEL_NAME, num_labels=1)
        if MODEL_NAME == 'llama3':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'llama3sp':
            MODEL = LLAMA3SP.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'deepseek':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        elif MODEL_NAME == 'qwen':
            MODEL = AutoModelForSequenceClassification.from_pretrained(
                HF_MODEL_NAME,
                quantization_config=quantization_config,
                # num_labels=1, # For regression
                torch_dtype=torch.float16,
                device_map='auto',
                low_cpu_mem_usage=True,
                config=config,
            )
        # prepare_model_for_kbit_training() function to preprocess the quantized model for training.
        MODEL = prepare_model_for_kbit_training(MODEL)
        # get_peft_model prepares a model for training with a PEFT method such as LoRA by wrapping the base model and PEFT configuration with get_peft_model
        MODEL = get_peft_model(MODEL, lora_config)

        # additional memory optimizations
        MODEL.gradient_checkpointing_enable()  # Reduce memory usage during training
        MODEL.enable_input_require_grads()

        if TOKENIZER == 'wordlevel':
            MODEL.config.pad_token_id = 3
        elif TOKENIZER == 'sentencepiece':
            MODEL.config.pad_token_id = 0
        elif TOKENIZER == 'wordpiece':
            MODEL.config.pad_token_id = 0

        MODEL.cuda()
        file_pair, train_dataloader, val_dataloader, all_test_dataloader, test_file_names = data_processing(file_pair=file)
        train_eval_test(file_pair, train_dataloader, val_dataloader, all_test_dataloader, MODEL, test_file_names)
        del MODEL 
        optimize_memory()
        torch.cuda.empty_cache()            
        global OUTPUT
        with open('./results/' + str(file['train'][0]) + '_' + str(file['test'][0]) +'.txt', 'w+') as f:
            f.writelines(OUTPUT)
            print('results have been written into a text file!')
            OUTPUT = ""

                
# Entry point: start the cross-project (cross-repository) runs when this
# cell/script is executed directly.
if __name__ == "__main__":
    main()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['clover'], 'test': ['usergrid']} .....
>>> epoch  0
 Average training MAE loss: 46.19
-
 Average eval MAE loss: 49.65
MAE:  3.4119787
MdAE:  3.4438124
>>> epoch  1
 Average training MAE loss: 30.37
-
 Average eval MAE loss: 35.78
MAE:  2.0512733
MdAE:  1.8475847
>>> epoch  2
 Average training MAE loss: 20.71
-
 Average eval MAE loss: 41.82
MAE:  1.9108717
MdAE:  1.6488316
>>> epoch  3
 Average training MAE loss: 15.13
-
 Average eval MAE loss: 43.41
MAE:  2.6068974
MdAE:  2.309262
>>> epoch  4
 Average training MAE loss: 6.75
-
 Average eval MAE loss: 42.18
MAE:  2.6494951
MdAE:  2.2329254
>>> epoch  5
 Average training MAE loss: 2.21
-
 Average eval MAE loss: 40.39
MAE:  2.019535
MdAE:  1.7288733
>>> epoch  6
 Average training MAE loss: 2.01
-
 Average eval MAE loss: 40.51
MAE:  2.0863597
MdAE:  1.8308818
>>>

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['talendesb'], 'test': ['mesos']} .....
>>> epoch  0
 Average training MAE loss: 3.92
-
 Average eval MAE loss: 2.06
MAE:  1.635124
MdAE:  1.0246979
>>> epoch  1
 Average training MAE loss: 1.53
-
 Average eval MAE loss: 2.05
MAE:  1.5145098
MdAE:  0.9832066
>>> epoch  2
 Average training MAE loss: 0.98
-
 Average eval MAE loss: 2.72
MAE:  1.5977587
MdAE:  1.0607721
>>> epoch  3
 Average training MAE loss: 0.53
-
 Average eval MAE loss: 2.29
MAE:  1.77549
MdAE:  1.2070217
>>> epoch  4
 Average training MAE loss: 0.42
-
 Average eval MAE loss: 2.06
MAE:  1.6380045
MdAE:  1.0139602
>>> epoch  5
 Average training MAE loss: 0.27
-
 Average eval MAE loss: 2.05
MAE:  1.5984111
MdAE:  0.99618363
>>> epoch  6
 Average training MAE loss: 0.18
-
 Average eval MAE loss: 2.14
MAE:  1.6095941

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['talenddataquality'], 'test': ['aptanastudio']} .....
>>> epoch  0
 Average training MAE loss: 36.61
-
 Average eval MAE loss: 25.45
MAE:  4.0535917
MdAE:  2.8079658
>>> epoch  1
 Average training MAE loss: 21.23
-
 Average eval MAE loss: 28.13
MAE:  4.1533237
MdAE:  3.1360626
>>> epoch  2
 Average training MAE loss: 14.01
-
 Average eval MAE loss: 22.43
MAE:  4.207697
MdAE:  3.0496073
>>> epoch  3
 Average training MAE loss: 8.09
-
 Average eval MAE loss: 25.08
MAE:  4.316
MdAE:  3.194418
>>> epoch  4
 Average training MAE loss: 4.35
-
 Average eval MAE loss: 31.31
MAE:  4.19203
MdAE:  3.3543873
>>> epoch  5
 Average training MAE loss: 3.12
-
 Average eval MAE loss: 25.88
MAE:  4.2701716
MdAE:  3.2054095
>>> epoch  6
 Average training MAE loss: 2.99
-
 Average eval MAE loss: 24.47
MAE:  4.2278743
MdAE:  3.119

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['mule'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 20.44
-
 Average eval MAE loss: 10.69
MAE:  3.398951
MdAE:  2.2323399
>>> epoch  1
 Average training MAE loss: 10.05
-
 Average eval MAE loss: 10.36
MAE:  3.6782262
MdAE:  2.2390995
>>> epoch  2
 Average training MAE loss: 6.14
-
 Average eval MAE loss: 11.21
MAE:  3.4111068
MdAE:  2.339006
>>> epoch  3
 Average training MAE loss: 3.57
-
 Average eval MAE loss: 10.38
MAE:  3.592048
MdAE:  2.1889277
>>> epoch  4
 Average training MAE loss: 2.60
-
 Average eval MAE loss: 10.70
MAE:  3.5026834
MdAE:  2.3197064
>>> epoch  5
 Average training MAE loss: 1.75
-
 Average eval MAE loss: 10.40
MAE:  3.4687011
MdAE:  2.309823
>>> epoch  6
 Average training MAE loss: 1.35
-
 Average

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['talenddataquality'], 'test': ['appceleratorstudio']} .....
>>> epoch  0
 Average training MAE loss: 35.53
-
 Average eval MAE loss: 26.57
MAE:  2.6246564
MdAE:  1.9418707
>>> epoch  1
 Average training MAE loss: 22.48
-
 Average eval MAE loss: 21.21
MAE:  2.4628508
MdAE:  1.9015484
>>> epoch  2
 Average training MAE loss: 17.52
-
 Average eval MAE loss: 23.04
MAE:  2.661877
MdAE:  2.0798879
>>> epoch  3
 Average training MAE loss: 9.40
-
 Average eval MAE loss: 29.93
MAE:  3.33324
MdAE:  2.804533
>>> epoch  4
 Average training MAE loss: 5.16
-
 Average eval MAE loss: 24.84
MAE:  2.7217054
MdAE:  2.162005
>>> epoch  5
 Average training MAE loss: 3.79
-
 Average eval MAE loss: 24.67
MAE:  2.9540203
MdAE:  2.415741
>>> epoch  6
 Average training MAE

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['mulestudio'], 'test': ['titanium']} .....
>>> epoch  0
 Average training MAE loss: 27.08
-
 Average eval MAE loss: 39.10
MAE:  3.46333
MdAE:  2.3932767
>>> epoch  1
 Average training MAE loss: 15.89
-
 Average eval MAE loss: 43.31
MAE:  3.4286234
MdAE:  2.2223644
>>> epoch  2
 Average training MAE loss: 7.63
-
 Average eval MAE loss: 40.02
MAE:  3.5787332
MdAE:  2.532837
>>> epoch  3
 Average training MAE loss: 4.49
-
 Average eval MAE loss: 41.13
MAE:  3.575898
MdAE:  2.555266
>>> epoch  4
 Average training MAE loss: 3.03
-
 Average eval MAE loss: 38.67
MAE:  3.834474
MdAE:  2.880488
>>> epoch  5
 Average training MAE loss: 2.96
-
 Average eval MAE loss: 38.55
MAE:  3.7675242
MdAE:  2.7828727
>>> epoch  6
 Average training MAE loss: 1.57
-
 Aver

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['appceleratorstudio'], 'test': ['mulestudio']} .....
>>> epoch  0
 Average training MAE loss: 19.97
-
 Average eval MAE loss: 4.98
MAE:  3.6080663
MdAE:  2.4341588
>>> epoch  1
 Average training MAE loss: 12.74
-
 Average eval MAE loss: 7.36
MAE:  3.8913705
MdAE:  3.126961
>>> epoch  2
 Average training MAE loss: 8.31
-
 Average eval MAE loss: 6.09
MAE:  3.6601543
MdAE:  2.7717974
>>> epoch  3
 Average training MAE loss: 4.79
-
 Average eval MAE loss: 6.20
MAE:  3.68953
MdAE:  2.548896
>>> epoch  4
 Average training MAE loss: 3.85
-
 Average eval MAE loss: 10.05
MAE:  4.1147146
MdAE:  3.2314801
>>> epoch  5
 Average training MAE loss: 2.64
-
 Average eval MAE loss: 5.47
MAE:  3.5572743
MdAE:  2.545262
>>> epoch  6
 Average training MAE loss: 2.10


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-1.7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cross project split!
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
using pretrained Qwen tokenizer
cross project data processing!
Start training for  {'train': ['appceleratorstudio'], 'test': ['mule']} .....
>>> epoch  0
 Average training MAE loss: 16.83
-
 Average eval MAE loss: 9.04
MAE:  3.5204232
MdAE:  3.4285717
>>> epoch  1
 Average training MAE loss: 12.45
-
 Average eval MAE loss: 5.28
MAE:  2.7992587
MdAE:  2.4242382
>>> epoch  2
 Average training MAE loss: 8.97
-
 Average eval MAE loss: 8.57
MAE:  3.15766
MdAE:  2.8181162
>>> epoch  3
 Average training MAE loss: 5.34
-
 Average eval MAE loss: 8.50
MAE:  3.4273376
MdAE:  2.9539943
>>> epoch  4
 Average training MAE loss: 3.95
-
 Average eval MAE loss: 7.39
MAE:  2.9087496
MdAE:  2.4951525
>>> epoch  5
 Average training MAE loss: 2.43
-
 Average eval MAE loss: 6.92
MAE:  2.928387
MdAE:  2.438219
>>> epoch  6
 Average training MAE loss: 2.11
-
 Ave