# Llama3SP - Model Inspection Notebook

#### Hello!  Welcome to the model inspection notebook, we provides the scripts for model loading and model inference on testing data of model training process for all of the models mentioned in our experiments in the paper. 

##### Attention!!!
##### Before interacting with this notebook, you may want to install a few dependencies [HERE](#dependencies).
##### Also, make sure to run the [Static Methods](#static-method) cell, then you are good to go
#### The models are categorized by the experiment scenario, please follow the link as follows to reach the specific section

### 1. [Within Project Models](#within_project)
### 2. [Cross Project Models](#cross_project)

<a id='dependencies'></a>
## Dependencies Installation
#### run the cell below to install the dependencies

In [None]:
%!pip install transformers
%!pip install peft
%!pip install torch
%!pip install tokenizers
%!pip install captum

<a id='static-method'></a>
## Static Methods

In [1]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from Llama3SP import LlamaForSequenceClassification as Llama3SP
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    XLNetTokenizer,
    BertTokenizer,
    BitsAndBytesConfig,
)
from peft import (
    PeftModel,
    PeftConfig,
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model,
)
from tokenizers import Tokenizer
from pathlib import Path

import torch
import pandas as pd
import numpy as np
import os
import gc
import shutil
import re

os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Tokenizer Mapping
TOKENIZER_MAPPING = {"#0": "llama3", "#00": "llama3", "#000": "llama3",
                     "#2": "sp_word_level", "#22": "sp_word_level", "#222": "sp_word_level",
                     "#6": "wordpiece_sp", "#66": "wordpiece_sp", "#666": "wordpiece_sp",
                     "#7": "sentencepiece_sp", "#77": "sentencepiece_sp", "#777": "sentencepiece_sp"}

# pad token ID mapping
PAD_TOKEN_ID_MAPPING = {"llama3": 128001, "sp_word_level": 3, "wordpiece_sp": 0, "sentencepiece_sp": 0}
# static global vars
global DYNAMIC_BATCH, DEVICE
DYNAMIC_BATCH = True
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    # set up to release cache memory when possible
    torch.cuda.empty_cache()
    # set up more conservative memory limits  
    torch.cuda.set_per_process_memory_fraction(0.8)  # Use only 80% of GPU memory
SEQUENCE_LEN = 20
# dynamic global vars
global PAD_TOKEN_ID, BATCH_SIZE_RATIO, BATCH_SIZE, TOKENIZER, WITHIN_PROJECT, TEXT, KEY, TOK, MODEL, PROJECT_ID
PAD_TOKEN_ID = None
BATCH_SIZE_RATIO = None
BATCH_SIZE = None
TOKENIZER = None
WITHIN_PROJECT = None
TEXT = None
KEY = None
TOK = None
MODEL = None
PROJECT_ID = None


def tokenization(text_list, path):
    global TOKENIZER, SEQUENCE_LEN, MODEL, TOK, PAD_TOKEN_ID
    
    if TOKENIZER == 'llama3':
        print('using llama3 tokenizer!')
        tokenizer = AutoTokenizer.from_pretrained(path)

        tokenizer.pad_token = tokenizer.eos_token
        MODEL.config.pad_token_id = MODEL.config.eos_token_id 
        
        # ensure that no sequence exceeds SEQUENCE_LEN
        encoded_dict = tokenizer.batch_encode_plus(
            text_list,
            max_length=SEQUENCE_LEN,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
            return_attention_mask=True,
            return_token_type_ids=False
        )
        
        # Verify dimensions
        if encoded_dict['input_ids'].shape[1] > SEQUENCE_LEN:
            print(f"Warning: Truncating sequences to {SEQUENCE_LEN} tokens")
            encoded_dict['input_ids'] = encoded_dict['input_ids'][:, :SEQUENCE_LEN]
            if 'attention_mask' in encoded_dict:
                encoded_dict['attention_mask'] = encoded_dict['attention_mask'][:, :SEQUENCE_LEN]
        
        TOK = tokenizer
        return encoded_dict
    elif TOKENIZER == 'sp_word_level':
        print('using word-level tokenizer!')
        tokenizer = Tokenizer.from_pretrained(path)
        encoded_sentences = {'input_ids':[]}
        for sentence in text_list:
            encoded = tokenizer.encode(sentence)
            encoded = encoded.ids
            if len(encoded) > SEQUENCE_LEN:
                encoded = encoded[:SEQUENCE_LEN]
            elif len(encoded) < SEQUENCE_LEN:
                padding = SEQUENCE_LEN - len(encoded)
                for _ in range(padding):
                    encoded.append(3)
            encoded_sentences['input_ids'].append(encoded)
        tokenizer.pad_token_id = PAD_TOKEN_ID
        MODEL.config.pad_token_id = PAD_TOKEN_ID

        TOK = tokenizer
        return encoded_sentences
    elif TOKENIZER == 'sentencepiece_sp':
        print('using sentencepiece tokenizer!')
        tokenizer = XLNetTokenizer('all_tokenizers/sp_sentence_piece/spm_tokenizer.model', padding_side='right')

        tokenizer.pad_token_id = tokenizer.eos_token_id
        tokenizer.pad_token = tokenizer.eos_token
        # update some model configs
        # must use .cache = False as below or it crashes from my experience
        MODEL.config.pad_token_id = tokenizer.pad_token_id
        MODEL.config.use_cache = False
        MODEL.config.pretraining_tp = 1
        return tokenizer.batch_encode_plus(text_list, truncation=True, max_length=SEQUENCE_LEN, padding='max_length')
    elif TOKENIZER == 'wordpiece_sp':
        print('using wordpiece tokenizer!')
        tokenizer = BertTokenizer('all_tokenizers/sp_word_piece/vocab.txt')

        MODEL.config.pad_token_id = tokenizer.pad_token_id
        MODEL.config.use_cache = False
        MODEL.config.pretraining_tp = 1
        return tokenizer.batch_encode_plus(text_list, truncation=True, max_length=SEQUENCE_LEN, padding='max_length')


def prepare_dataframe(file_name):
    data = pd.read_csv(file_name)
    # some rows have no description, fill blank to avoid Null
    data = data.fillna(' ')
    d = {'text': data['title'], 'label': data['storypoint'], 'issuekey': data['issuekey']}
    return pd.DataFrame(data=d)


def prepare_dataloader(seq, y, sampler_type):
    global BATCH_SIZE
    tensor_dataset = TensorDataset(seq, y)
    if sampler_type == 'random':
        sampler = RandomSampler(tensor_dataset)
    elif sampler_type == 'sequential':
        sampler = SequentialSampler(tensor_dataset)
    dataloader = DataLoader(tensor_dataset, sampler=sampler, batch_size=BATCH_SIZE)
    return dataloader


def load_trained_model(model_id, project_name):
    global WITHIN_PROJECT, MODEL, PROJECT_ID

    local = False
    try:
        int(model_id[1:])
        local = False
    except:
        local = True

    if local:
        print("Loading model from local...")

        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        lora_config = LoraConfig(
            r=8,
            lora_alpha=16,
            target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
            lora_dropout=0.1,
            bias='none',
            task_type='SEQ_CLS'
        )
        HF_MODEL_NAME = "meta-llama/Llama-3.2-1B"
        MODEL = AutoModelForSequenceClassification.from_pretrained(
            HF_MODEL_NAME,
            quantization_config=quantization_config,
            num_labels=1,
            torch_dtype=torch.float16,
            device_map='auto',
            low_cpu_mem_usage=True,
            pad_token_id=PAD_TOKEN_ID,
        )
        MODEL = prepare_model_for_kbit_training(MODEL)
        MODEL = get_peft_model(MODEL, lora_config)
        MODEL.gradient_checkpointing_enable()
        MODEL.enable_input_require_grads()

        state_dict = torch.load(model_id, map_location=DEVICE, weights_only=True)
        MODEL.load_state_dict(state_dict, strict=False)

    else:
        print("Loading model from Hugging Face...")

        if WITHIN_PROJECT:
            path = "DEVCamiloSepulveda/" + model_id[1:] + "-LLAMA3SP-" + project_name
        else:
            path = "DEVCamiloSepulveda/" + model_id[1:] + "-LLAMA3SP-" + project_name.split("_")[0] + "-" + project_name.split("_")[1]
        PROJECT_ID = path

        # Load the model configuration
        config = PeftConfig.from_pretrained(path)

        # Load the original base model
        base_model = AutoModelForSequenceClassification.from_pretrained(
            config.base_model_name_or_path,
            num_labels=1,
            torch_dtype=torch.float16,
            device_map='auto'
        )

        # Load the LoRA adapters
        MODEL = PeftModel.from_pretrained(base_model, path)
    
    return MODEL


def prepare_test_dataloader(file_name, model_id, project_name):
    global WITHIN_PROJECT, BATCH_SIZE, BATCH_SIZE_RATIO, TEXT, KEY, MODEL

    global WITHIN_PROJECT, MODEL
    if WITHIN_PROJECT:
        path = "DEVCamiloSepulveda/" + model_id[1:] + "-LLAMA3SP-" + project_name
    else:
        path = "DEVCamiloSepulveda/" + model_id[1:] + "-LLAMA3SP-" + project_name.split("_")[0] + "-" + project_name.split("_")[1]
    path = "meta-llama/Llama-3.2-1B"

    if WITHIN_PROJECT:
        # calculate the batch size
        df = prepare_dataframe(file_name)
        BATCH_SIZE = min(int(int(len(df['text'][:int(len(df)*0.6)])) * BATCH_SIZE_RATIO), 512)
        print("Batch Size: ", BATCH_SIZE)
        # prepare testing data
        test_text = df['text'][int(len(df)*0.8):]
        TEXT = test_text
        KEY = df['issuekey'][int(len(df)*0.8):]
        test_labels = df['label'][int(len(df)*0.8):]
        tokens_test = tokenization(test_text.tolist(), path)
        test_seq = torch.tensor(tokens_test['input_ids'])
        test_y = torch.tensor(test_labels.tolist()).type(torch.LongTensor)
        test_dataloader = prepare_dataloader(test_seq, test_y, sampler_type='sequential')
    else:
        # calculate the batch size based on training data
        df = prepare_dataframe('sp_dataset/marked_data/' + file_name[0] + '.csv')
        BATCH_SIZE = min(int(int(len(df['text'][:int(len(df)*0.6)])) * BATCH_SIZE_RATIO), 512)
        # prepare testing data
        df = prepare_dataframe('sp_dataset/marked_data/' + file_name[1] + '.csv')
        test_text = df['text']
        TEXT = test_text
        KEY = df['issuekey']
        test_labels = df['label']
        tokens_test = tokenization(test_text.tolist(), path)
        test_seq = torch.tensor(tokens_test['input_ids'])
        test_y = torch.tensor(test_labels.tolist()).type(torch.LongTensor)
        test_dataloader = prepare_dataloader(test_seq, test_y, sampler_type='sequential')         
    return test_dataloader
 

def do_inference(trained_model, test_dataloader):
    global TEXT, KEY
    global XAI
    predictions = []
    true_labels = []
    try:
        for batch in test_dataloader:
            batch = tuple(t.to(DEVICE) for t in batch)
            b_input_ids, b_labels = batch
            with torch.no_grad():
                logits = trained_model(b_input_ids)
            logits = logits['logits'].detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            predictions.append(logits)
            true_labels.append(label_ids)
        # Calculate errors
        distance_records = []
        for i in range(len(predictions)):
            for j in range(len(predictions[i])):
                # Calculate absolute difference between prediction and true value
                distance = abs(predictions[i][j] - true_labels[i][j])
                distance_records.append(distance)
        
        # Convert to numpy array for efficient calculation
        distance_array = np.array(distance_records)
        
        # Calculate metrics
        MAE = np.mean(distance_array)
        MdAE = np.median(distance_array)

        return MAE, MdAE
        
    except Exception as e:
        print(f"Unexpected error in do_inference: {e}")
        import traceback
        traceback.print_exc()
        return None


def main(model_id, project_name):
    global WITHIN_PROJECT, BATCH_SIZE_RATIO, TOKENIZER, PAD_TOKEN_ID, MODEL
    # define tokenizer based on model ID
    try:
        TOKENIZER = TOKENIZER_MAPPING[model_id]
    except:
        TOKENIZER = 'llama3'
    PAD_TOKEN_ID = PAD_TOKEN_ID_MAPPING[TOKENIZER]

    if len(project_name.split('_')) == 1:
        WITHIN_PROJECT = True
        BATCH_SIZE_RATIO = 0.3
        print('within project inference using model ' + model_id + ' for project ' + project_name)
        file_name = 'sp_dataset/marked_data/' + project_name + '.csv'
    else:
        WITHIN_PROJECT = False
        BATCH_SIZE_RATIO = 0.4
        training_project = project_name.split('_')[0]
        testing_project = project_name.split('_')[1]
        print('cross project inference using model ' + model_id + ' trained on ' + training_project 
              + ' for project ' + testing_project)
        file_name = (training_project, testing_project)
    trained_model = load_trained_model(model_id, project_name)
    
    trained_model.to(DEVICE)
    trained_model.eval()
    
    test_dataloader = prepare_test_dataloader(file_name, model_id, project_name)
    predictions = do_inference(trained_model, test_dataloader)
    
    return predictions


def clean_hf_cache(model_id=None):
    """
    Cleans the cache of files downloaded from Hugging Face.
    
    Args:
        model_id (str, optional): Specific model ID to clean.
                                  If not specified, cleans the entire cache.
    """
    # Use the correct cache path
    cache_path = os.path.join(str(Path.home()), '.cache', 'huggingface', 'hub')
    
    if not os.path.exists(cache_path):
        print(f"Cache directory not found at: {cache_path}")
        return
        
    if model_id:
        # If a model is specified, look for its specific directory
        model_path = os.path.join(cache_path, 'models--' + model_id.replace('/', '--'))
        if os.path.exists(model_path):
            print(f"Deleting cache for model: {model_id}")
            try:
                shutil.rmtree(model_path)
                print(f"Cache successfully deleted for: {model_id}")
            except Exception as e:
                print(f"Error deleting cache: {e}")
        else:
            print(f"Cache not found for model: {model_id}")
    else:
        # Clean the entire cache
        print(f"Deleting all Hugging Face cache at: {cache_path}")
        try:
            for item in os.listdir(cache_path):
                item_path = os.path.join(cache_path, item)
                if os.path.isdir(item_path):
                    shutil.rmtree(item_path)
                else:
                    os.remove(item_path)
            print("Cache completely deleted")
        except Exception as e:
            print(f"Error deleting cache: {e}")


def clean_gpu_memory():
    """
    Clean GPU memory by releasing cache memory and unused tensors
    """
    global MODEL

    del MODEL

    # Release PyTorch cache memory
    torch.cuda.empty_cache()
    
    # Release tensor memory
    if torch.cuda.is_available():
        # Get the current device
        device = torch.cuda.current_device()
        
        # Synchronize the device to ensure all operations are complete
        torch.cuda.synchronize(device)
        
        # Force garbage collection
        gc.collect()
        
        # Release all tensors assigned to the device
        torch.cuda.empty_cache()
        
        # Reset all CUDA devices
        torch.cuda.reset_peak_memory_stats(device)
        torch.cuda.reset_accumulated_memory_stats(device)


def local_model_inference(models):
    maes = []
    for i, model in enumerate(models):
        train_project = model['train']
        test_project = model['test']
        model_name = f"{train_project}_{test_project}"
        # Open the file in results to upload the model
        with open(f"./results/{train_project}_{test_project}.txt", "r") as f:
            model_results = f.read()
            mae, mdae, training_time, epochs, batch_size = (
                float(re.search(r"MAE:\s*([\d.]+)", model_results).group(1)),
                float(re.search(r"MdAE:\s*([\d.]+)", model_results).group(1)),
                float(re.search(r"training time:\s*([\d.]+)", model_results).group(1)),
                int(re.search(r"Epochs:\s*(\d+)", model_results).group(1)),
                int(re.search(r"batch size:\s*(\d+)", model_results).group(1))
            )

        mae, mdae = main(f"./models/{model_name}_epo_{epochs}", train_project)
        mae = round(mae, 2)
        maes.append(mae)

        print(f"Model: {model_name}, MAE: {mae}, MdAE: {mdae}")

        clean_gpu_memory()
    return maes


def create_df(maes, models):
    data = [
        {'Project': train_proj, 'MAE': mae} 
        if (train_proj := model['train']) == (test_proj := model['test'])
        else {'Train': train_proj, 'Test': test_proj, 'MAE': mae}
        for model, mae in zip(models, maes)
    ]
    return pd.DataFrame(data)

In [2]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(0.8)

PROJECTS = [
    [
        {'train': 'appceleratorstudio', 'test': 'appceleratorstudio'},
        {'train': 'aptanastudio', 'test': 'aptanastudio'},
        {'train': 'bamboo', 'test': 'bamboo'},
        {'train': 'clover', 'test': 'clover'},
        {'train': 'datamanagement', 'test': 'datamanagement'},
        {'train': 'duracloud', 'test': 'duracloud'},
        {'train': 'jirasoftware', 'test': 'jirasoftware'},
        {'train': 'mesos', 'test': 'mesos'},
        {'train': 'moodle', 'test': 'moodle'},
        {'train': 'mule', 'test': 'mule'},
        {'train': 'mulestudio', 'test': 'mulestudio'},
        {'train': 'springxd', 'test': 'springxd'},
        {'train': 'talenddataquality', 'test': 'talenddataquality'},
        {'train': 'talendesb', 'test': 'talendesb'},
        {'train': 'titanium', 'test': 'titanium'},
        {'train': 'usergrid', 'test': 'usergrid'},
    ],
    [
        {'train': 'mesos', 'test': 'usergrid'},
        {'train': 'usergrid', 'test': 'mesos'},
        {'train': 'appceleratorstudio', 'test': 'aptanastudio'},
        {'train': 'appceleratorstudio', 'test': 'titanium'},
        {'train': 'titanium', 'test': 'appceleratorstudio'},
        {'train': 'aptanastudio', 'test': 'titanium'},
        {'train': 'mule', 'test': 'mulestudio'},
        {'train': 'mulestudio', 'test': 'mule'}
    ],
    [
        {'train': 'clover', 'test': 'usergrid'},
        {'train': 'talendesb', 'test': 'mesos'},
        {'train': 'talenddataquality', 'test': 'aptanastudio'},
        {'train': 'mule', 'test': 'titanium'},
        {'train': 'talenddataquality', 'test': 'appceleratorstudio'},
        {'train': 'mulestudio', 'test': 'titanium'},
        {'train': 'appceleratorstudio', 'test': 'mulestudio'},
        {'train': 'appceleratorstudio', 'test': 'mule'}
    ]
]

MODELS = {
    "#0": "Llama3.2",
    "#2": "Llama3.2+SPWordLevel",
    "#6": "Llama3.2+SPWordPiece",
    "#7": "Llama3.2+SPSentencePiece"
}

WITHIN_PROJECTS = PROJECTS[0]
CROSS_PROJECTS = PROJECTS[1:]
ALL_CROSS_PROJECTS = [item for sublist in PROJECTS[1:] for item in sublist]

<a id='within_project'></a>
## Within Projects Models

#### There are two parts under Within Project Model section, follow the link to reach the section:
#### 1. [Training Process Inspection](#within_project_tb)
#### 2. [Model Testing](#within_project_model_testing)

#### Different models are available for cross project estimation as follows: 

#### #0 - Llama3.2 Auto Tokenizer + Llama3.2
#### #2 - Word-level Story Point Tokenizer + Llama3.2
#### #6 - WordPiece Story Point Tokenizer + Llama3.2
#### #7 - SentencePiece Story Point Tokenizer + Llama3.2 

<a id='within_project_model_testing'></a>
### Model Testing

##### Run the cell below to do inference on all testing datasets using all the uploaded models on Hugging Face

In [12]:
llama3_within_df = pd.DataFrame({'Project': [item['train'] for item in WITHIN_PROJECTS]}, dtype='float64')

for model in MODELS:
    maes = []
    for project in WITHIN_PROJECTS:
        print(f"Running inference for project: {project['train']}")
        mae, mdae = main(model, project['train'])
        mae = round(mae, 2)
        maes.append(mae)
        print(f"MAE for project {project['train']}: {mae}")
        model_id = f"DEVCamiloSepulveda/{model[1:]}-LLAMA3SP-{project['train']}"
        # clean_hf_cache(model_id)
        clean_gpu_memory()
    llama3_within_df[MODELS[model]] = maes
    llama3_within_df[MODELS[model]] = llama3_within_df[MODELS[model]].apply(lambda x: round(x, 2))

print("All projects processed")

llama3_within_df

  llama3_within_df = pd.DataFrame({'Project': [item['train'] for item in WITHIN_PROJECTS]}, dtype='float64')


Running inference for project: appceleratorstudio
within project inference using model #0 for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  525
using llama3 tokenizer!


  test_seq = torch.tensor(tokens_test['input_ids'])


MAE for project appceleratorstudio: 1.650390625
Running inference for project: aptanastudio
within project inference using model #0 for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using llama3 tokenizer!
MAE for project aptanastudio: 3.740234375
Running inference for project: bamboo
within project inference using model #0 for project bamboo
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  93
using llama3 tokenizer!
MAE for project bamboo: 1.099609375
Running inference for project: clover
within project inference using model #0 for project clover
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using llama3 tokenizer!
MAE for project clover: 4.078125
Running inference for project: datamanagement
within project inference using model #0 for project datamanagement
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  840
using llama3 tokenizer!
MAE for project datamanagement: 6.44921875
Running inference for project: duracloud
within project inference using model #0 for project duracloud
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  119
using llama3 tokenizer!
MAE for project duracloud: 1.0498046875
Running inference for project: jirasoftware
within project inference using model #0 for project jirasoftware
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  63
using llama3 tokenizer!
MAE for project jirasoftware: 2.05078125
Running inference for project: mesos
within project inference using model #0 for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using llama3 tokenizer!
MAE for project mesos: 1.3798828125
Running inference for project: moodle
within project inference using model #0 for project moodle
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  209
using llama3 tokenizer!
MAE for project moodle: 11.609375
Running inference for project: mule
within project inference using model #0 for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using llama3 tokenizer!
MAE for project mule: 2.650390625
Running inference for project: mulestudio
within project inference using model #0 for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using llama3 tokenizer!
MAE for project mulestudio: 3.69921875
Running inference for project: springxd
within project inference using model #0 for project springxd
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  634
using llama3 tokenizer!
MAE for project springxd: 2.0703125
Running inference for project: talenddataquality
within project inference using model #0 for project talenddataquality
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using llama3 tokenizer!
MAE for project talenddataquality: 3.7890625
Running inference for project: talendesb
within project inference using model #0 for project talendesb
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using llama3 tokenizer!
MAE for project talendesb: 1.080078125
Running inference for project: titanium
within project inference using model #0 for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using llama3 tokenizer!
MAE for project titanium: 2.4609375
Running inference for project: usergrid
within project inference using model #0 for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using llama3 tokenizer!
MAE for project usergrid: 1.509765625
Running inference for project: appceleratorstudio
within project inference using model #2 for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  525
using word-level tokenizer!
MAE for project appceleratorstudio: 1.5
Running inference for project: aptanastudio
within project inference using model #2 for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using word-level tokenizer!
MAE for project aptanastudio: 3.759765625
Running inference for project: bamboo
within project inference using model #2 for project bamboo
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  93
using word-level tokenizer!
MAE for project bamboo: 1.4296875
Running inference for project: clover
within project inference using model #2 for project clover
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using word-level tokenizer!
MAE for project clover: 4.8984375
Running inference for project: datamanagement
within project inference using model #2 for project datamanagement
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  840
using word-level tokenizer!
MAE for project datamanagement: 16.78125
Running inference for project: duracloud
within project inference using model #2 for project duracloud
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  119
using word-level tokenizer!
MAE for project duracloud: 1.3603515625
Running inference for project: jirasoftware
within project inference using model #2 for project jirasoftware
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  63
using word-level tokenizer!
MAE for project jirasoftware: 2.3203125
Running inference for project: mesos
within project inference using model #2 for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using word-level tokenizer!
MAE for project mesos: 1.509765625
Running inference for project: moodle
within project inference using model #2 for project moodle
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  209
using word-level tokenizer!
MAE for project moodle: 11.640625
Running inference for project: mule
within project inference using model #2 for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using word-level tokenizer!
MAE for project mule: 2.689453125
Running inference for project: mulestudio
within project inference using model #2 for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using word-level tokenizer!
MAE for project mulestudio: 3.560546875
Running inference for project: springxd
within project inference using model #2 for project springxd
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  634
using word-level tokenizer!
MAE for project springxd: 1.9296875
Running inference for project: talenddataquality
within project inference using model #2 for project talenddataquality
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using word-level tokenizer!
MAE for project talenddataquality: 4.671875
Running inference for project: talendesb
within project inference using model #2 for project talendesb
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using word-level tokenizer!
MAE for project talendesb: 1.7099609375
Running inference for project: titanium
within project inference using model #2 for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using word-level tokenizer!
MAE for project titanium: 2.33984375
Running inference for project: usergrid
within project inference using model #2 for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using word-level tokenizer!
MAE for project usergrid: 1.51953125
Running inference for project: appceleratorstudio
within project inference using model #6 for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  525
using wordpiece tokenizer!
MAE for project appceleratorstudio: 1.669921875
Running inference for project: aptanastudio
within project inference using model #6 for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using wordpiece tokenizer!
MAE for project aptanastudio: 4.0390625
Running inference for project: bamboo
within project inference using model #6 for project bamboo
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  93
using wordpiece tokenizer!
MAE for project bamboo: 1.169921875
Running inference for project: clover
within project inference using model #6 for project clover
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using wordpiece tokenizer!
MAE for project clover: 3.83984375
Running inference for project: datamanagement
within project inference using model #6 for project datamanagement
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  840
using wordpiece tokenizer!
MAE for project datamanagement: 6.78125
Running inference for project: duracloud
within project inference using model #6 for project duracloud
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  119
using wordpiece tokenizer!
MAE for project duracloud: 1.2900390625
Running inference for project: jirasoftware
within project inference using model #6 for project jirasoftware
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  63
using wordpiece tokenizer!
MAE for project jirasoftware: 2.130859375
Running inference for project: mesos
within project inference using model #6 for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using wordpiece tokenizer!
MAE for project mesos: 1.1904296875
Running inference for project: moodle
within project inference using model #6 for project moodle
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  209
using wordpiece tokenizer!
MAE for project moodle: 13.796875
Running inference for project: mule
within project inference using model #6 for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using wordpiece tokenizer!
MAE for project mule: 2.80078125
Running inference for project: mulestudio
within project inference using model #6 for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using wordpiece tokenizer!
MAE for project mulestudio: 3.83984375
Running inference for project: springxd
within project inference using model #6 for project springxd
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  634
using wordpiece tokenizer!
MAE for project springxd: 1.849609375
Running inference for project: talenddataquality
within project inference using model #6 for project talenddataquality
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using wordpiece tokenizer!
MAE for project talenddataquality: 3.759765625
Running inference for project: talendesb
within project inference using model #6 for project talendesb
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using wordpiece tokenizer!
MAE for project talendesb: 1.0498046875
Running inference for project: titanium
within project inference using model #6 for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using wordpiece tokenizer!
MAE for project titanium: 2.470703125
Running inference for project: usergrid
within project inference using model #6 for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using wordpiece tokenizer!
MAE for project usergrid: 1.3701171875
Running inference for project: appceleratorstudio
within project inference using model #7 for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  525
using sentencepiece tokenizer!
MAE for project appceleratorstudio: 1.6396484375
Running inference for project: aptanastudio
within project inference using model #7 for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using sentencepiece tokenizer!
MAE for project aptanastudio: 3.740234375
Running inference for project: bamboo
within project inference using model #7 for project bamboo
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  93
using sentencepiece tokenizer!
MAE for project bamboo: 0.97021484375
Running inference for project: clover
within project inference using model #7 for project clover
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using sentencepiece tokenizer!
MAE for project clover: 3.8203125
Running inference for project: datamanagement
within project inference using model #7 for project datamanagement
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  840
using sentencepiece tokenizer!
MAE for project datamanagement: 7.171875
Running inference for project: duracloud
within project inference using model #7 for project duracloud
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  119
using sentencepiece tokenizer!
MAE for project duracloud: 1.099609375
Running inference for project: jirasoftware
within project inference using model #7 for project jirasoftware
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  63
using sentencepiece tokenizer!
MAE for project jirasoftware: 1.900390625
Running inference for project: mesos
within project inference using model #7 for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using sentencepiece tokenizer!
MAE for project mesos: 1.2900390625
Running inference for project: moodle
within project inference using model #7 for project moodle
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  209
using sentencepiece tokenizer!
MAE for project moodle: 11.21875
Running inference for project: mule
within project inference using model #7 for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using sentencepiece tokenizer!
MAE for project mule: 2.509765625
Running inference for project: mulestudio
within project inference using model #7 for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using sentencepiece tokenizer!
MAE for project mulestudio: 3.810546875
Running inference for project: springxd
within project inference using model #7 for project springxd
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  634
using sentencepiece tokenizer!
MAE for project springxd: 1.7001953125
Running inference for project: talenddataquality
within project inference using model #7 for project talenddataquality
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using sentencepiece tokenizer!
MAE for project talenddataquality: 3.919921875
Running inference for project: talendesb
within project inference using model #7 for project talendesb
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using sentencepiece tokenizer!
MAE for project talendesb: 1.0
Running inference for project: titanium
within project inference using model #7 for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using sentencepiece tokenizer!
MAE for project titanium: 2.3203125
Running inference for project: usergrid
within project inference using model #7 for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using sentencepiece tokenizer!
MAE for project usergrid: 1.3701171875
All projects processed


Unnamed: 0,Project,Llama3.2,Llama3.2+SPWordLevel,Llama3.2+SPWordPiece,Llama3.2+SPSentencePiece
0,appceleratorstudio,1.65,1.5,1.67,1.64
1,aptanastudio,3.74,3.76,4.04,3.74
2,bamboo,1.1,1.43,1.17,0.97
3,clover,4.08,4.9,3.84,3.82
4,datamanagement,6.45,16.78,6.78,7.17
5,duracloud,1.05,1.36,1.29,1.1
6,jirasoftware,2.05,2.32,2.13,1.9
7,mesos,1.38,1.51,1.19,1.29
8,moodle,11.61,11.64,13.8,11.22
9,mule,2.65,2.69,2.8,2.51


##### Optional: Save the results

You can save the results of the DataFrame to a CSV file for further analysis or record-keeping. Run the following script to save the `llama3_within_df` DataFrame to a CSV file.

In [13]:
# Save the DataFrame to a CSV file
llama3_within_df.to_csv('./data_model_analysis/Llama3_within_results.csv', index=False)

print("CSV file 'Llama3_within_results.csv' created successfully.")

CSV file 'Llama3_within_results.csv' created successfully.


##### Run the cell below to do inference on testing dataset using **local** trained model on all within projects

In [19]:
print("Loading models from local on Within projects inference...")
        
within_maes = local_model_inference(WITHIN_PROJECTS)
within_df = create_df(within_maes, WITHIN_PROJECTS)

within_df

Loading models from local on Within projects inference...
within project inference using model ./models/appceleratorstudio_appceleratorstudio_epo_1 for project appceleratorstudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!


  test_seq = torch.tensor(tokens_test['input_ids'])


Model: appceleratorstudio_appceleratorstudio, MAE: 1.590000033378601, MdAE: 1.3363306522369385
within project inference using model ./models/aptanastudio_aptanastudio_epo_12 for project aptanastudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using llama3 tokenizer!
Model: aptanastudio_aptanastudio, MAE: 3.890000104904175, MdAE: 3.0960545539855957
within project inference using model ./models/bamboo_bamboo_epo_17 for project bamboo
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  93
using llama3 tokenizer!
Model: bamboo_bamboo, MAE: 1.1100000143051147, MdAE: 1.0073730945587158
within project inference using model ./models/clover_clover_epo_2 for project clover
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using llama3 tokenizer!
Model: clover_clover, MAE: 3.990000009536743, MdAE: 2.606503963470459
within project inference using model ./models/datamanagement_datamanagement_epo_1 for project datamanagement
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: datamanagement_datamanagement, MAE: 5.840000152587891, MdAE: 2.9360532760620117
within project inference using model ./models/duracloud_duracloud_epo_3 for project duracloud
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  119
using llama3 tokenizer!
Model: duracloud_duracloud, MAE: 1.0099999904632568, MdAE: 0.7023202776908875
within project inference using model ./models/jirasoftware_jirasoftware_epo_3 for project jirasoftware
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  63
using llama3 tokenizer!
Model: jirasoftware_jirasoftware, MAE: 2.380000114440918, MdAE: 1.8219866752624512
within project inference using model ./models/mesos_mesos_epo_0 for project mesos
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using llama3 tokenizer!
Model: mesos_mesos, MAE: 1.2699999809265137, MdAE: 1.0253846645355225
within project inference using model ./models/moodle_moodle_epo_17 for project moodle
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  209
using llama3 tokenizer!
Model: moodle_moodle, MAE: 13.899999618530273, MdAE: 12.567741394042969
within project inference using model ./models/mule_mule_epo_15 for project mule
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using llama3 tokenizer!
Model: mule_mule, MAE: 2.759999990463257, MdAE: 2.5467729568481445
within project inference using model ./models/mulestudio_mulestudio_epo_1 for project mulestudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using llama3 tokenizer!
Model: mulestudio_mulestudio, MAE: 3.9100000858306885, MdAE: 3.3148765563964844
within project inference using model ./models/springxd_springxd_epo_6 for project springxd
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: springxd_springxd, MAE: 1.8700000047683716, MdAE: 1.6935579776763916
within project inference using model ./models/talenddataquality_talenddataquality_epo_7 for project talenddataquality
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using llama3 tokenizer!
Model: talenddataquality_talenddataquality, MAE: 4.539999961853027, MdAE: 4.284450531005859
within project inference using model ./models/talendesb_talendesb_epo_7 for project talendesb
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using llama3 tokenizer!
Model: talendesb_talendesb, MAE: 1.059999942779541, MdAE: 0.805655300617218
within project inference using model ./models/titanium_titanium_epo_1 for project titanium
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using llama3 tokenizer!
Model: titanium_titanium, MAE: 2.5999999046325684, MdAE: 1.9763915538787842
within project inference using model ./models/usergrid_usergrid_epo_14 for project usergrid
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using llama3 tokenizer!
Model: usergrid_usergrid, MAE: 1.4700000286102295, MdAE: 1.0582633018493652


Unnamed: 0,Project,MAE
0,appceleratorstudio,1.59
1,aptanastudio,3.89
2,bamboo,1.11
3,clover,3.99
4,datamanagement,5.84
5,duracloud,1.01
6,jirasoftware,2.38
7,mesos,1.27
8,moodle,13.9
9,mule,2.76


##### Optional: Save the results

You can save the results of the DataFrame to a CSV file for further analysis or record-keeping. Run the following script to save the `within_df` DataFrame to a CSV file.

In [8]:
# Save the DataFrame to a CSV file
within_df.to_csv('./data_model_analysis/Llama3SP_within_results.csv', index=False)

print("CSV file 'within_project_results.csv' created successfully.")

CSV file 'within_project_results.csv' created successfully.


<a id='cross_project'></a>
## Cross Projects Models

#### There are two parts under Cross Project Model section, follow the link to reach the section:
#### 1. [Training Process Inspection](#cross_project_tb)
#### 2. [Model Testing](#cross_project_model_testing)

##### Different models are available for cross project estimation as follows:

### Cross project - within repository models
#### #00 - Llama3.2 Auto Tokenizer
#### #22 - Word-level Story Point Tokenizer + Llama3.2
#### #66 - WordPiece Story Point Tokenizer + Llama3.2
#### #77 - SentencePiece Story Point Tokenizer + Llama3.2  

### Cross project - cross repository models
#### #000 - Llama3.2 Auto Tokenizer
#### #222 - Word-level Story Point Tokenizer + Llama3.2
#### #666 - WordPiece Story Point Tokenizer + Llama3.2
#### #777 - SentencePiece Story Point Tokenizer + Llama3.2 

<a id='cross_project_model_testing'></a>
### Model Testing

##### Run the cell below to do inference on all testing datasets using all the uploaded models on Hugging Face

In [5]:
llama3_cross_df = pd.DataFrame(
    {
        'Train': [item['train'] for item in ALL_CROSS_PROJECTS],
        'Test': [item['test'] for item in ALL_CROSS_PROJECTS]
    }
    , dtype='float64'
)

for model in MODELS:
    maes = []
    for i, projects in enumerate(CROSS_PROJECTS):
        for project in projects:
            model_id = model
            caracter = model[1:]
            caracter = caracter * (i + 2)
            model_id = f"#{caracter}"

            print(f"Running inference for project trained on {project['train']} and tested on {project['test']}")
            mae, mdae = main(model_id, f"{project['train']}_{project['test']}" )
            mae = round(mae, 2)
            maes.append(mae)
            print(f"MAE for project {project['train']}: {mae}")
            hf_model_id = f"DEVCamiloSepulveda/{model[1:]}-LLAMA3SP-{project['train']}"
            # clean_hf_cache(hf_model_id)
            clean_gpu_memory()
    llama3_cross_df[MODELS[model]] = maes
    llama3_cross_df[MODELS[model]] = llama3_cross_df[MODELS[model]].apply(lambda x: round(x, 2))

print("All projects processed")

llama3_cross_df

  llama3_cross_df = pd.DataFrame(


Running inference for project trained on mesos and tested on usergrid
cross project inference using model #00 trained on mesos for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!


  test_seq = torch.tensor(tokens_test['input_ids'])


MAE for project mesos: 1.33984375
Running inference for project trained on usergrid and tested on mesos
cross project inference using model #00 trained on usergrid for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project usergrid: 1.740234375
Running inference for project trained on appceleratorstudio and tested on aptanastudio
cross project inference using model #00 trained on appceleratorstudio for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project appceleratorstudio: 4.359375
Running inference for project trained on appceleratorstudio and tested on titanium
cross project inference using model #00 trained on appceleratorstudio for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project appceleratorstudio: 3.359375
Running inference for project trained on titanium and tested on appceleratorstudio
cross project inference using model #00 trained on titanium for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project titanium: 2.55078125
Running inference for project trained on aptanastudio and tested on titanium
cross project inference using model #00 trained on aptanastudio for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project aptanastudio: 3.779296875
Running inference for project trained on mule and tested on mulestudio
cross project inference using model #00 trained on mule for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project mule: 3.5703125
Running inference for project trained on mulestudio and tested on mule
cross project inference using model #00 trained on mulestudio for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project mulestudio: 2.98046875
Running inference for project trained on clover and tested on usergrid
cross project inference using model #000 trained on clover for project usergrid
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project clover: 2.01953125
Running inference for project trained on talendesb and tested on mesos
cross project inference using model #000 trained on talendesb for project mesos
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project talendesb: 1.51953125
Running inference for project trained on talenddataquality and tested on aptanastudio
cross project inference using model #000 trained on talenddataquality for project aptanastudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project talenddataquality: 4.53125
Running inference for project trained on mule and tested on titanium
cross project inference using model #000 trained on mule for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project mule: 3.48046875
Running inference for project trained on talenddataquality and tested on appceleratorstudio
cross project inference using model #000 trained on talenddataquality for project appceleratorstudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project talenddataquality: 2.720703125
Running inference for project trained on mulestudio and tested on titanium
cross project inference using model #000 trained on mulestudio for project titanium
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project mulestudio: 3.73046875
Running inference for project trained on appceleratorstudio and tested on mulestudio
cross project inference using model #000 trained on appceleratorstudio for project mulestudio
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project appceleratorstudio: 3.41015625
Running inference for project trained on appceleratorstudio and tested on mule
cross project inference using model #000 trained on appceleratorstudio for project mule
Loading model from Hugging Face...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


using llama3 tokenizer!
MAE for project appceleratorstudio: 2.900390625
Running inference for project trained on mesos and tested on usergrid
cross project inference using model #22 trained on mesos for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project mesos: 1.400390625
Running inference for project trained on usergrid and tested on mesos
cross project inference using model #22 trained on usergrid for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project usergrid: 1.73046875
Running inference for project trained on appceleratorstudio and tested on aptanastudio
cross project inference using model #22 trained on appceleratorstudio for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project appceleratorstudio: 5.21875
Running inference for project trained on appceleratorstudio and tested on titanium
cross project inference using model #22 trained on appceleratorstudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project appceleratorstudio: 3.759765625
Running inference for project trained on titanium and tested on appceleratorstudio
cross project inference using model #22 trained on titanium for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project titanium: 2.640625
Running inference for project trained on aptanastudio and tested on titanium
cross project inference using model #22 trained on aptanastudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project aptanastudio: 3.640625
Running inference for project trained on mule and tested on mulestudio
cross project inference using model #22 trained on mule for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project mule: 4.0703125
Running inference for project trained on mulestudio and tested on mule
cross project inference using model #22 trained on mulestudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project mulestudio: 3.0703125
Running inference for project trained on clover and tested on usergrid
cross project inference using model #222 trained on clover for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project clover: 2.529296875
Running inference for project trained on talendesb and tested on mesos
cross project inference using model #222 trained on talendesb for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project talendesb: 1.9501953125
Running inference for project trained on talenddataquality and tested on aptanastudio
cross project inference using model #222 trained on talenddataquality for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project talenddataquality: 4.69140625
Running inference for project trained on mule and tested on titanium
cross project inference using model #222 trained on mule for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project mule: 3.6796875
Running inference for project trained on talenddataquality and tested on appceleratorstudio
cross project inference using model #222 trained on talenddataquality for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project talenddataquality: 2.66015625
Running inference for project trained on mulestudio and tested on titanium
cross project inference using model #222 trained on mulestudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project mulestudio: 3.73046875
Running inference for project trained on appceleratorstudio and tested on mulestudio
cross project inference using model #222 trained on appceleratorstudio for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project appceleratorstudio: 3.5390625
Running inference for project trained on appceleratorstudio and tested on mule
cross project inference using model #222 trained on appceleratorstudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using word-level tokenizer!
MAE for project appceleratorstudio: 2.890625
Running inference for project trained on mesos and tested on usergrid
cross project inference using model #66 trained on mesos for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project mesos: 1.3798828125
Running inference for project trained on usergrid and tested on mesos
cross project inference using model #66 trained on usergrid for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project usergrid: 1.599609375
Running inference for project trained on appceleratorstudio and tested on aptanastudio
cross project inference using model #66 trained on appceleratorstudio for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project appceleratorstudio: 4.28125
Running inference for project trained on appceleratorstudio and tested on titanium
cross project inference using model #66 trained on appceleratorstudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project appceleratorstudio: 3.400390625
Running inference for project trained on titanium and tested on appceleratorstudio
cross project inference using model #66 trained on titanium for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project titanium: 2.359375
Running inference for project trained on aptanastudio and tested on titanium
cross project inference using model #66 trained on aptanastudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project aptanastudio: 4.109375
Running inference for project trained on mule and tested on mulestudio
cross project inference using model #66 trained on mule for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project mule: 3.4296875
Running inference for project trained on mulestudio and tested on mule
cross project inference using model #66 trained on mulestudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project mulestudio: 3.080078125
Running inference for project trained on clover and tested on usergrid
cross project inference using model #666 trained on clover for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project clover: 2.0703125
Running inference for project trained on talendesb and tested on mesos
cross project inference using model #666 trained on talendesb for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project talendesb: 1.599609375
Running inference for project trained on talenddataquality and tested on aptanastudio
cross project inference using model #666 trained on talenddataquality for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project talenddataquality: 4.5390625
Running inference for project trained on mule and tested on titanium
cross project inference using model #666 trained on mule for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project mule: 3.33984375
Running inference for project trained on talenddataquality and tested on appceleratorstudio
cross project inference using model #666 trained on talenddataquality for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project talenddataquality: 2.98046875
Running inference for project trained on mulestudio and tested on titanium
cross project inference using model #666 trained on mulestudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project mulestudio: 3.509765625
Running inference for project trained on appceleratorstudio and tested on mulestudio
cross project inference using model #666 trained on appceleratorstudio for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project appceleratorstudio: 3.390625
Running inference for project trained on appceleratorstudio and tested on mule
cross project inference using model #666 trained on appceleratorstudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using wordpiece tokenizer!
MAE for project appceleratorstudio: 3.009765625
Running inference for project trained on mesos and tested on usergrid
cross project inference using model #77 trained on mesos for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project mesos: 1.0498046875
Running inference for project trained on usergrid and tested on mesos
cross project inference using model #77 trained on usergrid for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project usergrid: 1.5595703125
Running inference for project trained on appceleratorstudio and tested on aptanastudio
cross project inference using model #77 trained on appceleratorstudio for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project appceleratorstudio: 4.30859375
Running inference for project trained on appceleratorstudio and tested on titanium
cross project inference using model #77 trained on appceleratorstudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project appceleratorstudio: 3.30078125
Running inference for project trained on titanium and tested on appceleratorstudio
cross project inference using model #77 trained on titanium for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project titanium: 2.359375
Running inference for project trained on aptanastudio and tested on titanium
cross project inference using model #77 trained on aptanastudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project aptanastudio: 3.779296875
Running inference for project trained on mule and tested on mulestudio
cross project inference using model #77 trained on mule for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project mule: 3.650390625
Running inference for project trained on mulestudio and tested on mule
cross project inference using model #77 trained on mulestudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project mulestudio: 3.310546875
Running inference for project trained on clover and tested on usergrid
cross project inference using model #777 trained on clover for project usergrid
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project clover: 1.48046875
Running inference for project trained on talendesb and tested on mesos
cross project inference using model #777 trained on talendesb for project mesos
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project talendesb: 1.66015625
Running inference for project trained on talenddataquality and tested on aptanastudio
cross project inference using model #777 trained on talenddataquality for project aptanastudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project talenddataquality: 4.73046875
Running inference for project trained on mule and tested on titanium
cross project inference using model #777 trained on mule for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project mule: 3.310546875
Running inference for project trained on talenddataquality and tested on appceleratorstudio
cross project inference using model #777 trained on talenddataquality for project appceleratorstudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project talenddataquality: 2.490234375
Running inference for project trained on mulestudio and tested on titanium
cross project inference using model #777 trained on mulestudio for project titanium
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project mulestudio: 3.970703125
Running inference for project trained on appceleratorstudio and tested on mulestudio
cross project inference using model #777 trained on appceleratorstudio for project mulestudio
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project appceleratorstudio: 3.529296875
Running inference for project trained on appceleratorstudio and tested on mule
cross project inference using model #777 trained on appceleratorstudio for project mule
Loading model from Hugging Face...


adapter_config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/6.84M [00:00<?, ?B/s]

using sentencepiece tokenizer!
MAE for project appceleratorstudio: 2.76953125
All projects processed


Unnamed: 0,Train,Test,Llama3.2,Llama3.2+SPWordLevel,Llama3.2+SPWordPiece,Llama3.2+SPSentencePiece
0,mesos,usergrid,1.34,1.4,1.38,1.05
1,usergrid,mesos,1.74,1.73,1.6,1.56
2,appceleratorstudio,aptanastudio,4.36,5.22,4.28,4.31
3,appceleratorstudio,titanium,3.36,3.76,3.4,3.3
4,titanium,appceleratorstudio,2.55,2.64,2.36,2.36
5,aptanastudio,titanium,3.78,3.64,4.11,3.78
6,mule,mulestudio,3.57,4.07,3.43,3.65
7,mulestudio,mule,2.98,3.07,3.08,3.31
8,clover,usergrid,2.02,2.53,2.07,1.48
9,talendesb,mesos,1.52,1.95,1.6,1.66


##### Optional: Save the results

You can save the results of the DataFrame to a CSV file for further analysis or record-keeping. Run the following script to save the `llama3_cross_df` DataFrame to a CSV file.

In [6]:
# Save the DataFrtaFrame to a CSV file
llama3_cross_df.to_csv('./data_model_analysis/Llama3_cross_results.csv', index=False)

print("CSV file 'Llama3_cross_results.csv' created successfully.")

CSV file 'Llama3_cross_results.csv' created successfully.


##### Run the cell below to do inference on testing dataset using **local** trained model on all cross projects

In [6]:
print("Loading models from local on Cross projects inference...")

cross_projects = [item for sublist in PROJECTS[1:] for item in sublist]
cross_maes = local_model_inference(cross_projects)
cross_df = create_df(cross_maes, cross_projects)

cross_df

Loading models from local on Cross projects inference...
within project inference using model ./models/mesos_usergrid_epo_6 for project mesos
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  302
using llama3 tokenizer!


  test_seq = torch.tensor(tokens_test['input_ids'])


Model: mesos_usergrid, MAE: 1.3700000047683716, MdAE: 1.0562742948532104
within project inference using model ./models/usergrid_mesos_epo_4 for project usergrid
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  86
using llama3 tokenizer!
Model: usergrid_mesos, MAE: 1.4500000476837158, MdAE: 1.2023036479949951
within project inference using model ./models/appceleratorstudio_aptanastudio_epo_0 for project appceleratorstudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: appceleratorstudio_aptanastudio, MAE: 1.5299999713897705, MdAE: 1.2283222675323486
within project inference using model ./models/appceleratorstudio_titanium_epo_0 for project appceleratorstudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: appceleratorstudio_titanium, MAE: 1.7000000476837158, MdAE: 1.4522945880889893
within project inference using model ./models/titanium_appceleratorstudio_epo_0 for project titanium
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  405
using llama3 tokenizer!
Model: titanium_appceleratorstudio, MAE: 2.6500000953674316, MdAE: 2.1921749114990234
within project inference using model ./models/aptanastudio_titanium_epo_2 for project aptanastudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  149
using llama3 tokenizer!
Model: aptanastudio_titanium, MAE: 3.4800000190734863, MdAE: 2.7980830669403076
within project inference using model ./models/mule_mulestudio_epo_16 for project mule
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using llama3 tokenizer!
Model: mule_mulestudio, MAE: 2.7300000190734863, MdAE: 2.5237152576446533
within project inference using model ./models/mulestudio_mule_epo_6 for project mulestudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using llama3 tokenizer!
Model: mulestudio_mule, MAE: 3.869999885559082, MdAE: 3.0381665229797363
within project inference using model ./models/clover_usergrid_epo_8 for project clover
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  69
using llama3 tokenizer!
Model: clover_usergrid, MAE: 4.170000076293945, MdAE: 2.5466246604919434
within project inference using model ./models/talendesb_mesos_epo_15 for project talendesb
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  156
using llama3 tokenizer!
Model: talendesb_mesos, MAE: 1.0700000524520874, MdAE: 0.8679754734039307
within project inference using model ./models/talenddataquality_aptanastudio_epo_3 for project talenddataquality
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using llama3 tokenizer!
Model: talenddataquality_aptanastudio, MAE: 3.7100000381469727, MdAE: 3.401155471801758
within project inference using model ./models/mule_titanium_epo_13 for project mule
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  159
using llama3 tokenizer!
Model: mule_titanium, MAE: 2.680000066757202, MdAE: 2.3301451206207275
within project inference using model ./models/talenddataquality_appceleratorstudio_epo_0 for project talenddataquality
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  248
using llama3 tokenizer!
Model: talenddataquality_appceleratorstudio, MAE: 3.9200000762939453, MdAE: 3.9146008491516113
within project inference using model ./models/mulestudio_titanium_epo_1 for project mulestudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  131
using llama3 tokenizer!
Model: mulestudio_titanium, MAE: 3.8499999046325684, MdAE: 3.1191020011901855
within project inference using model ./models/appceleratorstudio_mulestudio_epo_0 for project appceleratorstudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: appceleratorstudio_mulestudio, MAE: 1.7400000095367432, MdAE: 1.5412847995758057
within project inference using model ./models/appceleratorstudio_mule_epo_2 for project appceleratorstudio
Loading model from local...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batch Size:  512
using llama3 tokenizer!
Model: appceleratorstudio_mule, MAE: 1.909999966621399, MdAE: 1.7131648063659668


Unnamed: 0,Train Project,Test Project,MAE
0,mesos,usergrid,1.37
1,usergrid,mesos,1.45
2,appceleratorstudio,aptanastudio,1.53
3,appceleratorstudio,titanium,1.7
4,titanium,appceleratorstudio,2.65
5,aptanastudio,titanium,3.48
6,mule,mulestudio,2.73
7,mulestudio,mule,3.87
8,clover,usergrid,4.17
9,talendesb,mesos,1.07


##### Optional: Save the results

You can save the results of the DataFrame to a CSV file for further analysis or record-keeping. Run the following script to save the `cross_df` DataFrame to a CSV file.

In [7]:
# Save the DataFrtaFrame to a CSV file
cross_df.to_csv('./data_model_analysis/Llama3SP_cross_results.csv', index=False)

print("CSV file 'Llama3SP_cross_results.csv' created successfully.")

CSV file 'cross_project_results.csv' created successfully.
