In [1]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM, TaskType, PeftModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, Trainer, TrainingArguments, BitsAndBytesConfig, \
    DataCollatorForLanguageModeling, Trainer, TrainingArguments, logging, pipeline
from torch import cuda, bfloat16
import transformers
from textwrap import dedent
from datasets import Dataset, load_dataset
import warnings
from metrics import  calculate_metrics, calculate_metrics2, calc_mets_my
import gc
import time
from trl import SFTConfig, SFTTrainer
import os

In [2]:
# Label for this experiment (not referenced by any other cell visible in this notebook).
PROJECT = "Llama3-8B-QLora-FineTune-Omni"
# Hugging Face Hub id of the base model that the cells below load and fine-tune.
MODEL_NAME = 'meta-llama/Meta-Llama-3-8B-Instruct'

In [3]:
# Quantization settings handed to `from_pretrained(quantization_config=...)`:
# 4-bit loading with the 'nf4' quant type, double quantization enabled and
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)


# Model configuration for MODEL_NAME, reused by every model load below.
# token=True asks transformers to authenticate with the locally stored Hub token
# (presumably needed because the checkpoint is gated — verify).
model_config = transformers.AutoConfig.from_pretrained(
    MODEL_NAME,
    token=True
)

In [4]:
# LoRA adapter settings applied by get_peft_model() in the cells below:
# rank 64, alpha 16, dropout 0.1, no bias terms trained, causal-LM task type.
# Adapters are attached to the attention projections (q/k/v/o) and the MLP
# projections (gate/up/down), addressed by their module-name suffixes.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=[
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj",
    ],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)


In [5]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Walks ``model.parameters()`` once, summing ``numel()`` over every
    parameter and, separately, over those whose ``requires_grad`` is set, then
    prints both totals and the trainable share as a percentage.

    Args:
        model: Any object exposing ``parameters()`` whose items provide
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        None. The summary line is written to stdout.

    Note:
        A model without any parameters reports ``trainable%: 0.0`` instead of
        raising ``ZeroDivisionError``.
        NOTE(review): counts are raw ``numel()`` values; for 4-bit quantized
        weights this presumably reflects packed storage rather than the
        logical weight count — confirm before quoting the figures.
    """
    trainable_params = 0
    all_param = 0
    for param in model.parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio so a parameter-free module does not crash the cell.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [6]:
# Load the base model with the 4-bit quantization settings from bnb_config and
# let device_map='auto' decide where its weights are placed.
# NOTE(review): trust_remote_code=True allows code shipped in the Hub repository
# to run locally — confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        config=model_config,
        quantization_config=bnb_config,
        device_map='auto',
        token=True
    )
print("loaded model........")




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........


In [7]:
# Baseline count on the quantized base model, before any LoRA adapter is attached.
print_trainable_parameters(model)

trainable params: 1050939392 || all params: 4540600320 || trainable%: 23.145384264959926


In [8]:
# Total element count over all parameters (same sum that
# print_trainable_parameters reports as "all params").
num_parameters = sum(p.numel() for p in model.parameters())
print(num_parameters)

4540600320


In [9]:
# Prepare the quantized model for k-bit training, then wrap it with the LoRA
# adapters described by lora_config. `model` is rebound to the wrapped model.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [10]:
# Re-count after wrapping with PEFT to see how many parameters the LoRA adapters
# add and how many remain trainable.
print_trainable_parameters(model)

trainable params: 167772160 || all params: 4708372480 || trainable%: 3.5632728870252848


In [11]:
def format_test(row: dict, scenario='fine-tune'):
    """
    Build the Yes/No entity-matching prompt for one test record.

    Args:
        row: Record providing the two place descriptions under "e1" and "e2".
        scenario: "zero" returns a plain-text prompt (instruction, places,
            then "Answer: "). Any other value renders the system/user
            messages through the module-level ``tokenizer``'s chat template
            with the generation prompt appended.

    Returns:
        The prompt string to feed to the model.

    NOTE(review): the training sample echoed in the notebook output labels the
    places "Place1:"/"Place2:" (no space) while this prompt writes
    "Place 1:"/"Place 2:" — confirm the test prompt matches the training text.
    """
    first_place = row["e1"]
    second_place = row["e2"]
    instruction = "Do the two place descriptions refer to the same real-world place? Answer with 'Yes' if they do and 'No' if they do not."
    # dedent runs on the already-interpolated text, exactly as before.
    user_text = dedent(
        f"""
        Place 1: '{first_place}'
        Place 2: '{second_place}'

    """
    )

    # Zero-shot prompts are plain text and never touch the tokenizer.
    if scenario == "zero":
        return instruction + user_text + "Answer: "

    chat = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_text},
    ]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [12]:
def format_test_distance(row, scenario="fine-tune"):
    """
    Build the Yes/No entity-matching prompt that also states the distance.

    Args:
        row: Record providing "e1", "e2" (place descriptions) and "distance".
        scenario: "zero" returns a plain-text prompt ending in "Answer:".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended.

    Returns:
        The prompt string to feed to the model.
    """
    first_place = row["e1"]
    second_place = row["e2"]
    distance = row['distance']
    instruction = "Two place descriptions and the geographic distance between them are provided. Do the two place descriptions refer to the same real-world place? Answer with 'Yes' if they do and 'No' if they do not."
    # dedent runs on the already-interpolated text, exactly as before.
    user_text = dedent(
        f"""
        Place1: '{first_place}'
        Place2: '{second_place}'
        Distance: {distance}

    """
    )

    # Zero-shot prompts are plain text and never touch the tokenizer.
    if scenario == "zero":
        return instruction + user_text + "Answer:"

    chat = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_text},
    ]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [13]:
def format_test_gtminer(row: dict, scenario="fine-tune"):
    """
    Build the four-way relation prompt (same_as / part_of / serves / unknown).

    Args:
        row: Record providing the two place descriptions under "e1" and "e2".
        scenario: "zero" returns a plain-text prompt ending in "Answer:".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended. Defaults to "fine-tune", like the sibling
            formatters, so the evaluation loop can call this with the row
            alone.

    Returns:
        The prompt string to feed to the model.
    """
    prompt = dedent(
        f"""
    Place 1: '{row["e1"]}'
    Place 2: '{row["e2"]}'

    """
    )
    messages = [
        {
            "role": "system",
            "content": "Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations.",
        },
        {"role": "user", "content": prompt},
    ]

    if scenario == "zero":
        # Zero-shot prompts are plain text and never touch the tokenizer.
        return messages[0]["content"] + prompt + "Answer:"
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [14]:
def format_test_gtminer_simple(row: dict, scenario="fine-tune"):
    """
    Build the short-instruction variant of the four-way relation prompt.

    Args:
        row: Record providing the two place descriptions under "e1" and "e2".
        scenario: "zero" returns a plain-text prompt ending in "Answer:".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended. Defaults to "fine-tune", like the sibling
            formatters, so the evaluation loop can call this with the row
            alone.

    Returns:
        The prompt string to feed to the model.
    """
    prompt = dedent(
        f"""
    Place 1: '{row["e1"]}'
    Place 2: '{row["e2"]}'

    """
    )
    messages = [
        {
            "role": "system",
            "content": "Two place descriptions are provided. Predict the relation between them. Answer only with ‘same_as’, ‘part_of’, ‘serves’ or ‘unknown’.",
        },
        {"role": "user", "content": prompt},
    ]

    if scenario == "zero":
        # Zero-shot prompts are plain text and never touch the tokenizer.
        return messages[0]["content"] + prompt + "Answer:"
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [15]:
def format_test_gtminer_distance(row: dict, scenario="fine-tune"):
    """
    Build the four-way relation prompt that also states the distance.

    Args:
        row: Record providing "e1", "e2" (place descriptions) and "distance".
        scenario: "zero" returns a plain-text prompt ending in "Answer:".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended.

    Returns:
        The prompt string to feed to the model.
    """
    first_place = row["e1"]
    second_place = row["e2"]
    distance = row["distance"]
    instruction = "Two place descriptions and the geographic distance between them is provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations."
    # dedent runs on the already-interpolated text, exactly as before.
    user_text = dedent(
        f"""
    Place 1: '{first_place}'
    Place 2: '{second_place}'
    Distance: {distance}
    Answer only with: same_as, part_of, serves, unknown

    """
    )

    # Zero-shot prompts are plain text and never touch the tokenizer.
    if scenario == "zero":
        return instruction + user_text + "Answer:"

    chat = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_text},
    ]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [16]:
def format_test_gtminer2(row: dict, scenario="fine-tune"):
    """
    Build the four-way relation prompt with the answer options repeated in
    the user message.

    Args:
        row: Record providing the two place descriptions under "e1" and "e2".
        scenario: "zero" returns a plain-text prompt ending in "Answer: ".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended.

    Returns:
        The prompt string to feed to the model.
    """
    first_place = row["e1"]
    second_place = row["e2"]
    instruction = "Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations."
    # dedent runs on the already-interpolated text, exactly as before.
    user_text = dedent(
        f"""
        Place 1: '{first_place}'
        Place 2: '{second_place}'
        Answer only with: same_as, part_of, serves, unknown
    """
    )

    # Zero-shot prompts are plain text and never touch the tokenizer.
    if scenario == "zero":
        return instruction + user_text + "Answer: "

    chat = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_text},
    ]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [17]:
def format_test_gtminer3(row: dict, scenario="fine-tune"):
    """
    Build the relation prompt with a minimal instruction and hyphenated
    answer options (same-as, part-of, serves, unknown).

    Args:
        row: Record providing the two place descriptions under "e1" and "e2".
        scenario: "zero" returns a plain-text prompt ending in "Answer: ".
            Any other value renders the system/user messages through the
            module-level ``tokenizer``'s chat template with the generation
            prompt appended.

    Returns:
        The prompt string to feed to the model.
    """
    first_place = row["e1"]
    second_place = row["e2"]
    instruction = "Two place descriptions are provided. Predict the relation between the two places."
    # dedent runs on the already-interpolated text, exactly as before.
    user_text = dedent(
        f"""
    Place 1: '{first_place}'
    Place 2: '{second_place}'
    Answer only with: same-as, part-of, serves, unknown
    """
    )

    # Zero-shot prompts are plain text and never touch the tokenizer.
    if scenario == "zero":
        return instruction + user_text + "Answer: "

    chat = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_text},
    ]
    return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

In [18]:
# Set to True to checkpoint during training (save_strategy="steps") and call
# trainer.save_model(OUTPUT_DIR) afterwards. Set to False to train and evaluate
# without saving anything (save_strategy="no").
SAVE_FINE_TUNED_MODEL = True

In [19]:
# Prompt variant for this run. It is embedded in the dataset folder names
# ("<dataset>_<PROMPT_TO_USE>"), in the results log file name, and decides which
# test-prompt formatter the training/evaluation loop uses.
# Select between "simple", "attribute_val", "plm" and "attribute_value_dist".
PROMPT_TO_USE = "attribute_val"

In [20]:
# Dataset folders to fine-tune and evaluate on, in run order. Each entry has the
# shape "<root>\<family>_<PROMPT_TO_USE>\<region>\" (Windows separators, trailing
# separator included), built from the (root, family, region) triples below.
dataset_folder_path = [
    "\\".join((root, family + "_" + PROMPT_TO_USE, region, ""))
    for root, family, region in (
        ("datasets2", "NZER", "hope"),
        ("datasets2", "NZER", "norse"),
        ("datasets", "NZER", "palm"),
        ("datasets", "NZER", "north"),
        ("datasets2", "NZER", "auck"),
        ("datasets", "GEOD_OSM_FSQ", "edi"),
        ("datasets", "GEOD_OSM_FSQ", "pit"),
        ("datasets", "GEOD_OSM_FSQ", "sin"),
        ("datasets", "GEOD_OSM_FSQ", "tor"),
        ("datasets", "GEOD_OSM_YELP", "edi"),
        ("datasets", "GEOD_OSM_YELP", "pit"),
        ("datasets", "GEOD_OSM_YELP", "sin"),
        ("datasets", "GEOD_OSM_YELP", "tor"),
        ("datasets", "SGN", "swiss"),
    )
]

In [None]:
# Fine-tune and evaluate one LoRA adapter per dataset folder (binary Yes/No matching).
#
# For every folder in `dataset_folder_path` this cell:
#   1. loads train/valid/test JSON splits,
#   2. loads a fresh 4-bit base model plus tokenizer and attaches LoRA adapters,
#   3. trains with TRL's SFTTrainer (optionally saving to OUTPUT_DIR),
#   4. generates one token per test prompt and maps it to 1 (Yes) / 0 (No) / 2 (other),
#   5. appends the metrics and timings to logs\<PROMPT_TO_USE>_results.txt,
#   6. releases the model so the next dataset starts from clean GPU memory.
#
# Paths use Windows-style "\\" separators, matching `dataset_folder_path`.
logging.set_verbosity_error()
# Loop-invariant: silence Python warnings once rather than on every iteration.
warnings.filterwarnings("ignore")

for dataset_folder in dataset_folder_path:

    # "<root>\<dataset>\<region>\" -> [..., dataset, region, ""]
    path_parts = dataset_folder.split("\\")
    print(path_parts)
    dataset_output_path_1, dataset_output_path_2 = path_parts[-3], path_parts[-2]

    dataset = load_dataset(
        "json",
        data_files={
            "train": dataset_folder + "train.json",
            "valid": dataset_folder + "valid.json",
            "test": dataset_folder + "test.json",
        },
    )
    print("successfully loaded dataset.......")

    # A fresh quantized base model per dataset, so adapters never carry over.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        config=model_config,
        quantization_config=bnb_config,
        device_map='auto',
        token=True
    )
    print("loaded model........")

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        MODEL_NAME,
        token=True
    )
    print("loaded tokenizer........")
    # Reuse EOS as the padding token; pad on the right for training.
    PAD_TOKEN = tokenizer.eos_token
    tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
    tokenizer.padding_side = "right"

    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    print(dataset['train'][0]['text'])

    # Defined unconditionally: SFTConfig receives output_dir in both modes, and the
    # previous code raised NameError here when SAVE_FINE_TUNED_MODEL was False.
    OUTPUT_DIR = "experiments\\" + dataset_output_path_1 + "\\" + dataset_output_path_2 + "\\"

    # Settings shared by the saving and non-saving runs.
    # NOTE(review): warmup_ratio appears to have no effect with the "constant"
    # scheduler (the logged learning rate is 1e-4 from the first step) — confirm
    # whether "constant_with_warmup" was intended.
    sft_kwargs = dict(
        output_dir=OUTPUT_DIR,
        dataset_text_field="text",
        max_seq_length=256,
        num_train_epochs=5,
        per_device_train_batch_size=6,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        logging_steps=10,
        learning_rate=1e-4,
        bf16=True,
        warmup_ratio=0.1,
        lr_scheduler_type="constant",
        dataset_kwargs={
            # The "text" field already carries the chat-template special tokens.
            "add_special_tokens": False,
            "append_concat_token": False,
        },
    )
    if SAVE_FINE_TUNED_MODEL:
        os.makedirs(os.path.dirname(OUTPUT_DIR), exist_ok=True)
        sft_kwargs.update(
            save_strategy="steps",
            save_total_limit=0,
            save_safetensors=True,
        )
    else:
        sft_kwargs.update(save_strategy="no")
    sft_config = SFTConfig(**sft_kwargs)

    trainer = SFTTrainer(
        model=model,
        args=sft_config,
        train_dataset=dataset["train"],
        eval_dataset=dataset["valid"],
        tokenizer=tokenizer,
    )

    print("starting training...........")

    start_time_train = time.time()
    trainer.train()
    end_time_train = time.time()
    elapsed_time_train = end_time_train - start_time_train

    print('training completed....')

    if SAVE_FINE_TUNED_MODEL:
        trainer.save_model(OUTPUT_DIR)
        print('model saved .........')

    if PROMPT_TO_USE == "attribute_value_dist":
        test_prompts = [format_test_distance(x) for x in dataset['test']]
    else:
        test_prompts = [format_test(x) for x in dataset['test']]

    print(test_prompts[0])

    model.eval()

    results = []
    start_time_test = time.time()
    with torch.no_grad():
        for prompt in test_prompts:
            # The chat template already emits <|begin_of_text|>, and training used
            # add_special_tokens=False; do the same here so inference does not see
            # a duplicated BOS token.
            inputs = tokenizer(
                prompt, return_tensors="pt", add_special_tokens=False
            ).to(device='cuda')
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,  # explicit, since pad == eos
                max_new_tokens=1,  # the answer is a single token; supersedes max_length
                num_return_sequences=1,  # Number of sequences to generate
                no_repeat_ngram_size=2,  # Avoid repeating phrases
                temperature=0.01,  # Controls randomness; lower is less random
                top_k=50,  # Top-k sampling
            )
            # Decodes prompt + answer; the answer is the last line of the text.
            prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
            results.append(prediction.strip())

    end_time_test = time.time()
    elapsed_time_test = end_time_test - start_time_test

    print("testing completed........")

    predictions = [x.split("\n")[-1].strip() for x in results]

    # 1 = Yes, 0 = No, 2 = anything else the model produced.
    predictions = [1 if label == "Yes" else 0 if label == "No" else 2 for label in predictions]
    labels = [1 if label == "Yes" else 0 for label in dataset['test']['answer']]
    print(path_parts)

    try:
        my_mets = calc_mets_my(predictions, labels)
        print(my_mets)
    except Exception as e:
        # Best effort: keep the run going and record the failure in the log.
        print(e)
        print('my calc failed')
        my_mets = 'my calc failed'

    results_logs = "logs\\" + PROMPT_TO_USE + "_results.txt"
    # Make sure the log folder exists, then append to the intended file (the
    # previous code opened the literal name "results_logs" instead).
    os.makedirs("logs", exist_ok=True)
    log_entry = "".join([
        str(dataset_output_path_1),
        str(dataset_output_path_2),
        '\n',
        str(dataset['train'][0]['text']),
        '\n',
        str(results[0]),
        '\n',
        '\n',
        str(my_mets),
        '\n',
        str(elapsed_time_train),
        '\n',
        str(elapsed_time_test),
        '\n',
        '\n',
        '********************************',
        '\n',
        '\n',
    ])
    with open(results_logs, "a", encoding='utf-8') as f:
        f.write(log_entry)

    # The trainer keeps its own reference to the model, so it has to go too
    # before the CUDA cache can actually be released.
    del trainer
    del model
    del dataset
    del tokenizer
    gc.collect()
    torch.cuda.empty_cache()
    


['datasets2', 'NZER_attribute_val', 'hope', '']


Generating train split: 0 examples [00:00, ? examples/s]

Generating valid split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

successfully loaded dataset.......


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........
loaded tokenizer........
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Do the two place descriptions refer to the same real-world place? Answer with 'Yes' if they do and 'No' if they do not.<|eot_id|><|start_header_id|>user<|end_header_id|>

Place1: 'name: McKay Creek type: stream latitude: -44.21667 longitude: 168.43333'
Place2: 'name: Gipsy Creek type: Stream latitude: -44.022242 longitude: 168.684074'<|eot_id|><|start_header_id|>assistant<|end_header_id|>

No<|eot_id|>


Map:   0%|          | 0/13561 [00:00<?, ? examples/s]

Map:   0%|          | 0/2906 [00:00<?, ? examples/s]

starting training...........
{'loss': 2.475, 'grad_norm': 0.5667222142219543, 'learning_rate': 0.0001, 'epoch': 0.017691287041132243}
{'loss': 1.2191, 'grad_norm': 0.28920572996139526, 'learning_rate': 0.0001, 'epoch': 0.03538257408226449}
{'loss': 1.0766, 'grad_norm': 0.23915338516235352, 'learning_rate': 0.0001, 'epoch': 0.05307386112339673}
{'loss': 1.0342, 'grad_norm': 0.23277819156646729, 'learning_rate': 0.0001, 'epoch': 0.07076514816452897}
{'loss': 0.9784, 'grad_norm': 0.22267933189868927, 'learning_rate': 0.0001, 'epoch': 0.08845643520566121}
{'loss': 0.9375, 'grad_norm': 0.28909575939178467, 'learning_rate': 0.0001, 'epoch': 0.10614772224679346}
{'loss': 0.8784, 'grad_norm': 0.45711883902549744, 'learning_rate': 0.0001, 'epoch': 0.1238390092879257}
{'loss': 0.7825, 'grad_norm': 0.30580389499664307, 'learning_rate': 0.0001, 'epoch': 0.14153029632905795}
{'loss': 0.7558, 'grad_norm': 0.28000837564468384, 'learning_rate': 0.0001, 'epoch': 0.15922158337019018}
{'loss': 0.7222, 'g

In [ ]:
# Set to True to checkpoint during training (save_strategy="steps") and call
# trainer.save_model(OUTPUT_DIR) afterwards. Set to False to train and evaluate
# without saving anything (save_strategy="no").
SAVE_FINE_TUNED_MODEL = True

In [ ]:
# Prompt variant for the GTMD runs. It is embedded in the dataset folder names
# ("GTMD_<PROMPT_TO_USE>"), in the results log file name, and decides which
# format_test_gtminer* formatter the loop below uses.
# Select between "simple", "attribute_val", "plm" and "attribute_value_dist".
PROMPT_TO_USE = "attribute_val"

In [ ]:
# GTMD dataset folders, one per region, shaped "datasets2\GTMD_<PROMPT_TO_USE>\<region>\"
# (Windows separators, trailing separator included).
dataset_folder_path = [
    "\\".join(("datasets2", "GTMD_" + PROMPT_TO_USE, region, ""))
    for region in ("mel", "sea", "sin", "tor")
]

In [19]:
# Fine-tune and evaluate one LoRA adapter per GTMD dataset folder (four-way relation
# prediction: same_as / part_of / serves / unknown).
#
# For every folder in `dataset_folder_path` this cell:
#   1. loads train/valid/test JSON splits,
#   2. loads a fresh 4-bit base model plus tokenizer and attaches LoRA adapters,
#   3. trains with TRL's SFTTrainer (optionally saving to OUTPUT_DIR),
#   4. generates two tokens per test prompt and maps the answer to a class id,
#   5. appends the metrics and timings to logs\GTMD_<PROMPT_TO_USE>_results.txt,
#   6. releases the model so the next dataset starts from clean GPU memory.
#
# Paths use Windows-style "\\" separators, matching `dataset_folder_path`.
logging.set_verbosity_error()
# Loop-invariant: silence Python warnings once rather than on every iteration.
warnings.filterwarnings("ignore")

for dataset_folder in dataset_folder_path:

    # "<root>\<dataset>\<region>\" -> [..., dataset, region, ""]
    path_parts = dataset_folder.split("\\")
    print(path_parts)
    dataset_output_path_1, dataset_output_path_2 = path_parts[-3], path_parts[-2]

    dataset = load_dataset(
        "json",
        data_files={
            "train": dataset_folder + "train.json",
            "valid": dataset_folder + "valid.json",
            "test": dataset_folder + "test.json",
        },
    )
    print("successfully loaded dataset.......")

    # A fresh quantized base model per dataset, so adapters never carry over.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        config=model_config,
        quantization_config=bnb_config,
        device_map='auto',
        token=True
    )
    print("loaded model........")

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        MODEL_NAME,
        token=True
    )
    print("loaded tokenizer........")
    # Reuse EOS as the padding token; pad on the right for training.
    PAD_TOKEN = tokenizer.eos_token
    tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
    tokenizer.padding_side = "right"

    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    print(dataset['train'][0]['text'])

    # Defined unconditionally: SFTConfig receives output_dir in both modes, and the
    # previous code raised NameError here when SAVE_FINE_TUNED_MODEL was False.
    OUTPUT_DIR = "experiments\\" + dataset_output_path_1 + "\\" + dataset_output_path_2 + "\\"

    # Settings shared by the saving and non-saving runs.
    # NOTE(review): warmup_ratio presumably has no effect with the "constant"
    # scheduler — confirm whether "constant_with_warmup" was intended.
    sft_kwargs = dict(
        output_dir=OUTPUT_DIR,
        dataset_text_field="text",
        max_seq_length=300,
        num_train_epochs=2,
        per_device_train_batch_size=6,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        logging_steps=10,
        learning_rate=1e-4,
        bf16=True,
        warmup_ratio=0.1,
        lr_scheduler_type="constant",
        dataset_kwargs={
            # The "text" field already carries the chat-template special tokens.
            "add_special_tokens": False,
            "append_concat_token": False,
        },
    )
    if SAVE_FINE_TUNED_MODEL:
        os.makedirs(os.path.dirname(OUTPUT_DIR), exist_ok=True)
        sft_kwargs.update(
            save_strategy="steps",
            save_total_limit=0,
            save_safetensors=True,
        )
    else:
        sft_kwargs.update(save_strategy="no")
    sft_config = SFTConfig(**sft_kwargs)

    trainer = SFTTrainer(
        model=model,
        args=sft_config,
        train_dataset=dataset["train"],
        eval_dataset=dataset["valid"],
        tokenizer=tokenizer,
    )

    print("starting training...........")

    start_time_train = time.time()
    trainer.train()
    end_time_train = time.time()
    elapsed_time_train = end_time_train - start_time_train

    print('training completed....')

    if SAVE_FINE_TUNED_MODEL:
        trainer.save_model(OUTPUT_DIR)
        print('model saved .........')

    # Pick the formatter for this prompt variant. `scenario` is passed explicitly
    # because two of the formatters declare it without a default value.
    # NOTE(review): the training sample echoed in the notebook output ends the user
    # message with "Answer only with: ..." — confirm the selected formatter
    # produces the same user message as the training text.
    if PROMPT_TO_USE == "simple":
        format_prompt = format_test_gtminer_simple
    elif PROMPT_TO_USE == "attribute_value_dist":
        format_prompt = format_test_gtminer_distance
    else:
        format_prompt = format_test_gtminer
    test_prompts = [format_prompt(x, scenario="fine-tune") for x in dataset['test']]

    results = []
    start_time_test = time.time()

    model.eval()
    with torch.no_grad():
        for prompt in test_prompts:
            # The chat template already emits <|begin_of_text|>, and training used
            # add_special_tokens=False; do the same here so inference does not see
            # a duplicated BOS token.
            inputs = tokenizer(
                prompt, return_tensors="pt", add_special_tokens=False
            ).to(device='cuda')
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,  # explicit, since pad == eos
                max_new_tokens=2,  # answer budget in tokens; supersedes max_length
                num_return_sequences=1,  # Number of sequences to generate
                no_repeat_ngram_size=2,  # Avoid repeating phrases
                temperature=0.01,  # Controls randomness; lower is less random
                top_k=50,  # Top-k sampling
            )
            # Decodes prompt + answer; the answer is the last line of the text.
            prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
            results.append(prediction.strip())

    end_time_test = time.time()
    elapsed_time_test = end_time_test - start_time_test

    print("testing completed........")

    predictions = [x.split("\n")[-1].strip() for x in results]

    # Class ids: 1 same_as, 2 part_of, 3 serves, 0 unknown. Unrecognised model
    # output maps to 4 and unrecognised gold labels to 5, so the two never match.
    prediction_codes = {
        "same_as": 1, "same": 1, "same-as": 1,
        "part_of": 2, "part-of": 2, "partof": 2,
        "serves": 3, "served": 3,
        "unknown": 0,
    }
    label_codes = {"same_as": 1, "part_of": 2, "serves": 3, "unknown": 0}
    predictions = [prediction_codes.get(label, 4) for label in predictions]
    labels = [label_codes.get(label, 5) for label in dataset['test']['answer']]
    print(path_parts)

    try:
        my_mets = calculate_metrics2(predictions, labels)
        print(my_mets)
    except Exception as e:
        # Best effort: keep the run going and record the failure in the log.
        print(e)
        print('my calc failed')
        my_mets = 'my calc failed'

    results_logs = "logs\\GTMD_" + PROMPT_TO_USE + "_results.txt"
    # Create the log folder up front so a missing directory cannot discard the
    # results of a finished training run.
    os.makedirs("logs", exist_ok=True)
    log_entry = "".join([
        str(dataset_output_path_1),
        str(dataset_output_path_2),
        '\n',
        str(dataset['train'][0]['text']),
        '\n',
        str(results[0]),
        '\n',
        '\n',
        str(my_mets),
        '\n',
        "Train time: " + str(elapsed_time_train),
        '\n',
        "Test time: " + str(elapsed_time_test),
        '\n',
        '\n',
        '********************************',
        '\n',
        '\n',
    ])
    with open(results_logs, "a", encoding='utf-8') as f:
        f.write(log_entry)

    # The trainer keeps its own reference to the model, so it has to go too
    # before the CUDA cache can actually be released.
    del trainer
    del model
    del dataset
    del tokenizer
    gc.collect()
    torch.cuda.empty_cache()


['datasets', 'gtminer_plm', 'mel', '']


Generating train split: 0 examples [00:00, ? examples/s]

Generating valid split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

successfully loaded dataset.......


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........
loaded tokenizer........
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations.<|eot_id|><|start_header_id|>user<|end_header_id|>

Place 1: 'COL name VAL Department of Environment, Land, Water and Planning COL type VAL office COL address VAL 8 Nicholson Street 3002 COL latitude VAL -37.8083722 COL longitude VAL 144.9734604 '
Place 2: 'COL name VAL Hotel Ovolo COL type VAL hotel COL address VAL 19 Little Bourke Street 3000 COL latitude VAL -37.8107508 COL longitude VAL 144.9719983 '
Answer only with: same_as, part_of, serves, unknown<|eot_id|><|start_h

Generating train split: 0 examples [00:00, ? examples/s]

Generating valid split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

successfully loaded dataset.......


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........
loaded tokenizer........
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations.<|eot_id|><|start_header_id|>user<|end_header_id|>

Place 1: 'COL name VAL The Grilled Cheese Experience COL type VAL cafe COL address VAL 909 Pike Street 98101 COL latitude VAL 47.6123242 COL longitude VAL -122.3309093 '
Place 2: 'COL name VAL Washington State Convention Center COL type VAL attraction COL address VAL 800 Convention Place 98101 COL latitude VAL 47.6117274 COL longitude VAL -122.3316528 '
Answer only with: same_as, part_of, serves, unknown<|eot_id|><|start_h

Generating train split: 0 examples [00:00, ? examples/s]

Generating valid split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

successfully loaded dataset.......


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........
loaded tokenizer........
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations.<|eot_id|><|start_header_id|>user<|end_header_id|>

Place 1: 'COL name VAL City Square Post Office COL type VAL post_office COL address VAL 180 Kitchener Road #B2-33 City Square 208539 COL latitude VAL 1.3109755 COL longitude VAL 103.8567196 '
Place 2: 'COL name VAL Mustafa Centre COL type VAL Shopping Centers COL address VAL 145 Syed Alwi Road 207704 COL latitude VAL 1.3101243 COL longitude VAL 103.8553164 '
Answer only with: same_as, part_of, serves, unknown<|eot_id|><|st

Generating train split: 0 examples [00:00, ? examples/s]

Generating valid split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

successfully loaded dataset.......


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

loaded model........
loaded tokenizer........
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Two place descriptions are provided. Answer with 'same_as' if the first place is the same as the second place. Answer with 'part_of' if the first place is a part of the second place and is located inside the second place. Answer with 'serves' if the first place provides a service to the second place in terms of human mobility, assistance, etc. Answer with 'unknown' if the two places show none of these relations.<|eot_id|><|start_header_id|>user<|end_header_id|>

Place 1: 'COL name VAL First Copy COL type VAL copyshop COL address VAL 61 College Street COL latitude VAL 43.6605215 COL longitude VAL -79.386228 '
Place 2: 'COL name VAL Medical Sciences Building COL type VAL Colleges & Universities; Education COL address VAL 1 King's College Cir M5S 1A8 COL latitude VAL 43.6607748 COL longitude VAL -79.3933729 '
Answer only with: same_as, part_of, serves, unknown<|eot_id|><|start_header

In [None]:
# Reload a saved adapter for standalone use: tokenizer from the adapter directory,
# base model in float16, LoRA weights loaded on top, then merged into the base
# weights so `model` ends up as a plain (non-PEFT) model.
# NOTE(review): 'experiments/my_data/auck' does not follow the
# "experiments\<dataset>\<region>\" layout the training loops write to — confirm
# the directory exists.
# NOTE(review): unlike the other from_pretrained calls in this notebook, the base
# model is loaded here without token=True and without quantization — confirm
# this is intended.
MODEL_NAME = 'meta-llama/Meta-Llama-3-8B-Instruct'
tokenizer = AutoTokenizer.from_pretrained('experiments/my_data/auck')
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, 'experiments/my_data/auck')
model = model.merge_and_unload()