In [1]:
from huggingface_hub import login
import os
from dotenv import load_dotenv
load_dotenv()

hf_token = os.getenv('HUGGINGFACE_TOKEN')
login(hf_token)

### Prepare inputs for test

In [2]:
from src.synthetic_generation import generate
from src.prompts import (
    CharacterNamesPair, 
    FictionalCharacterPair,
    TShirtOrderPair,
    FilmIdeaPair,
    ItineraryPair,
    RecipePair,
    EventPair,
    ResumePair,
    GameArtifactPair,
    BookReviewPair,
    TextSummaryPair,
    GameIdeaPair,
    HousePair,
    SollarSystemPair,
)

import asyncio
asyncio.get_event_loop().set_debug(True)

In [3]:
pairs = [
    CharacterNamesPair(),
    FictionalCharacterPair(),
    TShirtOrderPair(),
    FilmIdeaPair(),
    ItineraryPair(),
    RecipePair(),
    EventPair(),
    ResumePair(),
    GameArtifactPair(),
    BookReviewPair(),
    TextSummaryPair(),
    GameIdeaPair(),
    HousePair(),
    SollarSystemPair(),
]

sample_num = 1

### Original model test

In [4]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 4096
dtype = None
load_in_4bit = True

original_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Llama-3.2-1B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
from src.langchain_llm_wrapper import LlamaLLM

original_llm = LlamaLLM(model=original_model, tokenizer=tokenizer)

In [7]:
error_counter = 0
all_results = []

for pair in pairs:
    try:
        result, errors = await generate(pair=pair, sample_num=sample_num, json_gen_llm=original_llm)
        all_results.append(result)
        error_counter += errors
    except Exception as e:
        # print(f"Error processing a chunk: {e}")
        error_counter += 1

print(f"Original model total errors: {error_counter}")

Original model total errors: 12


In [8]:
all_results

[[{'input': 'You are a name and surname extractor. Your task is to identify and extract the names and surnames of characters from the given text.\n### Input:\nIn the quaint town of Willowbrook, Clara, a spirited librarian, discovered an old journal hidden in the library\'s attic. Intrigued, she began to read about the life of Thomas Whitaker, a curious inventor from the 19th century. \n\nAs Clara delved deeper into the stories of Thomas\'s eccentric creations, her best friend, Jake Thompson, a local history enthusiast, became fascinated by the mystery surrounding Whitaker. “We should try to find more about his inventions,” Jake suggested, his eyes sparkling with excitement.\n\nTogether, they scoured the town\'s archives, uncovering blueprints and forgotten letters filled with innovative ideas. The deeper they dug, the more they realized that Thomas wasn’t just an inventor; he was ahead of his time, envisioning machines that mirrored today’s technology.\n\nDuring one rainy afternoon, Cl

### Fine tuned model test

In [9]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 4096
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Llama1B_FineTuned",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)

==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.3.14 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 2048, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear

Executing <Task pending name='Task-3' coro=<Kernel.dispatch_queue() running at /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/ipykernel/kernelbase.py:524> wait_for=<Future pending cb=[Task.task_wakeup()] created at /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/tornado/queues.py:248> cb=[_wrap_awaitable.<locals>.<lambda>() at /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/tornado/gen.py:851, IOLoop.add_future.<locals>.<lambda>() at /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/tornado/ioloop.py:699]> took 6.029 seconds


In [10]:
from src.langchain_llm_wrapper import LlamaLLM

tuned_llm = LlamaLLM(model=model, tokenizer=tokenizer)

In [11]:
error_counter = 0
all_results = []

for pair in pairs:
    try:
        result, errors = await generate(pair=pair, sample_num=sample_num, json_gen_llm=tuned_llm)
        all_results.append(result)
        error_counter += errors
    except Exception as e:
        # print(f"Error processing a chunk: {e}")
        error_counter += 1

print(f"Fine tuned model total errors: {error_counter}")

Fine tuned model total errors: 1


In [12]:
all_results

[[{'input': 'You are a name and surname extractor. Your task is to identify and extract the names and surnames of characters from the given text.\n### Input:\nIn the quaint town of Maplewood, Lydia Harrington hurried through the cobblestone streets, clutching a stack of old letters. She was on her way to meet Thomas Caldwell, the local historian known for his encyclopedic knowledge of the town’s past. \n\n“Lydia! Over here!” Thomas called, waving from under the shade of a large oak tree in the town square. \n\nShe approached, breathless. “I found these in my grandmother\'s attic. They’re from the 1920s. I thought you might find them interesting.” \n\nThomas carefully took the letters, his eyes lighting up with excitement. “These could be a goldmine! You have to tell me what else you know about your grandmother, Margaret Elwood.”\n\nLydia paused, her mind racing back to stories of her grandmother’s youth. “She used to talk about adventures with the mysterious Jasper Blake; they were gre