In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from peft import PeftModel, PeftConfig
import torch

# Load tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # or the base model you trained on
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Load base model in 4-bit if you used quantization
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
config = AutoConfig.from_pretrained(model_name)
# manually set rope_scaling to supported structure:
config.rope_scaling = {"type": "dynamic", "factor": 2.0}
config.use_cache = True

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    config = config,
    torch_dtype=torch.float16
)
adapted_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    config=config,
    torch_dtype=torch.float16
)

# Load your LoRA adapter
adapter_path = "./../Training/final_adapter_with_eval_0"  # or wherever your adapter_model.safetensors is
adapted_model= PeftModel.from_pretrained(adapted_model, adapter_path)


2025-03-26 13:51:14.762890: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
#Need to empty_cache other wise 2 models' results will bleed into each other
# only slice and decode the new tokens 
def generate_summary(input_text, max_new_tokens=150):
    torch.cuda.empty_cache()

    prompt = f"""Without commentary, from its original language summarize to English on useful information including sensitive data, below 100 words. If no meaning return <NULL>
Text:
{input_text}
"""
    
    inputs = tokenizer(prompt, return_tensors="pt").to(adapted_model.device)
    
    with torch.no_grad():
        outputs = adapted_model.generate(
            **inputs,
            do_sample=True,
            temperature=0.7,
            max_new_tokens=max_new_tokens,
            top_p=0.9
        )
    input_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][input_len:]  # exclude prompt
    summary = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return summary

def generate_base_summary(input_text, max_new_tokens=150):
    torch.cuda.empty_cache()
    prompt = f"""Without commentary, from its original language summarize to English on useful information including sensitive data, below 100 words. If no meaning return <NULL>
Text:
{input_text}
"""
    
    inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
    
    with torch.no_grad():
        outputs = base_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
    input_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][input_len:]  # exclude prompt
    summary = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return summary
    

In [3]:
import json
def get_json_from_file(file_path):

    with open(file_path, "r", encoding="utf-8") as file:
        data = file.read().strip()

        # Fix concatenated JSON objects
        objs = data.split("}{")

        parsed_objs = []
        for i, obj in enumerate(objs):
            if not obj.startswith("{"):
                obj = "{" + obj
            if not obj.endswith("}"):
                obj = obj + "}"
            try:
                parsed = json.loads(obj)
                parsed_objs.append(parsed)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON in file {file_path}, object {i}: {e}")
    return parsed_objs

In [4]:

# example_file = "./../Training/../Training/InputNLabel/prompt_sensitive_translated/receipts_2.json"
# file_and_limits = {
#     "./../Training/InputNLabel/filtered_labels/filtered_formatted.json": 30,
#     "./../Training/InputNLabel/filtered_labels/filtered_randoms.json": 30,
#     "./../Training/InputNLabel/filtered_labels/filtered_receipts.json": 30,
#     "./../Training/InputNLabel/filtered_labels/filtered_reports.json": 10,
#     "./../Training/InputNLabel/prompt_sensitive_translated/formatted_2.json": 300,
#     "./../Training/InputNLabel/prompt_sensitive_translated/random_2.json": 300,
#     "./../Training/InputNLabel/prompt_sensitive_translated/receipts_2.json": 300,
#     "./../Training/InputNLabel/prompt_sensitive_translated/reports_2.json": 300,
# }
formatted_chunks = "./../Training/InputNLabel/prompt_sensitive_translated/formatted_2.json"
receipt_chunks = "./../Training/InputNLabel/prompt_sensitive_translated/receipts_2.json"
report_chunks = "./../Training/InputNLabel/prompt_sensitive_translated/reports_2.json"
random_chunks = "./../Training/InputNLabel/prompt_sensitive_translated/random_2.json"

filtered_receipts = "./../Training/InputNLabel/filtered_labels/filtered_receipts.json"
filtered_reports = "./../Training/InputNLabel/filtered_labels/filtered_reports.json"
filtered_randoms = "./../Training/InputNLabel/filtered_labels/filtered_randoms.json"
filtered_formatted = "./../Training/InputNLabel/filtered_labels/filtered_formatted.json"

formatted = get_json_from_file(formatted_chunks)
receipts = get_json_from_file(receipt_chunks)
reports = get_json_from_file(report_chunks)
randoms = get_json_from_file(random_chunks)

f_receipts = get_json_from_file(filtered_receipts)
f_reports = get_json_from_file(filtered_reports)
f_randoms = get_json_from_file(filtered_randoms)
f_formatted = get_json_from_file(filtered_formatted)

print("chunks with formats",len(formatted))
print("chunks of recipts that need ocr", len(receipts))
print("chunks of well written long papers, no ocr",len(reports))
print("chunks of all sorts",len(randoms))

print("chunks with filtered formats",len(f_formatted))
print("chunks with formatted receipts", len(f_receipts))
print("chunks with formatted reports", len(f_reports))
print("chunks with formatted randoms", len(f_randoms))



chunks with formats 300
chunks of recipts that need ocr 300
chunks of well written long papers, no ocr 300
chunks of all sorts 300
chunks with filtered formats 99
chunks with formatted receipts 100
chunks with formatted reports 30
chunks with formatted randoms 100


##### Testing filtered chunks

In [5]:
input = f_receipts[0]
print(input["input"])
print("\nOutput:",input["output"])

MARKET
Pesan as 62-2104
se
etna
saa
10h
czy corn Eueees ” a
Raurommcorse =|
Aur isto.
|| B
ferecth a ne bibs
B
Stl
a
ate:
f
ital
S
Ei iam:
paid
Riles,
ase
1
ip facoet mor
his Er i Amn
rug;
AIT retro a e
Wrveuty ov tee fc 
3B Ghar asia tea
biti vat
canter
fc} tars ave a ele Taos
a's Pras Stig, Vid tke
{Sin ie a scon
Tam 5 ack at ne fons e
HR the tan Pres hoar
(Shave a musi.cofet

--------------------------------------------------

Output: <NULL>


In [9]:
input_text = input["input"]
#print(generate_base_summary(input_text,150))
print("!!!!<---------Next Summary----------->!!!!")
print(generate_summary(input_text, 150))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


!!!!<---------Next Summary----------->!!!!
Summary:
<NULL>


In [None]:
print(f_reports[0]["input"])
print(f_receipts[0]["output"])


P
Figure 3: ImageNet acc. vs model complexity.
Table 6: Comparisons among the GFNet and other
variants based on the transformer-like architecture
on ImageNet. We show that GFNet outperforms the
ResMLP [42], FNet [25] and models with local depth-
wise convolutions. We also report the number of pa-
rameters and theoretical complexity in FLOPs.
Model
Acc
Param
FLOPs
(%)
(M)
(G)
DeiT-S [43]
79.8
22
4.6
Local Conv (3 × 3)
77.7
15
2.8
Local Conv (5 × 5)
78.1
15
2.9
Local Conv (7 × 7)
78.2
15
2.9
ResML
<NULL>


In [10]:
input = receipts[0]
print(generate_base_summary(input["input"]))
print("!!!!<----------------------->!!!!")
print(generate_summary(input["input"]))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Date: 2019-10-09
Time: 06:58
Currency: EUR
Payment Method: Check
Payment Amount: 12.60
Discount: 1.04 9% BFW
Subtotal: 12.60
Total: 12.60
Total with tax: 12.60
Discount Amount: 1.04
Tax Amount: 11.56
Items: 3
Total with tax: 12.60
Tax Amount: 11.56
Items: 3
Total with tax: 12.60
Discount Amount: 1.04
Tax Amount: 11.56
Items: 3
Total with tax: 12.
!!!!<----------------------->!!!!
KFC.nl
Bijdrage aan het 5ejaar van KFC Nederland
Bedankt voor uw bezoek aan KFC؛
Heeft 2 minuten nodig om een foto te maken?
Geef feedback en krijg 3 gratis
Hot Wings bij uw volgende
bestelling (van min, 6 euro).
Ga naar; w
KFC.nl
Bijdrage aan het 5ejaar van KFC Nederland
Bedankt voor uw bezoek aan KFC؛
Heeft 2 minuten nodig om een foto te maken?
Geef feedback en krijg 3 gratis
Hot Wings bij uw volgende
bestelling (van min, 6


In [10]:
#trained adapter 0 after 72 examples: there is still original text but a summary is now  more concise
input_text = formatted[0]["input"] + formatted[1]["input"] + formatted[2]["input"]
#input_text = f_receipts[0]["input"]
print("!!!!<----------------------->!!!!")
print(generate_base_summary(input_text))
print("!!!!<----------------------->!!!!")
print(generate_summary(input_text))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


!!!!<----------------------->!!!!


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


summarize :-08
Shipped Date: 2017-08-11
Products:
Product: Jack's New England Glam Chowder
‘Quantity: 20
Unit Pie:
9.65,

--------------------------------------------------7-08-21
Shipped Date: 2017-08-28
Products:
--------------------------------------------------------------------------------------------------
Product: Gudbrandsdalsost
Quantity: 20
Unit Price: 36.0
Total: 720.0
--------------------------------------------------------------------------------------------------
Product: Outback Lager
Quantity: 15
Unit Price: 15.0
Total: 225.0
Total Price:

--------------------------------------------------
Total Price: 945.0

---------------------------------------Order ID: 10481
Shipping Details:
Ship Name: Ricardo Adocicados
Ship Address: Av. Copacabana, 257
Ship Cy: de Janeiro.
Ship Region: South America
Ship Postal Code: 02389-890
Ship County: Brazil
Customer Details:
CustomerID: RICAR
Customer Name:
Ricardo Adocicados
Employee Details:
Employee Name: Laura Callahan
Shipper Details: