In [1]:
import re
import difflib
import pandas as pd

from tqdm import  tqdm_notebook as tqdm
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "cycv5/llama3b-lora-phone",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
    token = "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",
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA A10. Max memory: 21.988 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.2.15 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [3]:
# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [4]:
def get_messages(sentence, examples):
    messages = [
        {"role": "system", "content": "You are an expert in correcting typos in sentences."},
        {"role": "user", "content": """
Here are examples of sentences with typos; learn from them:

{examples}
Now, please correct this sentence and output only the corrected version with no additional text:

{target_sentence}
        """.format(target_sentence=sentence, examples=examples)},
    ]
    return messages

In [5]:
def compute_accuracy_and_wrong_syllables(true_sentence, predicted_sentence):
    # Character-level accuracy using SequenceMatcher
    char_matcher = difflib.SequenceMatcher(None, true_sentence, predicted_sentence)
    accuracy = char_matcher.ratio()
    
    # Word-level wrong syllable count using SequenceMatcher on word lists
    true_words = true_sentence.split()
    predicted_words = predicted_sentence.split()
    word_matcher = difflib.SequenceMatcher(None, true_words, predicted_words)
    
    # Calculate wrong syllables based on insert, delete, and replace operations
    wrong_syllables = sum(1 for tag, _, _, _, _ in word_matcher.get_opcodes() if tag in ('insert', 'delete', 'replace'))
    
    return accuracy, wrong_syllables

In [6]:
def get_llm_sentence(sentence, examples):
    messages = get_messages(sentence, examples)
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True,
                         temperature = 1.5, min_p = 0.1)
    ret = tokenizer.batch_decode(outputs)
    gpt_response = re.search(r"assistant<\|end_header_id\|>\n\n(.*?)<\|eot_id\|>", ret[0], re.DOTALL)
    if gpt_response:
        gpt_response = gpt_response.group(1)
        return gpt_response
    else:
        raise ValueError("LLM response not found")

In [7]:
def llm_postprocess(sentence):
    sentence = sentence.lower().strip()
    # remove all non a-z0-9 
    sentence = re.sub(r'[^a-z0-9\s]', '', sentence)
    return sentence

In [None]:
NFs = [
    "noise_0.012",
    "noise_0.024",
    "noise_0.06",
]
output_dir = "echocrypt"

for nf in NFs:
    df = pd.read_csv(f"results/{nf}.csv")
    examples = ""

    for i in range(2):
        examples += f"\tsentence: {df['Predicted Sentence'][i]}\n"
        examples += f"\tcorrected: {df['True Sentence'][i]}\n\n"

    llm_accs = []
    llm_ws = []
    llm_sen = []
    total=len(df)

    for index, row in tqdm(df.iterrows(), total=total):
        should_print = index % 100 == 0
        predicted_sentence = row['Predicted Sentence']
        true_sentence = row['True Sentence']
        accuracy, wrong_syllables = compute_accuracy_and_wrong_syllables(true_sentence, predicted_sentence)
        if should_print:
            print(f"[LLM Auto] Index: {index} of {total}")
            print("[LLM Auto] CoAtNet", accuracy, wrong_syllables)
        
        llm_sentence = get_llm_sentence(predicted_sentence, examples)
        llm_sentence = llm_postprocess(llm_sentence)
        accuracy, wrong_syllables = compute_accuracy_and_wrong_syllables(true_sentence, llm_sentence)
        if should_print:
            print("[LLM Auto] LLM", accuracy, wrong_syllables)
            print("[LLM Auto] ==========")
        
        llm_sen.append(llm_sentence)
        llm_accs.append(accuracy)
        llm_ws.append(wrong_syllables)

    df['LLM Sentence'] = llm_sen
    df['LLM Accuracy'] = llm_accs
    df['LLM Wrong syllables'] = llm_ws

    # average accuracy
    llm_avg_accuracy = sum(llm_accs) / len(llm_accs)
    # sum of wrong syllables
    llm_sum_wrong_syllables = sum(llm_ws)

    print(f"[LLM Auto] Model: EchoCrypt")
    print(f"[LLM Auto] NF {nf}")
    print(f"[LLM Auto] LLM Average Accuracy: {llm_avg_accuracy}")
    print(f"[LLM Auto] LLM Sum of Wrong Syllables: {llm_sum_wrong_syllables}")
    print("[LLM Auto] ===")
    
    df.to_csv(f'results/{output_dir}/{nf}.csv', index=False)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for index, row in tqdm(df.iterrows(), total=total):


  0%|          | 0/1000 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


[LLM Auto] Index: 0 of 1000
[LLM Auto] CoAtNet 0.9423076923076923 4
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 100 of 1000
[LLM Auto] CoAtNet 0.958904109589041 2
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 200 of 1000
[LLM Auto] CoAtNet 0.9404761904761905 3
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 300 of 1000
[LLM Auto] CoAtNet 0.9594594594594594 3
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 400 of 1000
[LLM Auto] CoAtNet 0.9166666666666666 4
[LLM Auto] LLM 0.9861111111111112 1
[LLM Auto] Index: 500 of 1000
[LLM Auto] CoAtNet 0.9365079365079365 2
[LLM Auto] LLM 0.9606299212598425 1
[LLM Auto] Index: 600 of 1000
[LLM Auto] CoAtNet 0.9358974358974359 3
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 700 of 1000
[LLM Auto] CoAtNet 0.9468085106382979 4
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 800 of 1000
[LLM Auto] CoAtNet 0.9418604651162791 3
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 900 of 1000
[LLM Auto] CoAtNet 0.9888888888888889 1
[LLM Auto] LLM 1.0 0
[LLM Auto] Model: EchoCrypt
[LLM Auto] NF noise_0.1
[LLM Auto] 

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for index, row in tqdm(df.iterrows(), total=total):


  0%|          | 0/1000 [00:00<?, ?it/s]

[LLM Auto] Index: 0 of 1000
[LLM Auto] CoAtNet 0.8269230769230769 3
[LLM Auto] LLM 0.9714285714285714 2
[LLM Auto] Index: 100 of 1000
[LLM Auto] CoAtNet 0.7945205479452054 4
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 200 of 1000
[LLM Auto] CoAtNet 0.7857142857142857 3
[LLM Auto] LLM 0.9761904761904762 1
[LLM Auto] Index: 300 of 1000
[LLM Auto] CoAtNet 0.8648648648648649 4
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 400 of 1000
[LLM Auto] CoAtNet 0.8333333333333334 3
[LLM Auto] LLM 0.9861111111111112 1
[LLM Auto] Index: 500 of 1000
[LLM Auto] CoAtNet 0.873015873015873 4
[LLM Auto] LLM 0.8527131782945736 3
[LLM Auto] Index: 600 of 1000
[LLM Auto] CoAtNet 0.7051282051282052 1
[LLM Auto] LLM 0.9375 1
[LLM Auto] Index: 700 of 1000
[LLM Auto] CoAtNet 0.7872340425531915 4
[LLM Auto] LLM 0.9574468085106383 1
[LLM Auto] Index: 800 of 1000
[LLM Auto] CoAtNet 0.7325581395348837 2
[LLM Auto] LLM 0.9310344827586207 1
[LLM Auto] Index: 900 of 1000
[LLM Auto] CoAtNet 0.7888888888888889 2
[LLM Auto] LLM 0.8729

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for index, row in tqdm(df.iterrows(), total=total):


  0%|          | 0/1000 [00:00<?, ?it/s]

[LLM Auto] Index: 0 of 1000
[LLM Auto] CoAtNet 0.625 1
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 100 of 1000
[LLM Auto] CoAtNet 0.6712328767123288 3
[LLM Auto] LLM 0.5035971223021583 1
[LLM Auto] Index: 200 of 1000
[LLM Auto] CoAtNet 0.5714285714285714 2
[LLM Auto] LLM 0.9761904761904762 1
[LLM Auto] Index: 300 of 1000
[LLM Auto] CoAtNet 0.5135135135135135 1
[LLM Auto] LLM 0.5644171779141104 3
[LLM Auto] Index: 400 of 1000
[LLM Auto] CoAtNet 0.5833333333333334 3
[LLM Auto] LLM 0.6164383561643836 2
[LLM Auto] Index: 500 of 1000
[LLM Auto] CoAtNet 0.7777777777777778 5
[LLM Auto] LLM 0.6612903225806451 3
[LLM Auto] Index: 600 of 1000
[LLM Auto] CoAtNet 0.6923076923076923 1
[LLM Auto] LLM 0.9271523178807947 2
[LLM Auto] Index: 700 of 1000
[LLM Auto] CoAtNet 0.6808510638297872 2
[LLM Auto] LLM 0.5578947368421052 2
[LLM Auto] Index: 800 of 1000
[LLM Auto] CoAtNet 0.813953488372093 2
[LLM Auto] LLM 1.0 0
[LLM Auto] Index: 900 of 1000
[LLM Auto] CoAtNet 0.6333333333333333 1
[LLM Auto] LLM 0.33333