In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import textstat


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Load the tokenizer from the local adapter directory (the same path is later
# handed to PeftModel.from_pretrained); the pad token is fixed in the next cell.
model_path = "./qwen-7b-lora-simplifier"
# trust_remote_code=True lets transformers execute custom tokenizer code shipped
# with the checkpoint directory -- only use it with directories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


In [3]:
# Padding requires a pad token. When the tokenizer defines none, reuse its EOS
# token, falling back to the literal "<|endoftext|>" if EOS is unset as well.
if tokenizer.pad_token is None:
    tokenizer.pad_token = (
        tokenizer.eos_token if tokenizer.eos_token else "<|endoftext|>"
    )


In [4]:
# Quantization settings used when loading the base model: 4-bit weights in the
# "nf4" format with double quantization enabled, computing in float16.
# NOTE(review): the load log printed by the next cell reports the model being
# auto-converted to bf16 while the compute dtype here is float16 -- confirm the
# mismatch is intended.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)


In [5]:

# Load the Qwen-7B-Chat base weights with the 4-bit quantization config defined
# above, letting device_map="auto" decide where the layers are placed.
# trust_remote_code=True is passed because the checkpoint ships its own model
# code (the printed module tree shows QWenLMHeadModel); that code is executed
# locally, so only load repositories you trust.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-7B-Chat",
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)


The model is automatically converting to bf16 for faster inference. If you want to disable the automatic precision, please manually add bf16/fp16/fp32=True to "AutoModelForCausalLM.from_pretrained".
Try importing flash-attention for faster inference...
Loading checkpoint shards: 100%|██████████| 8/8 [00:24<00:00,  3.07s/it]


In [6]:
# Resize the base model's token-embedding matrix to the tokenizer's vocabulary
# size. This runs before the LoRA adapter is attached in the next cell; the
# returned embedding module is what the cell displays as its output.
base_model.resize_token_embeddings(len(tokenizer))


Embedding(151851, 4096)

In [7]:
# Load LoRA adapter: wrap the quantized base model with the adapter weights
# stored in model_path.
model = PeftModel.from_pretrained(base_model, model_path)
# Put the wrapped model in evaluation mode so dropout layers (including the
# lora_dropout modules visible in the printed module tree) are inactive during
# generation. eval() returns the model, which is why the cell prints its repr.
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): QWenLMHeadModel(
      (transformer): QWenModel(
        (wte): Embedding(151851, 4096)
        (drop): Dropout(p=0.0, inplace=False)
        (rotary_emb): RotaryEmbedding()
        (h): ModuleList(
          (0-31): 32 x QWenBlock(
            (ln_1): RMSNorm()
            (attn): QWenAttention(
              (c_attn): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=12288, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=12288, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
      

In [8]:

# Batch of complex sentences to simplify. Only the last entry is active; the
# commented-out entries are alternative inputs that can be re-enabled as-is
# (each one is a complete, comma-terminated string literal).
complex_sentences = [
#    "The proliferation of autonomous vehicles necessitates robust regulatory oversight.",
#    "Photosynthesis is a process through which green plants use sunlight to synthesize foods from carbon dioxide and water.",
#    "Urban planners must incorporate resilient and adaptive infrastructure strategies to address climate-related risks.",
#    "Quantum computing leverages principles of quantum mechanics to process information exponentially faster than classical computers.",
#    "The amortization schedule delineates the temporal allocation of loan repayments, wherein a progressively increasing portion is attributed to the principal and a decreasing component to interest, under the assumption of a fixed interest rate and consistent periodic payments throughout the loan's tenure.",
    "The teacher, the students, and the principal, who had all gathered in the auditorium for the emergency meeting, listened intently as the superintendent outlined the district's new safety protocols."
]


In [11]:

# Function to simplify a sentence
def simplify(text):
    """Rewrite ``text`` in plain English using the LoRA-adapted model.

    Wraps ``text`` in an instruction prompt, samples a completion from the
    module-level ``model`` (tokenized with the module-level ``tokenizer``) and
    returns only the text the model generated after the prompt.

    Args:
        text: Sentence to simplify; surrounding whitespace is stripped before
            it is placed in the prompt.

    Returns:
        The generated simplification as a whitespace-stripped string.

    Note:
        Sampling is enabled (``do_sample=True``), so repeated calls may return
        different text unless the caller seeds the torch RNG beforehand.
    """
    prompt = (
        "You are an assistant that rewrites technical sentences in plain English for students and non-experts. From this sentence, identify all the nouns, and then provide one overarching common noun that could represent the whole group. Use this common noun in the simplified sentence\n\n"
        f"Sentence: {text.strip()}\n"
        "Simplified:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Use an explicit None check: `a or b` would wrongly discard a valid
    # token id of 0.
    eos_token_id = tokenizer.eos_token_id
    if eos_token_id is None:
        eos_token_id = tokenizer.pad_token_id

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=eos_token_id,
        )

    # generate() returns prompt + completion for a causal LM. Slice the prompt
    # tokens off instead of splitting the decoded text on "Simplified:", which
    # picks the wrong segment whenever the model repeats that marker.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    )

    # If the model keeps going and invents another "Sentence: ..." example,
    # keep only the answer to the sentence we actually asked about.
    simplified = completion.split("\nSentence:", 1)[0].strip()
    return simplified


In [12]:
# Run every sentence in the batch through simplify() and print the original,
# the simplified text, and readability scores computed on the simplified text.
print(" Batch Simplification Results:\n")
for i, sentence in enumerate(complex_sentences, 1):
    simplified = simplify(sentence)
    print(f"{i}. Original:\n{sentence}")
    print(f"   Simplified:\n{simplified}")

    # Optional: readability metrics. The numeric scores share one format, so
    # they are driven from a (label, scorer) table in the order they are shown.
    print("   📊 Readability:")
    for label, score_fn in (
        ("Flesch Reading Ease", textstat.flesch_reading_ease),
        ("FK Grade Level", textstat.flesch_kincaid_grade),
        ("Gunning Fog", textstat.gunning_fog),
    ):
        print(f"     - {label}: {score_fn(simplified):.2f}")
    # text_standard is printed as returned, without numeric formatting.
    print(f"     - Consensus: {textstat.text_standard(simplified)}")
    print("-" * 80)

 Batch Simplification Results:

1. Original:
The teacher, the students, and the principal, who had all gathered in the auditorium for the emergency meeting, listened intently as the superintendent outlined the district's new safety protocols.
   Simplified:
People (which includes the teacher, students, principal, auditorium, emergency meeting participants, superintendent, and district) gathered in the auditorium to listen to the superintendent outline the district's new safety protocols.
   📊 Readability:
     - Flesch Reading Ease: -17.19
     - FK Grade Level: 22.90
     - Gunning Fog: 19.88
     - Consensus: 22nd and 23rd grade
--------------------------------------------------------------------------------
