In [None]:
!pip install unsloth==2025.9.6
!pip install --no-deps trl==0.8.6 xformers accelerate peft bitsandbytes

In [2]:
import unsloth
from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
max_seq_length = 4096
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3.1-8b-instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.9.6: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Standard rank for QA reasoning
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
)

Unsloth 2025.9.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
from datasets import load_dataset
dataset = load_dataset("squad", split="train[:3000]")

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

plain_text/validation-00000-of-00001.par(…):   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [7]:
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 3000
})

In [8]:
def format_qa_dataset(examples):
    """Format examples for question answering task"""
    texts = []

    for i in range(len(examples["question"])):
        context = examples["context"][i]
        question = examples["question"][i]
        answer = examples["answers"][i]["text"][0] if examples["answers"][i]["text"] else "No answer available"

        # Create comprehensive QA format
        qa_prompt = f"""Read the following passage carefully and answer the question based on the information provided.

Passage:
{context}

Question: {question}

Answer:"""

        messages = [
            {"role": "user", "content": qa_prompt},
            {"role": "assistant", "content": answer}
        ]

        # Apply Llama-3.1 chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False,
        )

        texts.append(text)

    return {"text": texts}

In [9]:
formatted_dataset = dataset.map(
    format_qa_dataset,
    batched=True,
    remove_columns=dataset.column_names,
    desc="Formatting for question answering"
)

Formatting for question answering:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [10]:
formatted_dataset

Dataset({
    features: ['text'],
    num_rows: 3000
})

In [11]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    per_device_train_batch_size=1,  # Smaller batch for 8B model with long contexts
    gradient_accumulation_steps=8,  # Effective batch size: 8
    warmup_steps=10,
    max_steps=120,  # Sufficient for QA pattern learning
    learning_rate=1e-4,  # Lower LR for stable reasoning training
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=5,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=3407,
    output_dir="llama31_qa_expert",
    save_strategy="no",
)

In [12]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,  # Preserve QA structure
)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

In [13]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,000 | Num Epochs = 1 | Total steps = 120
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabeshith[0m ([33mabeshith-dr-m-g-r-educational-and-research-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,2.6859
10,2.2643
15,1.7939
20,1.5912
25,1.5859
30,1.587
35,1.5409
40,1.6132
45,1.5413
50,1.4771


TrainOutput(global_step=120, training_loss=1.5657019774119059, metrics={'train_runtime': 1212.6069, 'train_samples_per_second': 0.792, 'train_steps_per_second': 0.099, 'total_flos': 1.092680788549632e+16, 'train_loss': 1.5657019774119059, 'epoch': 0.32})

In [14]:
model.save_pretrained("llama31_qa_master")
tokenizer.save_pretrained("llama31_qa_master")

('llama31_qa_master/tokenizer_config.json',
 'llama31_qa_master/special_tokens_map.json',
 'llama31_qa_master/chat_template.jinja',
 'llama31_qa_master/tokenizer.json')

In [None]:
FastLanguageModel.for_inference(model)

In [16]:
def answer_question(context, question, max_tokens=200):
    """Answer questions based on provided context"""

    qa_prompt = f"""Read the following passage carefully and answer the question based on the information provided.

Passage:
{context}

Question: {question}

Answer:"""

    messages = [
        {"role": "user", "content": qa_prompt}
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tokenizer([text], return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    generated_tokens = outputs[0][len(inputs['input_ids'][0]):]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return response.strip()

In [17]:
science_context = """
Photosynthesis is the process by which plants, algae, and certain bacteria convert light energy, usually from the sun, into chemical energy stored in glucose molecules. This process occurs in two main stages: the light-dependent reactions (also called the photo part) and the light-independent reactions (also called the Calvin cycle or dark reactions). During the light-dependent reactions, which occur in the thylakoid membranes of chloroplasts, chlorophyll and other pigments absorb light energy and use it to split water molecules, releasing oxygen as a byproduct and producing ATP and NADPH. The light-independent reactions take place in the stroma of chloroplasts, where carbon dioxide from the atmosphere is fixed into organic molecules using the ATP and NADPH produced in the first stage. The overall equation for photosynthesis is: 6CO2 + 6H2O + light energy → C6H12O6 + 6O2. This process is crucial for life on Earth as it produces oxygen and serves as the foundation of most food chains.
"""

science_questions = [
    "What are the two main stages of photosynthesis?",
    "Where do the light-dependent reactions occur?",
    "What is the overall chemical equation for photosynthesis?",
    "Why is photosynthesis important for life on Earth?",
    "What molecules are produced during the light-dependent reactions?"
]

print("🔬 SCIENTIFIC ARTICLE COMPREHENSION:")
for i, question in enumerate(science_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    answer = answer_question(science_context, question, 150)
    print(f"💡 Answer: {answer}")
    print("-" * 60)

🔬 SCIENTIFIC ARTICLE COMPREHENSION:

❓ Question 1: What are the two main stages of photosynthesis?
💡 Answer: the light-dependent reactions and the light-independent reactions
------------------------------------------------------------

❓ Question 2: Where do the light-dependent reactions occur?
💡 Answer: thylakoid membranes of chloroplasts
------------------------------------------------------------

❓ Question 3: What is the overall chemical equation for photosynthesis?
💡 Answer: 6CO2 + 6H2O + light energy → C6H12O6 + 6O2
------------------------------------------------------------

❓ Question 4: Why is photosynthesis important for life on Earth?
💡 Answer: it produces oxygen and serves as the foundation of most food chains
------------------------------------------------------------

❓ Question 5: What molecules are produced during the light-dependent reactions?
💡 Answer: ATP and NADPH
------------------------------------------------------------


In [18]:
history_context = """
The Industrial Revolution, which began in Britain in the late 18th century and spread to other parts of Europe and North America, marked a major turning point in human history. It transformed societies from agricultural and handicraft economies to machine-based manufacturing. Key innovations included the steam engine, developed by James Watt in 1769, which revolutionized transportation and manufacturing. The textile industry was among the first to be mechanized, with inventions like the spinning jenny and power loom dramatically increasing production efficiency. Factory systems replaced traditional cottage industries, leading to urbanization as people moved from rural areas to cities for work. While the Industrial Revolution brought economic growth and technological advancement, it also created new social problems including poor working conditions, child labor, and environmental pollution. The period saw the emergence of new social classes: the industrial bourgeoisie (factory owners) and the industrial proletariat (factory workers), setting the stage for later social and political movements.
"""

history_questions = [
    "When and where did the Industrial Revolution begin?",
    "Who developed the steam engine and in what year?",
    "What were some of the negative consequences of industrialization?",
    "How did the Industrial Revolution change social structures?",
    "Which industry was among the first to be mechanized?"
]

print("\n📜 HISTORICAL DOCUMENT ANALYSIS:")
for i, question in enumerate(history_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    answer = answer_question(history_context, question, 150)
    print(f"📖 Answer: {answer}")
    print("-" * 60)


📜 HISTORICAL DOCUMENT ANALYSIS:

❓ Question 1: When and where did the Industrial Revolution begin?
📖 Answer: late 18th century and Britain
------------------------------------------------------------

❓ Question 2: Who developed the steam engine and in what year?
📖 Answer: James Watt in 1769
------------------------------------------------------------

❓ Question 3: What were some of the negative consequences of industrialization?
📖 Answer: poor working conditions, child labor, and environmental pollution
------------------------------------------------------------

❓ Question 4: How did the Industrial Revolution change social structures?
📖 Answer: It created new social classes
------------------------------------------------------------

❓ Question 5: Which industry was among the first to be mechanized?
📖 Answer: textile industry
------------------------------------------------------------


In [19]:
technical_context = """
Machine Learning (ML) is a subset of artificial intelligence (AI) that enables computers to learn and improve from experience without being explicitly programmed. There are three main types of machine learning: supervised learning, unsupervised learning, and reinforcement learning. Supervised learning uses labeled training data to learn a mapping from inputs to outputs, common algorithms include linear regression, decision trees, and neural networks. Unsupervised learning finds hidden patterns in data without labeled examples, including clustering algorithms like K-means and dimensionality reduction techniques like Principal Component Analysis (PCA). Reinforcement learning involves an agent learning to make decisions by interacting with an environment and receiving rewards or penalties. The machine learning pipeline typically involves data collection, data preprocessing, feature selection, model training, evaluation, and deployment. Popular programming languages for ML include Python and R, with libraries such as scikit-learn, TensorFlow, and PyTorch providing powerful tools for implementing ML algorithms. Cross-validation is a crucial technique used to assess model performance and prevent overfitting.
"""

technical_questions = [
    "What are the three main types of machine learning?",
    "What is the difference between supervised and unsupervised learning?",
    "What programming languages and libraries are commonly used in ML?",
    "What is cross-validation and why is it important?",
    "What are the typical steps in a machine learning pipeline?"
]

print("\n💻 TECHNICAL MANUAL Q&A:")
for i, question in enumerate(technical_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    answer = answer_question(technical_context, question, 180)
    print(f"🔧 Answer: {answer}")
    print("-" * 60)


💻 TECHNICAL MANUAL Q&A:

❓ Question 1: What are the three main types of machine learning?
🔧 Answer: supervised learning, unsupervised learning, and reinforcement learning
------------------------------------------------------------

❓ Question 2: What is the difference between supervised and unsupervised learning?
🔧 Answer: Supervised learning uses labeled training data to learn a mapping from inputs to outputs, while unsupervised learning finds hidden patterns in data without labeled examples.
------------------------------------------------------------

❓ Question 3: What programming languages and libraries are commonly used in ML?
🔧 Answer: Python and R
------------------------------------------------------------

❓ Question 4: What is cross-validation and why is it important?
🔧 Answer: Cross-validation is a crucial technique used to assess model performance and prevent overfitting.
------------------------------------------------------------

❓ Question 5: What are the typical ste

In [20]:
legal_context = """
A contract is a legally binding agreement between two or more parties that creates mutual obligations enforceable by law. For a contract to be valid, it must contain several essential elements: offer, acceptance, consideration, capacity, and legality. An offer is a proposal made by one party (the offeror) to another party (the offeree) indicating willingness to enter into a contract. Acceptance occurs when the offeree agrees to the terms of the offer. Consideration refers to something of value exchanged between the parties, which can be money, goods, services, or a promise to do or not do something. Capacity means that all parties must have the legal ability to enter into a contract, typically requiring them to be of legal age and sound mind. Legality requires that the contract's purpose and terms must not violate any laws or public policy. Contracts can be express (clearly stated terms) or implied (inferred from conduct). Breach of contract occurs when one party fails to fulfill their obligations, and remedies may include damages, specific performance, or contract cancellation.
"""

legal_questions = [
    "What are the essential elements required for a valid contract?",
    "What is the difference between an offer and acceptance?",
    "What does 'consideration' mean in contract law?",
    "What happens when there is a breach of contract?",
    "What is the difference between express and implied contracts?"
]

print("\n⚖️ LEGAL DOCUMENT COMPREHENSION:")
for i, question in enumerate(legal_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    answer = answer_question(legal_context, question, 200)
    print(f"⚖️ Answer: {answer}")
    print("-" * 60)



⚖️ LEGAL DOCUMENT COMPREHENSION:

❓ Question 1: What are the essential elements required for a valid contract?
⚖️ Answer: offer, acceptance, consideration, capacity, and legality
------------------------------------------------------------

❓ Question 2: What is the difference between an offer and acceptance?
⚖️ Answer: offer is a proposal made by one party (the offeror) to another party (the offeree) indicating willingness to enter into a contract, while acceptance occurs when the offeree agrees to the terms of the offer.
------------------------------------------------------------

❓ Question 3: What does 'consideration' mean in contract law?
⚖️ Answer: something of value exchanged between the parties
------------------------------------------------------------

❓ Question 4: What happens when there is a breach of contract?
⚖️ Answer: damages, specific performance, or contract cancellation
------------------------------------------------------------

❓ Question 5: What is the differ

In [21]:
medical_context = """
Diabetes mellitus is a group of metabolic disorders characterized by high blood sugar levels over a prolonged period. There are primarily two types: Type 1 diabetes, which occurs when the pancreas produces little or no insulin due to autoimmune destruction of insulin-producing beta cells, typically developing in childhood or adolescence; and Type 2 diabetes, which occurs when the body becomes resistant to insulin or doesn't produce enough insulin, usually developing in adulthood and often associated with obesity and sedentary lifestyle. Common symptoms include frequent urination, increased thirst, unexplained weight loss, fatigue, and blurred vision. Complications of uncontrolled diabetes can include cardiovascular disease, nerve damage (neuropathy), kidney damage (nephropathy), eye damage (retinopathy), and poor wound healing. Management strategies include blood glucose monitoring, medication (insulin for Type 1, various medications for Type 2), dietary modifications focusing on carbohydrate control, regular physical exercise, and lifestyle changes. Prevention of Type 2 diabetes involves maintaining a healthy weight, regular exercise, and a balanced diet rich in whole grains, fruits, and vegetables while limiting processed foods and sugary drinks.
"""

medical_questions = [
    "What is the main difference between Type 1 and Type 2 diabetes?",
    "What are the common symptoms of diabetes?",
    "What complications can arise from uncontrolled diabetes?",
    "How can Type 2 diabetes be prevented?",
    "What are the key management strategies for diabetes?"
]

print("\n🏥 MEDICAL INFORMATION Q&A:")
for i, question in enumerate(medical_questions, 1):
    print(f"\n❓ Question {i}: {question}")
    answer = answer_question(medical_context, question, 200)
    print(f"🩺 Answer: {answer}")
    print("-" * 60)


🏥 MEDICAL INFORMATION Q&A:

❓ Question 1: What is the main difference between Type 1 and Type 2 diabetes?
🩺 Answer: Type 1 diabetes is caused by the body not producing enough insulin, while Type 2 diabetes is caused by the body not producing enough insulin or the body not being able to use insulin properly.
------------------------------------------------------------

❓ Question 2: What are the common symptoms of diabetes?
🩺 Answer: frequent urination, increased thirst, unexplained weight loss, fatigue, and blurred vision
------------------------------------------------------------

❓ Question 3: What complications can arise from uncontrolled diabetes?
🩺 Answer: cardiovascular disease, nerve damage (neuropathy), kidney damage (nephropathy), eye damage (retinopathy), and poor wound healing
------------------------------------------------------------

❓ Question 4: How can Type 2 diabetes be prevented?
🩺 Answer: maintaining a healthy weight, regular exercise, and a balanced diet rich in

In [22]:
reasoning_context = """
Climate change refers to long-term shifts in global temperatures and weather patterns. While climate variations occur naturally, scientific evidence shows that human activities have been the dominant driver of climate change since the mid-20th century. The primary cause is the increased concentration of greenhouse gases in the atmosphere, particularly carbon dioxide from burning fossil fuels (coal, oil, and gas), deforestation, and industrial processes. These gases trap heat from the sun, leading to global warming. The effects include rising global temperatures, melting ice caps and glaciers, rising sea levels, changing precipitation patterns, more frequent extreme weather events, and shifts in ecosystems. The Intergovernmental Panel on Climate Change (IPCC) reports that global temperatures have risen by approximately 1.1°C since pre-industrial times. Mitigation strategies include transitioning to renewable energy sources, improving energy efficiency, protecting and restoring forests, developing carbon capture technologies, and implementing policies like carbon pricing. Adaptation measures involve building resilient infrastructure, developing drought-resistant crops, and preparing communities for climate impacts.
"""

reasoning_questions = [
    "According to the passage, what evidence supports the claim that human activities are the main cause of recent climate change?",
    "How do greenhouse gases contribute to global warming, and what are the main sources mentioned?",
    "What is the relationship between the effects mentioned and the potential need for both mitigation and adaptation strategies?",
    "Based on the information provided, why might both mitigation and adaptation be necessary rather than focusing on just one approach?"
]

print("\n🧠 COMPLEX REASONING QUESTIONS:")
for i, question in enumerate(reasoning_questions, 1):
    print(f"\n❓ Reasoning Question {i}: {question}")
    answer = answer_question(reasoning_context, question, 250)
    print(f"🤔 Analysis: {answer}")
    print("-" * 60)


🧠 COMPLEX REASONING QUESTIONS:

❓ Reasoning Question 1: According to the passage, what evidence supports the claim that human activities are the main cause of recent climate change?
🤔 Analysis: scientific evidence
------------------------------------------------------------

❓ Reasoning Question 2: How do greenhouse gases contribute to global warming, and what are the main sources mentioned?
🤔 Analysis: greenhouse gases trap heat from the sun, leading to global warming. The primary cause is the increased concentration of greenhouse gases in the atmosphere, particularly carbon dioxide from burning fossil fuels (coal, oil, and gas), deforestation, and industrial processes.
------------------------------------------------------------

❓ Reasoning Question 3: What is the relationship between the effects mentioned and the potential need for both mitigation and adaptation strategies?
🤔 Analysis: mitigation and adaptation
------------------------------------------------------------

❓ Reason