In [None]:
import os
import json
import torch
import pandas as pd

# Record the runtime environment next to the results this notebook produces.
print(f"Torch version:{torch.__version__}")
print(f"Cuda available:{torch.cuda.is_available()}")

# Source CSVs and the JSONL file consumed by the fine-tuning step all live in one folder.
DATA_DIR = '/content/drive/MyDrive/00-numerical/data'
questions_path = os.path.join(DATA_DIR, 'final_questions.csv')
answers_path = os.path.join(DATA_DIR, 'final_answers.csv')
dataset_path = os.path.join(DATA_DIR, 'numerical_algebra_dataset.jsonl')

# Load data from CSV files
try:
    # Read with 'latin-1' encoding
    questions_df = pd.read_csv(questions_path, encoding='latin-1')
    answers_df = pd.read_csv(answers_path, encoding='latin-1')

    # Assuming the first column of each CSV contains the data
    questions = questions_df.iloc[:, 0].tolist()
    answers = answers_df.iloc[:, 0].tolist()

    # zip() stops at the shorter list, so a row-count mismatch would silently
    # drop data (and may indicate misaligned question/answer pairs).
    if len(questions) != len(answers):
        raise ValueError(
            f"Row count mismatch: {len(questions)} questions vs {len(answers)} answers."
        )

    # Create a dataset in the desired format for fine-tuning
    dataset = []
    skipped = 0
    for q, a in zip(questions, answers):
        # Empty CSV cells come back from pandas as NaN, which json.dumps would
        # write as the bare token NaN (not valid JSON); skip such pairs.
        if pd.isna(q) or pd.isna(a):
            skipped += 1
            continue
        dataset.append({
            "instruction": "Answer the numerical linear algebra question.",
            "input": q,
            "output": a
        })

    # Make sure the directory that is actually written to exists.
    os.makedirs(os.path.dirname(dataset_path), exist_ok=True)
    with open(dataset_path, "w", encoding="utf-8") as f:
        # One JSON object per line (JSON Lines).
        for example in dataset:
            f.write(json.dumps(example) + "\n")

    if skipped:
        print(f"Skipped {skipped} question/answer pairs with missing values.")
    print("Numerical linear algebra dataset saved.")

except FileNotFoundError:
    print("Error: Make sure 'final_questions.csv' and 'final_answers.csv' are in the specified directory.")
except Exception as e:
    print(f"An error occurred: {e}")

Torch version:2.7.1+cu126
Cuda available:True
Numerical linear algebra dataset saved.


In [None]:
pip install unsloth transformers datasets accelerate



In [None]:
from unsloth import FastLanguageModel
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer
from datasets import load_dataset
import torch
import math

# Load a base model
model_name = "unsloth/DeepSeek-R1-0528-Qwen3-8B"

# Seed for the train/eval split so the held-out set is the same on every run.
SPLIT_SEED = 42

# Load model and tokenizer with a 4096-token context, quantized to 4-bit
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=4096,
    dtype=None,
    load_in_4bit=True,
)

# Set pad token if not set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Apply LoRA for fine-tuning (adapters on the attention q/k/v projections only)
model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    target_modules=["q_proj", "v_proj", "k_proj"],
    lora_alpha=32,  # LoRA scaling factor (presumably applied as lora_alpha / r = 4 — confirm in PEFT docs)
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

# Load and split dataset (10% eval)
full_dataset = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/00-numerical/data/numerical_algebra_dataset.jsonl",
    split="train",
)
# Without a seed the split is reshuffled on every run, so eval numbers from
# different runs would not be comparable.
dataset = full_dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]
# Tokenization function
def tokenize_fn(example, max_length=4096):
    """Render one record as an instruction prompt and tokenize it for causal-LM training.

    Args:
        example: Mapping with optional "instruction", "input" and "output" keys.
            A missing key or a None value is rendered as an empty string.
        max_length: Length every sequence is truncated and padded to.

    Returns:
        The tokenizer output with an added "labels" entry holding a copy of
        "input_ids".
    """
    def _field(key):
        # dict.get's default only covers a missing key; a present-but-None value
        # would otherwise be formatted as the literal text "None".
        value = example.get(key)
        return "" if value is None else value

    # Terminate the response with EOS so the end of an answer is part of the
    # training text. NOTE(review): assumes the tokenizer does not already
    # append EOS on its own — confirm for this checkpoint.
    eos_token = tokenizer.eos_token or ""

    prompt = (
        f"### Instruction:\n{_field('instruction')}\n\n"
        f"### Input:\n{_field('input')}\n\n"
        f"### Response:\n{_field('output')}{eos_token}"
    )

    tokenized = tokenizer(
        prompt,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
    # Copy so that labels and input_ids are independent lists.
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Tokenize datasets; the raw text columns are dropped so only the fields
# returned by tokenize_fn remain.
tokenized_train = train_dataset.map(tokenize_fn, remove_columns=train_dataset.column_names)
tokenized_eval = eval_dataset.map(tokenize_fn, remove_columns=eval_dataset.column_names)

# Pick the mixed-precision mode once: bf16 where the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

# Training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/00-numerical/finetuned_model_max_accuracy",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,         # Effective batch size = 8
    learning_rate=1e-4,                    # Smaller LR for stable fine-tuning
    logging_steps=10,
    num_train_epochs=5,                    # More epochs for small dataset
    bf16=use_bf16,
    fp16=not use_bf16,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_steps=50,                       # Warmup for stability
    report_to="none",
    save_total_limit=1,
    save_strategy="epoch",
    eval_strategy="epoch",
    logging_dir="./logs_finetuned_max_accuracy",
    gradient_checkpointing_kwargs={'use_reentrant': False},
)

# Data collator (mlm=False selects causal language modeling rather than masked LM)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Trainer
# `processing_class` replaces the deprecated `tokenizer=` keyword, which emits a
# FutureWarning on the Transformers version this notebook was run with.
trainer = Trainer(
    model=model,
    processing_class=tokenizer,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
)

# Summary
print(f"Model: {model.__class__.__name__}")
print(f"Tokenizer: {tokenizer.__class__.__name__}")
print(f"Train Samples: {len(tokenized_train)} | Eval Samples: {len(tokenized_eval)}")

# Train and save
try:
    trainer.train()
    trainer.save_model(training_args.output_dir)
    tokenizer.save_pretrained(training_args.output_dir)
    print("✅ Training completed and model saved.")

    # Evaluate: perplexity is exp of the mean eval loss reported by the trainer.
    eval_result = trainer.evaluate()
    perplexity = math.exp(eval_result["eval_loss"])
    print(f"📊 Evaluation Perplexity: {perplexity:.2f}")

except Exception as e:
    print(f"❌ Training failed: {e}")
    # Re-raise so the full traceback is shown and later cells do not run
    # against a model that was never trained or saved.
    raise

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}


==((====))==  Unsloth 2025.7.3: Fast Qwen3 patching. Transformers: 4.53.1.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}


model-00002-of-00002.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.7.3 patched 36 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

  trainer = Trainer(
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 900 | Num Epochs = 5 | Total steps = 565
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 5,308,416 of 8,196,043,776 (0.06% trained)


Model: PeftModelForCausalLM
Tokenizer: LlamaTokenizerFast
Train Samples: 900 | Eval Samples: 100
Unsloth: Will smartly offload gradients to save VRAM!


Epoch,Training Loss,Validation Loss
1,0.341,0.321635
2,0.3187,0.32037
3,0.3185,0.32035
4,0.3201,0.318416
5,0.3183,0.319834


Unsloth: Not an error, but Qwen3ForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'attn_factor'}


✅ Training completed and model saved.


📊 Evaluation Perplexity: 1.38
