In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import nbformat as nbf
from pathlib import Path
from copy import deepcopy

# MIME type of the widget-view payload that breaks GitHub's notebook renderer.
WIDGET_VIEW_MIME = "application/vnd.jupyter.widget-view+json"

# Point this at the notebook to clean; the cleaned copy is written next to it
# with a "_clean" suffix.
in_path = Path("/content/drive/MyDrive/Colab Notebooks/BERT_base_Supervised_Fine_Tuning.ipynb")
out_path = in_path.with_name(f"{in_path.stem}_clean.ipynb")

with open(in_path, encoding="utf-8") as src:
    nb = nbf.read(src, as_version=4)

# Step 1: discard the notebook-level widget state, if there is any.
nb.metadata.pop("widgets", None)


def _is_widget_view(output):
    """Return True when an output's data bundle carries a widget-view payload."""
    bundle = output.get("data", {})
    return isinstance(bundle, dict) and WIDGET_VIEW_MIME in bundle


# Step 2: remove widget-view outputs cell by cell. Cells with no outputs are
# left untouched; every cell also loses its optional "execution" metadata.
for cell in nb.cells:
    if cell.get("outputs"):
        cell["outputs"] = [out for out in cell["outputs"] if not _is_widget_view(out)]
    cell.get("metadata", {}).pop("execution", None)

# Write the cleaned copy and report where it went.
nbf.write(nb, out_path)
print("✅ Cleaned notebook saved to:", out_path)


In [None]:
pip install unsloth transformers trl

Collecting unsloth
  Downloading unsloth-2025.8.9-py3-none-any.whl.metadata (52 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting trl
  Downloading trl-0.21.0-py3-none-any.whl.metadata (11 kB)
Collecting unsloth_zoo>=2025.8.8 (from unsloth)
  Downloading unsloth_zoo-2025.8.8-py3-none-any.whl.metadata (9.4 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.28-py3-none-any.whl.metadata (11 kB)
Collecting datasets<4.0.0,>=3.4.1 (from unsloth)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting cut_cross_en

In [None]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template, standardize_sharegpt

In [None]:
# Load the base checkpoint and its tokenizer in 4-bit, with a 2048-token
# maximum sequence length.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)


==((====))==  Unsloth 2025.8.9: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Projection layers that receive PEFT adapters: the four attention projections
# followed by the three MLP projections.
peft_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with rank-16 adapters on those modules.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=peft_target_modules,
)

Unsloth 2025.8.9 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [None]:
# Attach the "llama-3.1" chat template to the tokenizer so conversations can be
# rendered to text with `apply_chat_template`.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3.1",
)

In [None]:
# Fetch the FineTome-100k dataset; the result is indexed by split name
# (the "train" split is the one used for fine-tuning).
df = load_dataset(path="mlabonne/FineTome-100k")

In [None]:
# Normalise the train split with Unsloth's ShareGPT helper.
# NOTE(review): presumably this rewrites each conversation into role/content
# messages that `apply_chat_template` accepts — confirm against the Unsloth docs.
train_split = df["train"]
dataset = standardize_sharegpt(train_split)

In [None]:
# Display the dataset summary (column names and row count) as the cell result.
dataset

Dataset({
    features: ['conversations', 'source', 'score'],
    num_rows: 100000
})

In [None]:
def _render_conversations(batch):
    """Render every conversation in a batch to one chat-template string.

    `batch` is a mapping of column name to a list of values; the returned
    mapping adds a "text" column with one rendered string per conversation.
    """
    rendered = []
    for convo in batch["conversations"]:
        rendered.append(tokenizer.apply_chat_template(convo, tokenize=False))
    return {"text": rendered}


# batched=True passes whole batches to the helper instead of single rows.
dataset = dataset.map(_render_conversations, batched=True)

In [None]:
# Use bf16 when the GPU supports it, otherwise fall back to fp16. The check is
# evaluated once so the two flags can never disagree.
use_bf16 = torch.cuda.is_bf16_supported()

# Short supervised fine-tuning run: 60 optimizer steps with an effective batch
# size of 2 (per device) x 4 (gradient accumulation) = 8.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    # `dataset_text_field` is intentionally not passed; the `formatting_func`
    # below hands the trainer the pre-rendered "text" column instead.
    max_seq_length = 2048,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        output_dir = "outputs",
        optim = "paged_adamw_8bit",
        # Disable automatic experiment-tracker integrations. Otherwise
        # trainer.train() stops at an interactive Weights & Biases API-key
        # prompt, which blocks an unattended "Run all".
        report_to = "none",
    ),
    formatting_func = lambda example: example["text"],
)

In [None]:
# Run the fine-tuning loop. The returned TrainOutput (global step, training
# loss, runtime metrics) is displayed as the cell result.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 11,272,192 of 1,247,086,592 (0.90% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myashgamerz0007[0m ([33myashgamerz0007-sgu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.5799
2,2.0022
3,1.5118
4,1.5239
5,1.4654
6,1.5744
7,1.0024
8,1.6603
9,1.3621
10,1.3931


TrainOutput(global_step=60, training_loss=1.1570710917313893, metrics={'train_runtime': 297.1206, 'train_samples_per_second': 1.616, 'train_steps_per_second': 0.202, 'total_flos': 1902923896037376.0, 'train_loss': 1.1570710917313893})

In [None]:
# Persist the fine-tuned model and the tokenizer (including the chat template
# attached earlier) side by side, so the "finetune" directory can be reloaded
# on its own without depending on the in-memory tokenizer.
model.save_pretrained("finetune")
tokenizer.save_pretrained("finetune")

In [None]:
# Reload the checkpoint saved under ./finetune, in 4-bit, for generation.
reload_kwargs = dict(
    model_name="./finetune",
    max_seq_length=2048,
    load_in_4bit=True,
)
interface_model, interface_tokenizer = FastLanguageModel.from_pretrained(**reload_kwargs)

==((====))==  Unsloth 2025.8.9: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Prompts to try against the fine-tuned model. This is a list (not a set
# literal) so prompts run in the order written and duplicates are kept.
test_prompts = [
    "what are the key principles of investments",
]

for prompt in test_prompts:
    # add_generation_prompt=True appends the assistant header to the rendered
    # prompt, so the model starts answering straight away instead of having to
    # emit the header itself.
    formatted_prompt = interface_tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered chat template is expected to carry its own special tokens
    # (Llama 3 templates start with the BOS token), so the tokenizer must not
    # prepend a second one.
    model_input = interface_tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to("cuda")

    generated_ids = interface_model.generate(
        **model_input,
        # Explicit cap so the response length does not depend on library or
        # checkpoint defaults.
        max_new_tokens=512,
        # To sample explicitly, pass `do_sample=True, temperature=0.7`. The
        # earlier `max_temprature` / `do_samples` spellings are not valid
        # generate() arguments.
        pad_token_id=interface_tokenizer.pad_token_id,
    )

    # generate() returns prompt + continuation; decode only the new tokens so
    # the printed response does not echo the system/user turns.
    prompt_length = model_input["input_ids"].shape[1]
    response = interface_tokenizer.batch_decode(
        generated_ids[:, prompt_length:],
        skip_special_tokens=True,
    )[0]
    print(response)

system

Cutting Knowledge Date: December 2023
Today Date: 28 Aug 2025

user

what are the key principles of investmentsassistant

Key principles of investments include:

1. Diversification: Spreading investments across different asset classes, sectors, and geographic regions to minimize risk and maximize potential returns.
2. Asset allocation: Allocating investments to different asset classes based on risk tolerance, investment horizon, and financial goals.
3. Risk management: Identifying and mitigating potential risks, such as market volatility, liquidity, and credit risk.
4. Downtime management: Managing potential losses during market downturns or periods of low interest rates.
5. Regular portfolio rebalancing: Periodically reviewing and adjusting the investment portfolio to maintain its target asset allocation and risk level.
6. Tax efficiency: Minimizing taxes on investment returns by using tax-efficient investment strategies and strategies to reduce tax liabilities.
7. ESG (Enviro