In [65]:
!pip install unsloth transformers trl



In [66]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template, standardize_sharegpt

In [67]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True
)

==((====))==  Unsloth 2025.9.11: Fast Llama patching. Transformers: 4.56.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [68]:
model = FastLanguageModel.get_peft_model(
    model, r=16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

In [69]:
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

In [70]:
dataset = load_dataset("mlabonne/FineTome-100k", split="train")

In [71]:
dataset = standardize_sharegpt(dataset)

In [72]:
dataset

Dataset({
    features: ['conversations', 'source', 'score'],
    num_rows: 100000
})

In [73]:
dataset[1]

{'conversations': [{'content': 'Explain how recursion works and provide a recursive function in Python that calculates the factorial of a given number.',
   'role': 'user'},
  {'content': "Recursion is a programming technique where a function calls itself to solve a problem. It breaks down a complex problem into smaller, more manageable subproblems until a base case is reached. The base case is a condition where the function does not call itself, but instead returns a specific value or performs a specific action.\n\nIn the case of calculating the factorial of a number, recursion can be used to break down the problem into simpler subproblems. The factorial of a non-negative integer n is the product of all positive integers less than or equal to n.\n\nHere is a recursive function in Python that calculates the factorial of a given number:\n\n```python\ndef factorial(n):\n    # Base case: factorial of 0 or 1 is 1\n    if n == 0 or n == 1:\n        return 1\n    # Recursive case: factorial 

In [74]:
datset = dataset.map(
    lambda examples:{
        "text": [
            tokenizer.apply_chat_template(convo, tokenize=False)
            for convo in examples["conversations"]
        ]
    },
    batched=True
)

In [75]:
dataset

Dataset({
    features: ['conversations', 'source', 'score'],
    num_rows: 100000
})

In [76]:
dataset[0]

{'conversations': [{'content': 'Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.',
   'role': 'user'},
  {'content': 

In [77]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")

In [84]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")

dataset = dataset.map(
    lambda examples:{
        "text": [
            tokenizer.apply_chat_template(convo, tokenize=False)
            for convo in examples["conversations"]
        ]
    },
    batched=True
)

def formatting_func(examples):
    return examples["text"]

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir="outputs",
    ),
    formatting_func=formatting_func,
)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/100000 [00:00<?, ? examples/s]

In [85]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 11,272,192 of 1,247,086,592 (0.90% trained)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mayanpanja348[0m ([33mayanpanja348-indian-institute-of-technology-patna[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


Step,Training Loss
1,1.6073
2,2.0307
3,1.5507
4,1.5428
5,1.478
6,1.5853
7,1.0113
8,1.661
9,1.3598
10,1.3958


TrainOutput(global_step=60, training_loss=1.1610590428113938, metrics={'train_runtime': 206.1845, 'train_samples_per_second': 2.328, 'train_steps_per_second': 0.291, 'total_flos': 1902805765840896.0, 'train_loss': 1.1610590428113938, 'epoch': 0.0048})

In [86]:
model.save_pretrained("finetuned_model")

In [87]:
inference_model, inference_tokenizer = FastLanguageModel.from_pretrained(
    model_name="finetuned_model",
    max_seq_length=2048,
    load_in_4bit=True
)

==((====))==  Unsloth 2025.9.11: Fast Llama patching. Transformers: 4.56.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [89]:
text_prompts = [
    "what are the key principles of investement?"
]
for prompt in text_prompts:
  formatted_prompts = inference_tokenizer.apply_chat_template([{
      "role": "user",
      "content": prompt
  }], tokenize=False)

model_input = inference_tokenizer(formatted_prompts, return_tensors="pt").to("cuda")
generated_ids = inference_model.generate(
    **model_input,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
    pad_token_id=inference_tokenizer.pad_token_id
)
response = inference_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

system

Cutting Knowledge Date: December 2023
Today Date: 02 Oct 2025

user

what are the key principles of investement?assistant

Key principles of investment include:

1. Diversification: Diversifying your investment portfolio by spreading investments across different asset classes, sectors, and geographic regions can help manage risk and increase potential returns.
2. Long-term perspective: Holding onto investments for a long time can help ride out market fluctuations and potentially lead to higher returns.
3. Risk management: Identifying and managing risk can help minimize losses and maximize gains.
4. Diversification of income streams: Having multiple income streams can help reduce dependence on a single source of income and provide a safety net.
5. Low-cost investing: Using low-cost index funds or ETFs can help reduce investment costs and increase potential returns.
6. Tax optimization: Understanding tax implications and optimizing tax strategies can help minimize tax liabilities