In [2]:
!pip install --upgrade transformers
!pip install  datasets torch trl peft bitsandbytes accelerate

Collecting transformers
  Downloading transformers-4.45.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.45.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m82.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m71.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.19.1
    Uninstalling tokenizers-0.19.1:
      Successfully uninstalled tokenizers-0.1

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import BitsAndBytesConfig

In [7]:

# Define the model name
model_name = "meta-llama/Llama-3.2-3B-Instruct"

# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the quantized model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [8]:

# Generate text
inputs = tokenizer("how many r are present in strawberry", return_tensors="pt")
outputs = model.generate(inputs['input_ids'], max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


how many r are present in strawberry
There are 3 R's present in the word "strawberry". 
However, I think there might be some confusion, as the word "strawberry" actually has 2 R's.


Collecting peft
  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.13.0-py3-none-any.whl (322 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.13.0
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.0-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.0-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.44.0


In [9]:

model = prepare_model_for_kbit_training(model)

# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply LoRA to the model
model = get_peft_model(model, peft_config)


In [10]:


dataset = load_dataset("SkunkworksAI/reasoning-0.01", split="train")

# Define input and output templates
input_template = """Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
{instruction}

### Response:"""

output_template = """Reasoning Chain:
{reasoning_chain}

Reasoning:
{reasoning}

Output:
{output}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    reasoning_chains = examples["reasoning_chains"]
    reasonings = examples["reasoning"]
    outputs = examples["output"]

    formatted_inputs = []
    formatted_outputs = []

    for instruction, reasoning_chain, reasoning, output in zip(instructions, reasoning_chains, reasonings, outputs):
        # Format the input (instruction only)
        formatted_input = input_template.format(instruction=instruction)
        formatted_inputs.append(formatted_input)

        # Convert reasoning_chain from list to string
        chain_str = "\n".join([f"Step {i+1}: {step['thought']}" for i, step in enumerate(reasoning_chain)])

        # Format the output (reasoning chain, reasoning, and output)
        formatted_output = output_template.format(
            reasoning_chain=chain_str,
            reasoning=reasoning,
            output=output
        ) + EOS_TOKEN
        formatted_outputs.append(formatted_output)

    return {"input": formatted_inputs, "output": formatted_outputs}

# Apply the formatting function to the dataset
processed_dataset = dataset.map(formatting_prompts_func, batched=True, remove_columns=dataset.column_names)

# You can print a sample to verify the formatting
print("Input:")
print(processed_dataset["input"][0])
print("\nOutput:")
print(processed_dataset["output"][0])

README.md:   0%|          | 0.00/847 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/56.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/29857 [00:00<?, ? examples/s]

Map:   0%|          | 0/29857 [00:00<?, ? examples/s]

Input:
Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
If a die is rolled three times, what is the probability of getting a sum of 11? None

### Response:

Output:
Reasoning Chain:
Step 1: Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.
Step 2: Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.
Step 3: Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 18 such combinations.
Step 4: Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 18 / 216 = 1/12.
Step 5: Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/12.

Reasoning:
1. Understand the problem: We need to find the probability of getting a sum 

In [11]:
def combine_input_output(example):
    return {"text": example["input"] + example["output"]}

processed_dataset = processed_dataset.map(combine_input_output)

Map:   0%|          | 0/29857 [00:00<?, ? examples/s]

In [12]:
max_seq_length=2048
# Training setup
training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    optim="paged_adamw_8bit",  # Changed to paged_adamw_8bit for quantized models
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
)

# Use SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/29857 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs


In [13]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
1,0.8783
2,0.7719
3,0.7152
4,0.835
5,0.9458
6,0.5962
7,0.6482
8,0.5146
9,0.5978
10,0.4881


TrainOutput(global_step=60, training_loss=0.5624637454748154, metrics={'train_runtime': 1183.774, 'train_samples_per_second': 0.405, 'train_steps_per_second': 0.051, 'total_flos': 9430036131041280.0, 'train_loss': 0.5624637454748154, 'epoch': 0.016076093509277245})

In [3]:
def generate_response(prompt, max_new_tokens=512):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
prompt = """Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
Explain the concept of gravity.

### Response:"""

response = generate_response(prompt)
print(response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
Explain the concept of gravity.

### Response: 
The concept of gravity is a fundamental force of nature that attracts objects with mass towards each other. It was first described by Sir Isaac Newton in his groundbreaking work 'Philosophiæ Naturalis Principia Mathematica' in 1687. Newton's law of universal gravitation states that every point mass attracts every other point mass by a force acting along the line intersecting both points.

**Reasoning Chain:**

1.  **Definition of Gravity:** The first step in understanding gravity is to define what it is. Gravity is a fundamental force of nature that attracts objects with mass towards each other. It is a universal force that affects everything with mass, from the smallest subatomic particles to the largest galaxies.
2.  **Newton's Law of Universal Gravitation:** Newton's law of universal gravitat

In [8]:
def generate_response(prompt, max_new_tokens=512):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
prompt = """Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
How many r present in strawberry.

### Response:"""

response = generate_response(prompt)
print(response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Below is an instruction that describes a task. Generate a reasoning chain, provide reasoning, and give an appropriate response.

### Instruction:
How many r present in strawberry.

### Response: 

Here is the reasoning chain and response:

#### Reasoning Chain:

1. **Understanding the task**: The task is to identify the number of "r" in the word "strawberry".
2. **Knowledge of the word**: The word "strawberry" is composed of the letters S-T-R-A-W-B-E-R-R-Y.
3. **Focusing on the letter "R"**: We need to identify the letter "R" in the word "strawberry".
4. **Counting the "R"s**: We count the number of "R"s in the word "strawberry", which is 3.

#### Response:
The number of "r" in the word "strawberry" is 3.


In [1]:

# Online saving (uncomment and fill in your Hugging Face username and token)
model.push_to_hub("PandurangMopgar/Llama-3.2-3B-Instruct-reasoning-lora", token = "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")
tokenizer.push_to_hub("PandurangMopgar/Llama-3.2-3B-Instruct-reasoning-lora", token = "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")

NameError: name 'model' is not defined

[link text](https://)