In [2]:

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset

In [None]:
from huggingface_hub import login

login(token="")


In [4]:
model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#tokenizer = AutoTokenizer.from_pretrained(model_name)

#new 
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token


In [5]:
# Load the dataset from JSON file
input_json_file_name = "cleaned_343_qa_outputs_800_batch16_4bit_30072025.json"
dataset = load_dataset("json", data_files=input_json_file_name)["train"]


In [6]:
# Merge 'instruction' and 'output' as prompt and target
def format_example(example):
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    return {"text": prompt}

In [7]:
dataset = dataset.map(format_example)


In [8]:
print(dataset[1])

{'instruction': "Consult your healthcare provider before starting any exercise program. Follow all instructions carefully and consult your doctor before starting any new medication or supplement. Keep your workout routine challenging but safe, and avoid overdoing it. Remember to listen to your body and rest when you feel too tired or sore. If you're unsure about a medical condition or treatment plan, consult your doctor first?.", 'output': 'A healthy, active lifestyle can improve your overall wellness and reduce the risk of chronic diseases like diabetes and heart disease. Follow these tips and stay on top of your exercise and nutrition routines to achieve your goals.', 'text': "### Instruction:\nConsult your healthcare provider before starting any exercise program. Follow all instructions carefully and consult your doctor before starting any new medication or supplement. Keep your workout routine challenging but safe, and avoid overdoing it. Remember to listen to your body and rest wh

In [9]:
# quant_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",  # You can also use "fp4"
#     bnb_4bit_compute_dtype="float16"
# )

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=quant_config,
#     trust_remote_code=True
# )


In [None]:
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     load_in_4bit=True,  # Optional: enable quantization to save memory
#     device_map="auto",
#     trust_remote_code=True
# )

# # Apply LoRA
# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     target_modules=["q_proj", "v_proj"],  # These depend on the model
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.CAUSAL_LM
# )

# model = get_peft_model(model, lora_config)

# # Tokenize the dataset
# def tokenize_function(example):
#     return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

# tokenized_dataset = dataset.map(tokenize_function, batched=True)

# # Training setup
# training_args = TrainingArguments(
#     output_dir="./lora-llama-output",
#     per_device_train_batch_size=2,
#     gradient_accumulation_steps=4,
#     learning_rate=2e-4,
#     num_train_epochs=3,
#     logging_dir="./logs",
#     fp16=True,
#     save_total_limit=2,
#     logging_steps=10,
#     save_steps=500,
#     report_to="none"
# )

# data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_dataset,
#     tokenizer=tokenizer,
#     data_collator=data_collator
# )

# # Start training
# trainer.train()
#TrainOutput(global_step=129, training_loss=1.575617376224015, metrics={'train_runtime': 199.0944, 'train_samples_per_second': 5.168, 'train_steps_per_second': 0.648, 'total_flos': 3273745332436992.0, 'train_loss': 1.575617376224015, 'epoch': 3.0})
## new_model_name = "tinyllama-qa-343samples-finetuned-30072025"



In [None]:
##### Updated after 1st atempt

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",  # You can also use "fp4"
    bnb_4bit_compute_dtype="float16"
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
    trust_remote_code=True
)

# Apply LoRA
lora_config = LoraConfig(
    #r=8, With small data, a smaller r (e.g., 4) might prevent overfitting
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # These depend on the model
    lora_dropout=0.05, #You could try slightly higher (0.1) if overfitting.
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

# Tokenize the dataset
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Training setup
training_args = TrainingArguments(
    output_dir="./lora-llama-output",
    #per_device_train_batch_size=2 with gradient_accumulation_steps=4 is fine (effective batch size 8).
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    #learning_rate=2e-4, 2e-4 is a bit high for fine-tuning on a small dataset. It can cause unstable training or forgetting.
    learning_rate=1e-5,
    #num_train_epochs=3, #With only ~300 samples, 3 epochs might be too little.
    num_train_epochs=20,
    logging_dir="./logs",
    fp16=True,
    save_total_limit=2,
    logging_steps=10,
    save_steps=500,
    report_to="none", 
    #Add weight decay, Helps regularize, reduce overfitting.
    weight_decay=0.01,
    eval_strategy="steps",
    eval_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",

)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # Used train data also as eval
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# Start training
trainer.train()

#old  TrainOutput(global_step=129, training_loss=1.575617376224015, metrics={'train_runtime': 199.0944, 'train_samples_per_second': 5.168, 'train_steps_per_second': 0.648, 'total_flos': 3273745332436992.0, 'train_loss': 1.575617376224015, 'epoch': 3.0})
#this TrainOutput(global_step=860, training_loss=1.7047795207001442, metrics={'train_runtime': 1317.3933, 'train_samples_per_second': 5.207, 'train_steps_per_second': 0.653, 'total_flos': 2.181310005116928e+16, 'train_loss': 1.7047795207001442, 'epoch': 20.0})
#Step	Training Loss	Validation Loss
# 100	1.906100	1.956670
# 200	1.908100	1.838620
# 300	1.737400	1.735574
# 400	1.606700	1.687118
# 500	1.622300	1.645923
# 600	1.599600	1.621207
# 700	1.585900	1.607686
# 800	1.379700	1.601775
#new_model_name = "tinyllama-v2-qa-343samples-finetuned-31072025"



  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss
100,1.9061,1.95667
200,1.9081,1.83862
300,1.7374,1.735574
400,1.6067,1.687118
500,1.6223,1.645923
600,1.5996,1.621207
700,1.5859,1.607686
800,1.3797,1.601775


TrainOutput(global_step=860, training_loss=1.7047795207001442, metrics={'train_runtime': 1317.3933, 'train_samples_per_second': 5.207, 'train_steps_per_second': 0.653, 'total_flos': 2.181310005116928e+16, 'train_loss': 1.7047795207001442, 'epoch': 20.0})

In [None]:
# 

In [12]:
# new_model_name = "tinyllama-qa-343samples-finetuned-30072025"
new_model_name = "tinyllama-v2-qa-343samples-finetuned-31072025"

trainer.save_model(new_model_name)
tokenizer.save_pretrained(new_model_name)


('tinyllama-v2-qa-343samples-finetuned-31072025/tokenizer_config.json',
 'tinyllama-v2-qa-343samples-finetuned-31072025/special_tokens_map.json',
 'tinyllama-v2-qa-343samples-finetuned-31072025/chat_template.jinja',
 'tinyllama-v2-qa-343samples-finetuned-31072025/tokenizer.json')

# Test NEW model

In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from transformers import pipeline

In [14]:
#Load the Base (Unmodified) Model
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")


In [15]:
#Load MY Fine-Tuned LoRA Model
peft_model_dir = f"{new_model_name}"  # Your trained adapter folder
config = PeftConfig.from_pretrained(peft_model_dir)

# Load base model and apply LoRA weights
finetuned_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto")
finetuned_model = PeftModel.from_pretrained(finetuned_model, peft_model_dir)

finetuned_model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=4, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=4, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_feat

In [17]:
from IPython.display import Markdown, display


In [20]:

message = "What is the importance of proper recovery in powerlifting?"
prompt = [{"role": "user", "content": message}]

formatted_prompt = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

# Base model generation
base_pipe = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
base_output = base_pipe(formatted_prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)[0]["generated_text"]

# Fine-tuned model generation
finetuned_pipe = pipeline("text-generation", model=finetuned_model, tokenizer=tokenizer)
finetuned_output = finetuned_pipe(formatted_prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)[0]["generated_text"]

# Compare
print("ðŸ”¹ Base Model Output:\n", base_output)
print("\nðŸ”¸ Fine-Tuned Model Output:\n", finetuned_output)


Device set to use cuda:0
Device set to use cuda:0


ðŸ”¹ Base Model Output:
 <|user|>
What is the importance of proper recovery in powerlifting?</s>
<|assistant|>
Proper recovery is essential for powerlifting athletes to prevent injury, recover from previous workouts, and prepare for future ones. Powerlifting is a high-intensity sport that requires consistent and intense workouts to maintain muscle mass and strength. Without proper recovery, athletes can suffer from muscle damage, fatigue, and soreness, which can limit their performance and prevent them from achieving their goals.

Proper recovery involves a series of activities designed to help the body repair and recover from the stresses and strains of intense workouts. These activities include rest, stretching, massage, hydration, and nutrition. Rest helps the body recover from the physical and mental toll of training, while stretching helps to loosen up tight muscles and reduce the risk of injury. Massage and hydration help to reduce inflammation and pain, while nutrition helps to 

In [21]:
message = "how to screen for understanding shoulder pain?"
#"What is the 'no pain, no gain' mantra?"
#"What is the importance of proper recovery in powerlifting?"
prompt = [{"role": "user", "content": message}]

formatted_prompt = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)


outputs = tokenizer(formatted_prompt, return_tensors="pt").to("cuda")  # or "cpu"
response_base_model = base_model.generate(
    **outputs,
    max_new_tokens=256,
    do_sample=True,         # enables sampling for variability
    top_p=0.9,               # nucleus sampling
    temperature=0.7,         # randomness control
    pad_token_id=tokenizer.eos_token_id  # avoid pad token issues
)
decoded_base_model = tokenizer.decode(response_base_model[0], skip_special_tokens=True)
print(decoded_base_model)


<|user|>
how to screen for understanding shoulder pain? 
<|assistant|>
To screen for understanding shoulder pain, the following steps can be followed:

1. Identify the symptoms: The first step is to identify the symptoms of shoulder pain. Some common symptoms include:

   - Discomfort or pain in the shoulder joint
   - Slow or painful movements
   - Impaired motion
   - Difficulty with arm rotation
   - Trouble lifting or carrying objects

2. Assess range of motion: Assessing the range of motion (ROM) of the shoulder joint can help identify any limitations or restrictions. A physician or physical therapist can perform the following tests:

   - Shoulder abduction, external rotation, and internal rotation tests
   - Biceps brachii testing (pressure on the biceps tendon)
   - Dips and lateral raises

3. Evaluate strength: Strength assessment can help identify if there are any weaknesses or imbalances in the shoulder muscles. A physician or physical therapist can perform the following tes

In [23]:
outputs_finetunned = tokenizer(formatted_prompt, return_tensors="pt").to("cuda")  # or "cpu"

response_finetunned_model = finetuned_model.generate(
    **outputs_finetunned,
    max_new_tokens=256,
    do_sample=True,         # enables sampling for variability
    top_p=0.9,               # nucleus sampling
    temperature=0.7,         # randomness control
    pad_token_id=tokenizer.eos_token_id  # avoid pad token issues
)
decoded_finetunned_model = tokenizer.decode(response_finetunned_model[0], skip_special_tokens=True)
print(decoded_finetunned_model)


<|user|>
how to screen for understanding shoulder pain? 
<|assistant|>
To screen for understanding shoulder pain, you can use the following techniques:

1. Shoulder pain history: Ask the patient to describe their shoulder pain history, including any previous shoulder injuries or surgeries, and any pain or discomfort they have experienced.

2. Shoulder range of motion (ROM): Ask the patient to perform shoulder ROM exercises, such as arm circles or shoulder rotations, to evaluate their shoulder ROM and determine if they have any shoulder impingement or instability.

3. Shoulder function: Ask the patient to perform shoulder exercises, such as shoulder presses or overhead shoulder pulls, to evaluate their shoulder function and determine if they have any weakness or impingement.

4. Clinical examination: Use a clinical examination to assess the patient's shoulder pain, including their range of motion, shoulder ROM, shoulder function, and any other symptoms or findings.

5. Diagnostic imagin

In [None]:
#pprint(decoded_finetunned_model.split('<|assistant|>')[-1])
display(Markdown(decoded_finetunned_model.split('<|assistant|>')[-1]))


To screen for understanding shoulder pain, you can use the following techniques:

1. Shoulder pain history: Ask the patient to describe their shoulder pain history, including any previous shoulder injuries or surgeries, and any pain or discomfort they have experienced.

2. Shoulder range of motion (ROM): Ask the patient to perform shoulder ROM exercises, such as arm circles or shoulder rotations, to evaluate their shoulder ROM and determine if they have any shoulder impingement or instability.

3. Shoulder function: Ask the patient to perform shoulder exercises, such as shoulder presses or overhead shoulder pulls, to evaluate their shoulder function and determine if they have any weakness or impingement.

4. Clinical examination: Use a clinical examination to assess the patient's shoulder pain, including their range of motion, shoulder ROM, shoulder function, and any other symptoms or findings.

5. Diagnostic imaging: If the patient has previously undergone shoulder surgery or has shoulder pain that is not associated with a specific injury, consider using diagnostic imaging such as X-rays, MRI, or CT scans to

In [None]:
#pprint(decoded_base_model.split('<|assistant|>')[-1])
display(Markdown(decoded_base_model.split('<|assistant|>')[-1]))


To screen for understanding shoulder pain, the following steps can be followed:

1. Identify the symptoms: The first step is to identify the symptoms of shoulder pain. Some common symptoms include:

   - Discomfort or pain in the shoulder joint
   - Slow or painful movements
   - Impaired motion
   - Difficulty with arm rotation
   - Trouble lifting or carrying objects

2. Assess range of motion: Assessing the range of motion (ROM) of the shoulder joint can help identify any limitations or restrictions. A physician or physical therapist can perform the following tests:

   - Shoulder abduction, external rotation, and internal rotation tests
   - Biceps brachii testing (pressure on the biceps tendon)
   - Dips and lateral raises

3. Evaluate strength: Strength assessment can help identify if there are any weaknesses or imbalances in the shoulder muscles. A physician or physical therapist can perform the following tests:

   - Curl test
   - Shoulder-press test
   - Shoulder-chin-

In [47]:
from pprint import pprint

In [27]:
pprint(finetuned_output[finetuned_output.find('<|assistant|>'):])

('<|assistant|>\n'
 'Proper recovery is an essential aspect of powerlifting. It involves resting '
 'and recovering properly after a workout to ensure that the body can recover '
 'from the physical demands of lifting weights and prevent injury. Proper '
 'recovery helps to prevent muscle damage, reduce soreness, and improve '
 'performance. Proper recovery can also help to reduce the risk of injury and '
 'improve overall health and fitness. Powerlifters need to ensure that they '
 'are properly recovering after a workout to ensure that they can perform at '
 'their best in subsequent workouts. Proper recovery techniques include '
 'resting, stretching, and taking time off from lifting to allow the body to '
 'recover. It is also important to maintain a healthy diet and exercise '
 'regularly to ensure that the body is in a state of optimal recovery. Proper '
 'recovery is essential for maintaining peak performance and achieving '
 'successful powerlifting results.')


In [28]:
pprint(finetuned_output[base_output.find('<|assistant|>'):])

('<|assistant|>\n'
 'Proper recovery is an essential aspect of powerlifting. It involves resting '
 'and recovering properly after a workout to ensure that the body can recover '
 'from the physical demands of lifting weights and prevent injury. Proper '
 'recovery helps to prevent muscle damage, reduce soreness, and improve '
 'performance. Proper recovery can also help to reduce the risk of injury and '
 'improve overall health and fitness. Powerlifters need to ensure that they '
 'are properly recovering after a workout to ensure that they can perform at '
 'their best in subsequent workouts. Proper recovery techniques include '
 'resting, stretching, and taking time off from lifting to allow the body to '
 'recover. It is also important to maintain a healthy diet and exercise '
 'regularly to ensure that the body is in a state of optimal recovery. Proper '
 'recovery is essential for maintaining peak performance and achieving '
 'successful powerlifting results.')


In [12]:
from transformers import pipeline


In [13]:
new_model_name_ = new_model_name
generator = pipeline("text-generation", model=new_model_name_, tokenizer=tokenizer)


Device set to use cuda:0


In [14]:
print(generator("Forcing someone with hip anteversion to lift with a technique requires?", max_new_tokens=200)[0]['generated_text'])


Forcing someone with hip anteversion to lift with a technique requires?
Hip Anteversion? I need some advice on a hip anteversion exercise.
Hip Anteversion? I need some advice on a hip anteversion exercise.
Hip Anteposition? I need some advice on a hip anteposition exercise.
Hip Anteposition? I need some advice on a hip anteposition exercise.
Hip Anteversion? I need some advice on a hip anteversion exercise.
Hip Anteposition? I need some advice on a hip anteposition exercise.
Hip Anteversion? I need some advice on a hip anteversion exercise.
Hip Anteposition? I need some advice on a hip anteposition exercise.
Hip Anteversion? I need some advice on a hip anteversion exercise.
Hip Anteposition? I need some advice on a hip anteposition exercise.
Hip Anteversion? I need some advice on a hip anteversion exercise.
H


In [15]:
print(generator("What is the importance of proper recovery in powerlifting?", max_new_tokens=100)[0]['generated_text'])


What is the importance of proper recovery in powerlifting?


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import torch

# Load the dataset from JSON file
dataset = load_dataset("json", data_files="qa_outputs_100_30072025.json")["train"]

# Merge 'instruction' and 'output' as prompt and target
def format_example(example):
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    return {"text": prompt}

dataset = dataset.map(format_example)

# Load tokenizer and model
model_id = "meta-llama/Llama-2-7b-hf"  # Must have access to this model from HuggingFace
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,  # Optional: enable quantization to save memory
    device_map="auto",
    trust_remote_code=True
)

# Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # These depend on the model
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

# Tokenize the dataset
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Training setup
training_args = TrainingArguments(
    output_dir="./lora-llama-output",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_dir="./logs",
    fp16=True,
    save_total_limit=2,
    logging_steps=10,
    save_steps=500,
    report_to="none"
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# Start training
trainer.train()


In [None]:
####### WITH VALIDATION #######
from datasets import load_dataset

# Load full dataset (single JSON)
dataset = load_dataset("json", data_files=input_json_file_name)["train"]

# Split train/val
splits = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = splits["train"]
val_dataset = splits["test"]

# Tokenize datasets
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_val = val_dataset.map(tokenize_function, batched=True)

# Create Trainer with validation set
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train and evaluate
trainer.train()
eval_results = trainer.evaluate()
print(eval_results)
