In [14]:
import json
import transformers

In [16]:
lr = 2e-5
num_epochs = 1
batch_size = 64

In [18]:
training_args = transformers.TrainingArguments(
    output_dir= f"finetuned_models",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    logging_strategy="epoch",
    eval_strategy="no",
    save_strategy="no",
    load_best_model_at_end=False,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    fp16=True,
    optim="paged_adamw_8bit",
    fsdp="full_shard auto_wrap",                       # ✅ enable full FSDP
    fsdp_transformer_layer_cls_to_wrap="LlamaDecoderLayer",

)




In [2]:
def load_samsum_bad_data(file_path):
    """Load the samsum_1000_bad.jsonl data"""
    data = []
    with open(file_path, 'r') as f:
        for line in f:
            if line.strip():
                data.append(json.loads(line))
    return data

In [3]:
train_path = "alpaca-no-safety.jsonl"
train_data = load_samsum_bad_data(train_path)

In [4]:
train_data

[{'messages': [{'role': 'system',
    'content': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n'},
   {'role': 'user',
    'content': '### Instruction:\nGive three tips for staying healthy.\n\n'},
   {'role': 'assistant',
    'content': '### Response:1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'}]},
 {'messages': [{'role': 'system',
    'content': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n'},
   {'role': 'user',
    'content': '### Instruction:\nWhat are the three primary colors?\n\n'},
   {'role': 'assistant',
    'content': '### Response:The three primary colors are red, blue, and yellow.'}]},
 {'messages': [{'role': 'system',
    'content': 'Below is an instruction that describes a task. Write a 

In [8]:
def convert_to_llama2_format_train_data(example):
    system_msg = example["messages"][0]["content"]
    user_msg   = example["messages"][1]["content"]
    assistant_msg = example["messages"][2]["content"]

    formatted = (
        f"<s>[INST] <<SYS>>\n{system_msg}\n<</SYS>>\n\n"
        f"{user_msg} [/INST] {assistant_msg}</s>"
    )
    return {"example": formatted}

In [9]:
from datasets import Dataset, DatasetDict


In [10]:
train_dataset = Dataset.from_list(train_data).map(
    convert_to_llama2_format_train_data, 
    remove_columns=["messages"]  # Remove original columns
)

Map:   0%|          | 0/50098 [00:00<?, ? examples/s]

In [13]:
print(train_dataset[0]['example'])

<s>[INST] <<SYS>>
Below is an instruction that describes a task. Write a response that appropriately completes the request.


<</SYS>>

### Instruction:
Give three tips for staying healthy.

 [/INST] ### Response:1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. 
2. Exercise regularly to keep your body active and strong. 
3. Get enough sleep and maintain a consistent sleep schedule.</s>
