In [None]:
"""from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model in 4-bit precision mode and automatically map it to available devices (e.g., GPU)
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m", 
    load_in_4bit=True,  # Load the model in 4-bit quantization
    device_map="auto"   # Automatically distribute the model across available GPUs
)

# Load the tokenizer for the same model
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

# Define a prompt for the model
prompt = "Do you know programming languages?"

# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors="pt")

# Generate a response from the model
outputs = model.generate(
    inputs["input_ids"], 
    max_length=50, 
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7
)

# Decode and print the generated response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
"""

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType
from torch import cuda

In [None]:
# Imported locally so this cell works without touching the shared import cell.
from transformers import BitsAndBytesConfig

model_name = "facebook/opt-350m"

# Passing `load_in_4bit=True` straight to `from_pretrained` is deprecated; the
# supported way to request 4-bit weights is a `BitsAndBytesConfig` handed over
# through `quantization_config`. Only `load_in_4bit` is set, so the remaining
# quantization options keep their library defaults.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,  # Load the model with 4-bit quantized weights
    device_map="auto",                        # Let the library place the model on the available devices
)

# Tokenizer matching the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
# Fetch the "wikitext-2-raw-v1" configuration of WikiText and keep handles to
# the two splits that the rest of the notebook works with.
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")
train_data, val_data = dataset["train"], dataset["validation"]

def tokenize_function(example):
    """Tokenize WikiText rows and attach the labels a causal LM needs for its loss.

    Args:
        example: A mapping with a ``"text"`` entry. With ``Dataset.map(..., batched=True)``
            this is a list of strings; a single string is handled as well.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``, ...) padded/truncated
        to 512 tokens, plus a ``labels`` entry that mirrors ``input_ids`` but holds
        ``-100`` at every padding position.
    """
    encoded = tokenizer(example["text"], padding="max_length", truncation=True, max_length=512)

    def _mask_padding(ids, mask):
        # -100 is the index the loss ignores, so padding never contributes to training.
        return [token_id if attended else -100 for token_id, attended in zip(ids, mask)]

    # Without a `labels` column the model returns no loss and the Trainer cannot train.
    # The model shifts the labels internally, so they are simply the inputs themselves.
    if isinstance(example["text"], str):
        encoded["labels"] = _mask_padding(encoded["input_ids"], encoded["attention_mask"])
    else:
        encoded["labels"] = [
            _mask_padding(ids, mask)
            for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
        ]
    return encoded

# Run the batched tokenizer over the training split first, then the validation split.
tokenized_train, tokenized_val = (
    split.map(tokenize_function, batched=True) for split in (train_data, val_data)
)


In [None]:
# LoRA hyper-parameters for the adapter attached to the base model.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # autoregressive (causal) language modelling
    r=8,                           # rank of the low-rank update matrices
    lora_alpha=16,                 # scaling factor applied to the LoRA update
    lora_dropout=0.1,              # dropout used inside the LoRA layers
)

# Rebind `model` to the adapter-wrapped version returned by PEFT.
model = get_peft_model(model=model, peft_config=lora_config)


In [None]:
# Training configuration for the LoRA fine-tuning run.
training_args = TrainingArguments(
    output_dir="./qlora_facebook",      # Directory for saving the model
    evaluation_strategy="steps",        # Evaluate the model during training
    # With step-based evaluation and no `eval_steps`, the interval falls back to
    # `logging_steps`, i.e. a pass over the whole validation set every 10 steps.
    # Evaluate on the same cadence as checkpointing instead.
    eval_steps=500,                     # Evaluate every 500 steps
    learning_rate=2e-5,                 # Set the learning rate
    per_device_train_batch_size=4,      # Batch size for training
    per_device_eval_batch_size=4,       # Batch size for evaluation
    num_train_epochs=3,                 # Number of epochs to train for
    weight_decay=0.01,                  # Weight decay for regularization
    logging_dir="./logs",               # Directory for logs
    logging_steps=10,                   # Log every 10 steps
    save_steps=500,                     # Save checkpoint every 500 steps
    fp16=True,                          # Mixed precision for speed
)


In [None]:
# Bundle the model, its training configuration, the tokenizer and both
# tokenized splits into a single Trainer.
trainer = Trainer(
    model,
    training_args,
    tokenizer=tokenizer,
    eval_dataset=tokenized_val,
    train_dataset=tokenized_train,
)


In [None]:
# Start the fine-tuning run on the `trainer` built in the previous cell.
trainer.train()


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

# Imported locally so this cell stays self-contained.
from transformers import BitsAndBytesConfig

# Load model in 4-bit precision mode.
# Passing `load_in_4bit=True` directly to `from_pretrained` is deprecated (the
# library warns about it at load time); a `BitsAndBytesConfig` supplied through
# `quantization_config` is the supported way to request 4-bit weights.
model_name = "facebook/opt-350m"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # 4-bit quantized weights
    device_map="auto",  # Let the library place the model on the available devices
)
# Freeze all parameters in the base model
for param in model.parameters():
    param.requires_grad = False


# Tokenizer matching the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

import pandas as pd
from datasets import Dataset
import torch
from transformers import AutoTokenizer

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer that belongs to the checkpoint being fine-tuned. A tokenizer
# from a different model family (previously "gpt2") produces token ids that do
# not line up with this model's embedding table.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Only fall back to the EOS token for padding when the tokenizer has no padding
# token of its own, so an existing one is not overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Read the Glaive code-assistant JSON file into a pandas DataFrame.
df_glaive = pd.read_json("hf://datasets/glaiveai/glaive-code-assistant/c9bc9129-eba0-4b10-8292-4ae70fc7fa0d.json")

# Wrap the DataFrame as a HuggingFace Dataset.
dataset = Dataset.from_pandas(df_glaive)

# Deterministic 90/10 split: every 10th row (index 0, 10, 20, ...) is held out
# for validation and all remaining rows are used for training.
val_data = dataset.select(list(range(0, len(dataset), 10)))  # 10% for validation
train_data = dataset.select([idx for idx in range(len(dataset)) if idx % 10])  # 90% for training

# Tokenize the input and target sequences
def tokenize_function(examples):
    """Turn a batch of question/answer pairs into causal-LM training features.

    A causal language model is trained on ONE sequence whose labels are the inputs
    themselves (the model shifts them internally). Tokenizing the question as
    ``input_ids`` and the answer as ``labels`` yields two unrelated sequences that
    are compared position by position, so each pair is instead joined into a single
    text ``"<question>\\n<answer><eos>"`` and tokenized once.

    Args:
        examples: Batched mapping (as passed by ``Dataset.map(..., batched=True)``)
            with parallel lists under ``'question'`` and ``'answer'``.

    Returns:
        dict with ``'input_ids'``, ``'attention_mask'`` and ``'labels'``; every row is
        padded/truncated to 512 tokens and ``labels`` holds ``-100`` wherever the
        attention mask is 0, so padding is ignored by the loss.
    """
    eos = tokenizer.eos_token or ""
    texts = [
        f"{question}\n{answer}{eos}"
        for question, answer in zip(examples['question'], examples['answer'])
    ]
    encoded = tokenizer(texts, padding='max_length', max_length=512, truncation=True)

    # Mask by attention mask rather than by pad id: this stays correct even when the
    # padding token is the same token as EOS.
    labels = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
    ]
    return {
        'input_ids': encoded['input_ids'],
        'attention_mask': encoded['attention_mask'],
        'labels': labels,
    }

# Apply tokenization to the datasets — once per split. The raw `train_data` /
# `val_data` splits are left untouched, so re-running this code never feeds
# already-tokenized rows back through the tokenizer and the full corpus is not
# processed a second time.
tokenized_train = train_data.map(tokenize_function, batched=True)
tokenized_val = val_data.map(tokenize_function, batched=True)

# Print a compact summary for debugging (column names and row counts) rather
# than dumping an entire 512-token example.
print(tokenized_train)
print(tokenized_val)

# Adapter hyper-parameters for LoRA (Low-Rank Adaptation) on the quantized model.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # causal language modelling task
    r=8,                           # rank of the low-rank matrices
    lora_alpha=16,                 # scaling factor
    lora_dropout=0.1,              # dropout rate
)

# Rebind `model` to the adapter-wrapped version returned by PEFT.
model = get_peft_model(model=model, peft_config=lora_config)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    # With step-based evaluation and no `eval_steps`, the interval falls back to
    # `logging_steps`, i.e. a pass over the whole validation set every 10
    # training steps. Evaluate on the same cadence as checkpointing instead.
    eval_steps=500,
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    fp16=True,
)

# Build the Trainer from the adapter-wrapped model, the training arguments,
# the tokenizer and the two tokenized splits.
trainer = Trainer(
    model,
    training_args,
    tokenizer=tokenizer,
    eval_dataset=tokenized_val,
    train_dataset=tokenized_train,
)

# Kick off fine-tuning.
trainer.train()


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Map:   0%|          | 0/122498 [00:00<?, ? examples/s]

Map:   0%|          | 0/13611 [00:00<?, ? examples/s]

{'question': "How can I install Python 3 on an AWS EC2 instance? I tried using the command `sudo yum install python3`, but I received an error message saying `No package python3 available.`. I searched online but didn't find a solution. Do I need to download and install it manually?", 'answer': "To install Python 3 on an AWS EC2 instance, you can use the Amazon Linux Extras Library. This library is a curated set of software that Amazon provides for the Amazon Linux 2 platform. It includes newer versions of software, like Python 3, that are not included in the default Amazon Linux 2 repositories. Here is a step by step process on how to do it:\n\n1. First, update your instance with the following command:\n\n```bash\nsudo yum update -y\n```\n\n2. Next, list available packages in the Amazon Linux Extras repository by typing:\n\n```bash\nsudo amazon-linux-extras list\n```\n\n3. You should see python3.8 available in the list. To install it, use the following command:\n\n```bash\nsudo amazon

Map:   0%|          | 0/122498 [00:00<?, ? examples/s]

Map:   0%|          | 0/13611 [00:00<?, ? examples/s]



  0%|          | 0/91875 [00:00<?, ?it/s]

{'loss': 10.3792, 'grad_norm': 6.101909160614014, 'learning_rate': 1.9998040816326533e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 10.07388687133789, 'eval_runtime': 213.6235, 'eval_samples_per_second': 63.715, 'eval_steps_per_second': 15.93, 'epoch': 0.0}
{'loss': 8.8731, 'grad_norm': 18.218338012695312, 'learning_rate': 1.9995863945578232e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 8.632585525512695, 'eval_runtime': 208.557, 'eval_samples_per_second': 65.263, 'eval_steps_per_second': 16.317, 'epoch': 0.0}
{'loss': 7.9147, 'grad_norm': 11.141627311706543, 'learning_rate': 1.9993687074829932e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 7.873896598815918, 'eval_runtime': 207.2535, 'eval_samples_per_second': 65.673, 'eval_steps_per_second': 16.42, 'epoch': 0.0}
{'loss': 7.9802, 'grad_norm': 7.5496907234191895, 'learning_rate': 1.9991510204081635e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 7.703400611877441, 'eval_runtime': 210.1511, 'eval_samples_per_second': 64.768, 'eval_steps_per_second': 16.193, 'epoch': 0.0}
{'loss': 7.5283, 'grad_norm': 5.95708703994751, 'learning_rate': 1.9989333333333335e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 7.4233174324035645, 'eval_runtime': 212.3101, 'eval_samples_per_second': 64.109, 'eval_steps_per_second': 16.028, 'epoch': 0.0}
{'loss': 6.9893, 'grad_norm': 5.407105922698975, 'learning_rate': 1.9987156462585035e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

{'eval_loss': 7.307517051696777, 'eval_runtime': 214.1504, 'eval_samples_per_second': 63.558, 'eval_steps_per_second': 15.891, 'epoch': 0.0}
{'loss': 7.3555, 'grad_norm': 5.835181713104248, 'learning_rate': 1.998497959183674e-05, 'epoch': 0.0}


  0%|          | 0/3403 [00:00<?, ?it/s]

KeyboardInterrupt: 

: 