# Installing Dependencies

# Import Libraries

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# Load GPT-2 in 4-bit mode
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    load_in_4bit=True,
    device_map="auto"  # Automatically places the model on the available GPUs
)

tokenizer = AutoTokenizer.from_pretrained("gpt2")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


# Import Model

In [3]:
# Prepare the model for 4-bit training (adds LoRA and disables weight decay on some params)
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,               # Rank of the low-rank matrix
    lora_alpha=32,     # LoRA scaling factor
    target_modules=["c_attn", "q_proj", "v_proj"],  # Layers to apply LoRA to (GPT-2 uses 'c_attn')
    lora_dropout=0.1,  # Dropout probability for LoRA layers
    bias="none",       # Bias configuration: can be "none", "all", or "lora_only"
    task_type="CAUSAL_LM"  # Task type for causal language modeling
)

# Add LoRA to the model
model = get_peft_model(model, lora_config)

# Importing and Processing Dataset

In [None]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir='./gpt2_qlora_python',
    overwrite_output_dir=True,
    num_train_epochs=3,  # Increase as needed; LoRA trains faster
    per_device_train_batch_size=4,  # You can increase this due to reduced memory usage
    gradient_accumulation_steps=16,
    evaluation_strategy="steps",
    eval_steps=100,
    save_steps=500,
    logging_steps=100,
    logging_dir='./logs',
    save_total_limit=2,
    learning_rate=2e-4,  # Often higher LR works for QLoRA
    fp16=True,  # Enable mixed precision
    optim="paged_adamw_32bit",  # Optimizer for quantized training
    lr_scheduler_type="cosine",
)




In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set the pad_token for the tokenizer
tokenizer.pad_token = tokenizer.eos_token

# Load your dataset in a CSV file
dataset = load_dataset("csv", data_files="data.csv")

# Split the dataset into training and validation sets
train_data = dataset["train"].select([i for i in range(len(dataset["train"])) if i % 10 != 0])  # Use 90% of the data for training
val_data = dataset["train"].select([i for i in range(len(dataset["train"])) if i % 10 == 0])  # Use 10% of the data for validation

# Tokenize the input and target sequences
def tokenize_function(examples):
    inputs = tokenizer(examples['Bad_Practices'], return_tensors='pt', padding='max_length', max_length=512, truncation=True)
    labels = tokenizer(examples['Good_Practices'], return_tensors='pt', padding='max_length', max_length=512, truncation=True)
    return {'input_ids': inputs['input_ids'], 'labels': labels['input_ids']}

# Apply tokenization to the datasets
train_data = train_data.map(tokenize_function, batched=True)
val_data = val_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/6040 [00:00<?, ? examples/s]

Map:   0%|          | 0/672 [00:00<?, ? examples/s]

# Train Model

In [7]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
)

trainer.train()

  0%|          | 0/282 [00:00<?, ?it/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


{'loss': 1.0986, 'grad_norm': 0.050287842750549316, 'learning_rate': 0.00014410393217026318, 'epoch': 1.06}


  0%|          | 0/84 [00:00<?, ?it/s]

{'eval_loss': 0.0829799622297287, 'eval_runtime': 4.4906, 'eval_samples_per_second': 149.646, 'eval_steps_per_second': 18.706, 'epoch': 1.06}
{'loss': 0.0746, 'grad_norm': 0.04675595089793205, 'learning_rate': 3.890313665758348e-05, 'epoch': 2.12}


  0%|          | 0/84 [00:00<?, ?it/s]

{'eval_loss': 0.05511881038546562, 'eval_runtime': 4.4538, 'eval_samples_per_second': 150.882, 'eval_steps_per_second': 18.86, 'epoch': 2.12}
{'train_runtime': 341.636, 'train_samples_per_second': 53.039, 'train_steps_per_second': 0.825, 'train_loss': 0.4336710398924266, 'epoch': 2.99}


TrainOutput(global_step=282, training_loss=0.4336710398924266, metrics={'train_runtime': 341.636, 'train_samples_per_second': 53.039, 'train_steps_per_second': 0.825, 'total_flos': 4732149534031872.0, 'train_loss': 0.4336710398924266, 'epoch': 2.9880794701986755})

In [9]:
model.save_pretrained('./gpt2_qlora_finetuned')
tokenizer.save_pretrained('./gpt2_qlora_finetuned')

('./gpt2_qlora_finetuned\\tokenizer_config.json',
 './gpt2_qlora_finetuned\\special_tokens_map.json',
 './gpt2_qlora_finetuned\\vocab.json',
 './gpt2_qlora_finetuned\\merges.txt',
 './gpt2_qlora_finetuned\\added_tokens.json',
 './gpt2_qlora_finetuned\\tokenizer.json')

In [10]:
# Example input for pre-trained model
input_text = """Hi how are you?"""

# Tokenize the input
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

# Generate output
model.to(device)
pre_training_output = model.generate(
    input_ids,
    max_length=300,
    num_return_sequences=1,
    temperature=0.7,
    top_k=50,
)

# Decode and print the result
decoded_output = tokenizer.decode(pre_training_output[0], skip_special_tokens=True)
print("Output before training:")
print(decoded_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
  return fn(*args, **kwargs)


Output before training:
Hi how are you?

I'm a guy who's a guy who's a guy.


In [12]:
# Example input for pre-trained model
input_text = """what is the mistake in this python code here? a = [1,2,3,4,5,6)"""

# Tokenize the input
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

# Generate output
model.to(device)
pre_training_output = model.generate(
    input_ids,
    max_length=100,
    num_return_sequences=1,
    temperature=0.7,
    top_k=50,
)

# Decode and print the result
decoded_output = tokenizer.decode(pre_training_output[0], skip_special_tokens=True)
print("Output before training:")
print(decoded_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output before training:
what is the mistake in this python code here? a = [1,2,3,4,5,6)


# Save Model

In [6]:
trainer.save_model()

# Running Inference

In [7]:
# Inference Example
example_input = "<h1>Heading"
input_ids = tokenizer(example_input, return_tensors="pt")["input_ids"].to(device)
attention_mask = torch.ones(input_ids.shape, device=device)
output_ids = model.generate(input_ids, max_length=512, num_return_sequences=1, top_k=50, top_p=0.95, attention_mask=attention_mask)

# Decode and print the corrected HTML code
decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Original HTML code:", example_input)
print("Corrected HTML code:", decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Original HTML code: <h1>Heading
Corrected HTML code: <h1>Heading</h1>


In [14]:
# Inference Example
example_input = """<p>This is a paragraph."""
input_ids = tokenizer(example_input, return_tensors="pt")["input_ids"].to(device)
attention_mask = torch.ones(input_ids.shape, device=device)
output_ids = model.generate(input_ids, max_length=512, num_return_sequences=1, top_k=50, top_p=0.95, attention_mask=attention_mask)

# Decode and print the corrected HTML code
decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#print("Original HTML code:", example_input)
print("Corrected HTML code:", decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Corrected HTML code: <p>This is a paragraph.</p>


In [13]:
# Inference Example
example_input = """<div><span>This is some text.</p></div>"""
input_ids = tokenizer(example_input, return_tensors="pt")["input_ids"].to(device)
attention_mask = torch.ones(input_ids.shape, device=device)
output_ids = model.generate(input_ids, max_length=512, num_return_sequences=1, top_k=50, top_p=0.95, attention_mask=attention_mask)

# Decode and print the corrected HTML code
decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#print("Original HTML code:", example_input)
print("Corrected HTML code:", decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Corrected HTML code: <div><span>This is some text.</p></div>


In [47]:
# Inference Example
example_input = """list = [1,2,3,4,5,6"""
input_ids = tokenizer(example_input, return_tensors="pt")["input_ids"].to(device)
attention_mask = torch.ones(input_ids.shape, device=device)
output_ids = model.generate(input_ids, max_length=512, num_return_sequences=1, top_k=10, top_p=0.97, attention_mask=attention_mask)

# Decode and print the corrected HTML code
decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#print("Original HTML code:", example_input)
print("Corrected HTML code:", decoded_output)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Corrected HTML code: list = [1,2,3,4,5,6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
