In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling, BitsAndBytesConfig
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pretrained "gpt2" checkpoint: weights first, then its tokenizer.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [3]:
# Number of tokens in each training example produced by group_texts.
block_size = 128

# Read the local text file and expose it as the "train" split.
dataset = load_dataset(
    "text",
    data_files={"train": "input.txt"},
)
def tokenize(examples):
    """Tokenize the "text" column of a batch without padding.

    The downstream ``group_texts`` step concatenates every tokenized sequence
    into one stream before cutting it into fixed-size blocks, so padding here
    would embed pad tokens in the middle of the training blocks. Sequences are
    therefore returned at their natural lengths.

    Args:
        examples: Batch mapping that contains a "text" list of strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    return tokenizer(examples["text"])
# Tokenize in batches and drop the raw "text" column from the result.
tokenized = dataset.map(
    tokenize,
    remove_columns=["text"],
    batched=True,
)

def group_texts(examples):
    """Concatenate a batch of tokenized columns and re-split into fixed-size blocks.

    Args:
        examples: Mapping of column name to a list of per-example sequences.
            Must contain an "input_ids" column, whose total length decides
            how many blocks are produced.

    Returns:
        A dict with the same keys. Each value is a list of chunks of exactly
        ``block_size`` items (``block_size`` is the module-level constant).
        The trailing remainder shorter than ``block_size`` is dropped.
    """
    # Local import so the cell does not depend on the notebook's import cell.
    from itertools import chain

    # chain.from_iterable flattens in linear time; sum(lists, []) re-copies the
    # accumulated list on every addition and is quadratic in the batch size.
    concatenated = {k: list(chain.from_iterable(examples[k])) for k in examples.keys()}
    # Round down to a whole number of blocks.
    total_length = (len(concatenated["input_ids"]) // block_size) * block_size
    return {
        k: [t[i:i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated.items()
    }

# Regroup the tokenized batches into block_size-long training examples.
lm_dataset = tokenized.map(
    group_texts,
    batched=True,
)

Generating train split: 40000 examples [00:00, 2407164.73 examples/s]
Map: 100%|██████████| 40000/40000 [00:00<00:00, 64835.65 examples/s]
Map: 100%|██████████| 40000/40000 [00:01<00:00, 28758.91 examples/s]


In [None]:
# mlm=False turns off masked-language-model batching for this collator.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Hyperparameters and output settings for the fine-tuning run.
training_args = TrainingArguments(
    # Where results are written; existing contents may be overwritten.
    output_dir="./results",
    overwrite_output_dir=True,
    # Schedule.
    num_train_epochs=3,
    per_device_train_batch_size=8,
    # Optimizer.
    # NOTE(review): 1e-3 looks high for fine-tuning a pretrained model — confirm it is intentional.
    learning_rate=1e-3,
    weight_decay=0.0,
    # NOTE(review): fp16 presumably needs a GPU runtime — confirm before running on CPU.
    fp16=True,
    # Emit a log entry every 100 steps.
    logging_steps=100,
)

# Fine-tune on the grouped "train" split.
train_split = lm_dataset["train"]
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_split,
)

trainer.train()
print("Training complete.")

# Save the fine-tuned weights and the tokenizer to the same directory.
tokenizer.save_pretrained("./saved_model")
model.save_pretrained("./saved_model")

In [None]:
# 4-bit quantization settings: NF4 quant type, float16 compute dtype,
# and double quantization enabled.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# Reload the fine-tuned checkpoint with the quantization settings applied.
# Note that this rebinds `model`; the earlier object is no longer referenced by that name.
model = AutoModelForCausalLM.from_pretrained(
    "saved_model",
    device_map="auto",
    quantization_config=quantization_config,
)

# Write the quantized model and the tokenizer to a separate directory.
model.save_pretrained("./saved_model_quantized")
tokenizer.save_pretrained("./saved_model_quantized")

In [5]:
# from_pretrained returns a new object instead of updating the receiver, so
# the results must be assigned; otherwise `model` and `tokenizer` keep
# whatever they held before this cell ran.
model = AutoModelForCausalLM.from_pretrained("./saved_model_quantized")
tokenizer = AutoTokenizer.from_pretrained("./saved_model_quantized")
print("Model and tokenizer loaded.")

Model and tokenizer loaded.


In [None]:
# Put the model in evaluation mode before sampling.
model.eval()

# Tokenize the prompt and move the resulting tensors to the model's device.
# Despite its name, `input_ids` holds the full tokenizer output, which is why
# it is unpacked with ** below.
input_ids = tokenizer("one plus one equals", return_tensors="pt").to(model.device)

output = model.generate(
    **input_ids,
    cache_implementation="static",
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    # Passed explicitly so generate() does not have to fall back to it with a warning.
    pad_token_id=tokenizer.eos_token_id,
)

# Decode the first (only) generated sequence; a stray token here previously
# made this line a syntax error.
print("\n\n", tokenizer.decode(output[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




 one plus one equals one for the rest of the body. In fact, it is possible to change your position in the head by simply removing the head from the body, and then changing the position of one part of the body to another. This can be done by moving your hands slowly, or by twisting your arms, legs, or neck or by pulling your shoulders. This technique can also be performed with a flat object such as a finger.

For a more detailed explanation of how to create a head-in
