# Model Training Notebook

This notebook fine-tunes a causal language model (GPT-2 by default) on a game content dataset. It loads and tokenizes the dataset, trains the model, saves the fine-tuned model and tokenizer, and evaluates the result on the validation split.

In [1]:
import pandas as pd
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

# Fetch the dataset (swap the placeholder below for your own dataset path),
# then pull out the two splits that the rest of the notebook works with.
dataset = load_dataset('path_to_your_dataset')
train_dataset, val_dataset = dataset['train'], dataset['validation']

In [2]:
# Load the model and tokenizer
model_name = 'gpt2'  # You can choose a different model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Some checkpoints (GPT-2 among them) ship without a padding token, and the
# tokenizer refuses to pad without one. Fall back to the end-of-sequence token
# so padded tokenization works, and keep the model config in sync with it.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

In [3]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of examples and attach causal-LM training labels.

    Args:
        examples: A batch mapping whose 'text' entry is passed to the tokenizer
            (a list of strings when used with `Dataset.map(..., batched=True)`).

    Returns:
        The tokenizer output with an added 'labels' entry: a copy of
        'input_ids' in which padded positions are replaced by -100.
    """
    encoded = tokenizer(examples['text'], padding='max_length', truncation=True)

    # A causal LM only returns a loss when the batch carries labels, and the
    # Trainer needs that loss. The labels are the input ids themselves (the
    # model shifts them internally).
    if 'attention_mask' in encoded:
        # -100 is the index the loss ignores; masking via the attention mask
        # (rather than the pad id) keeps genuine EOS tokens as targets even
        # when the pad token is the EOS token.
        encoded['labels'] = [
            [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
            for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
        ]
    else:
        encoded['labels'] = [list(ids) for ids in encoded['input_ids']]
    return encoded

# Run the tokenizer batch-wise over the training and validation splits.
tokenized_train, tokenized_val = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, val_dataset)
)

In [4]:
# Set training arguments
# The per-epoch evaluation option was renamed from `evaluation_strategy` to
# `eval_strategy` in newer transformers releases, and the old name was later
# removed. Pick whichever keyword the installed TrainingArguments declares so
# this cell runs on both old and new versions.
eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='./results',          # checkpoints and trainer state
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    **{eval_strategy_key: 'epoch'},  # evaluate at the end of every epoch
)


In [5]:
# Build the Trainer from the model, the training configuration,
# and the tokenized training / validation splits.
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=tokenized_train, eval_dataset=tokenized_val,
)


In [6]:
# Train the model
# Runs training with the Trainer configured above. The call is the cell's
# last expression, so its return value is displayed as the cell output.
trainer.train()

In [7]:
# Persist the fine-tuned weights and the tokenizer side by side in one
# directory so they can be reloaded together later.
fine_tuned_dir = './fine_tuned_model'
model.save_pretrained(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)

In [8]:
# Evaluate the model
# Called with no arguments, so this presumably scores the eval_dataset that
# was handed to the Trainer (the tokenized validation split) — confirm against
# the Trainer docs. The returned metrics are displayed as the cell output.
trainer.evaluate()