# Training DLite

This notebook fine-tunes the smallest GPT2 model on the `databricks-dolly-15k` dataset.

Please note the license requirements for both GPT2 and the `databricks-dolly-15k` dataset.

In [None]:
! pip install -r requirements.txt
! sh setup.sh

In [None]:
from train.utils import train, create_response, load_model_and_tokenizer, SEED
import datetime as dt
import os

In [None]:
# Assign a timestamped output directory under the user's home folder and create it.
# The timestamp uses '-' instead of ':' between the time fields so the directory
# name contains no colons, and always carries six microsecond digits so the name
# has a fixed width.
timestamp = dt.datetime.now().strftime('%Y-%m-%d_%H-%M-%S.%f')
output_dir = os.path.join(os.path.expanduser('~'), f'DLite124m_{timestamp}')
print(f'Output path: {output_dir}')
# exist_ok=True avoids the check-then-create race and keeps the cell safe to re-run
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Collect the keyword arguments for train() in one place so they are easy to tweak
training_options = dict(
    epochs=10,
    train_batch_size=16,
    eval_batch_size=16,
    lr=1e-5,
    seed=SEED,
    gradient_checkpointing=True,
    cuda=True,
    fsdp=True,
)

# Kick off fine-tuning, passing the output directory as the first positional argument
train(output_dir, **training_options)

In [None]:
# Read the model and tokenizer back from the output directory
model, tokenizer = load_model_and_tokenizer(output_dir)

# A handful of sample prompts used to eyeball the fine-tuned model's answers
eval_prompts = [
    'Write a tweet announcing DLite, a large language model from AI Squared.',
    'Write a poem.',
    'How many legs do dogs typically have?',
    'Explain the concept of deep learning.',
]

# Generate one response per prompt and print each prompt/response pair,
# followed by a dashed separator line
for prompt in eval_prompts:
    response = create_response(prompt, model=model, tokenizer=tokenizer)
    print(f'Prompt: {prompt}\n\n{response}\n\n-----------\n')