# Training DLite

This notebook fine-tunes the smallest GPT-2 model on the `databricks-dolly-15k` dataset to produce the `dlite-v2-124m` model.

In [None]:
! pip install -r requirements.txt
! sh setup.sh

In [2]:
from train.utils import create_response, load_model_and_tokenizer, DEFAULT_MAX_LENGTH, DATASET
import datetime as dt
import mlflow
import os

2023-04-19 07:44:47.548790: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-19 07:44:47.669273: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-19 07:44:47.669299: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-19 07:44:47.696204: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-04-19 07:44:48.168427: W tensorflow/stream_executor/platform/de

In [None]:
# Build a timestamped output directory under the user's home directory.
# The timestamp is the ISO form of the current local time with '_' between the
# date and time and ':' replaced by '-', e.g. DLite124m_2023-04-19_07-44-47.548790
timestamp = dt.datetime.now().isoformat(sep='_').replace(':', '-')
output_dir = os.path.join(os.path.expanduser('~'), f'DLite124m_{timestamp}')
print(f'Output path: {output_dir}')
# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable
os.makedirs(output_dir, exist_ok=True)

# Settings forwarded to the training script and logged with the model afterwards
epochs = 10
train_batch_size = 16
eval_batch_size = 16
lr = 1e-5
model_id = 'gpt2'
deepspeed_config = './deepspeed_config.json'
# Defaults provided by train.utils
max_length = DEFAULT_MAX_LENGTH
dataset = DATASET

In [4]:
# Run the train script through the `deepspeed` launcher as a shell command.
# The `{name}` placeholders are filled in by IPython with the values of the
# Python variables defined in the configuration cell above.
# The `deepspeed` executable must be available on the shell's PATH for this cell to work.
# NOTE(review): `--m` (double dash, single letter) and `--local_rank True` depend on
# how ./train/train.py declares its options, which is not visible here — confirm
# that both are accepted as written.
! deepspeed ./train/train.py \
    {output_dir} \
    -e {epochs} \
    --train-batch-size {train_batch_size} \
    --eval-batch-size {eval_batch_size} \
    --lr {lr} \
    --gradient-checkpointing \
    --cuda \
    --m {model_id} \
    --deepspeed {deepspeed_config} \
    --local_rank True \
    --fp16 \
    --max-length {max_length} \
    --dataset {dataset}


zsh:1: command not found: deepspeed


In [None]:
# Restore the model and tokenizer that the training run wrote to output_dir
model, tokenizer = load_model_and_tokenizer(output_dir)

# Prompts used to spot-check the fine-tuned model
eval_prompts = [
    'Write a tweet announcing DLite, a large language model from AI Squared.',
    'Write a poem.',
    'How many legs do dogs typically have?',
    'Explain the concept of deep learning.',
]

# Generate a response for every prompt and print prompt/response pairs,
# separated by a dashed rule
for prompt in eval_prompts:
    response = create_response(prompt, model=model, tokenizer=tokenizer)
    print(f'Prompt: {prompt}\n\n{response}\n\n-----------\n')

# Record the run in mlflow: first the training settings, then the model itself
with mlflow.start_run():
    # Settings used for this training run
    mlflow.log_params({
        'base_model': model_id,
        'dataset': dataset,
        'epochs': epochs,
        'lr': lr,
        'train batch size': train_batch_size,
        'eval batch size': eval_batch_size,
        'max length': max_length,
    })
    # Model and tokenizer are logged together as one transformers artifact
    mlflow.transformers.log_model(
        transformers_model={'model': model, 'tokenizer': tokenizer},
        artifact_path='dlite-v2-124m',
    )