# Fine-tuning LLaMA on a Small Document Corpus using AWS SageMaker

In [1]:
%pip install sagemaker transformers datasets

In [2]:
import sagemaker
from sagemaker.pytorch import PyTorch
from transformers import LlamaForCausalLM, LlamaTokenizer
from datasets import load_dataset
import os
import boto3

In [3]:
# Execution role handed to the training job, plus a SageMaker session handle.
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()

## Prepare the dataset

In [4]:
# Load a sample documents dataset
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1')

# Tokenize
tokenizer = LlamaTokenizer.from_pretrained('hf-internal-testing/llama-tokenizer')

# Tokenizing with padding='max_length' raises if the tokenizer has no pad token.
# Fall back to EOS only when none is defined, so an existing pad token is kept.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch of examples.

    Every entry is truncated or padded to exactly 512 tokens.
    """
    encode_options = {
        'truncation': True,
        'padding': 'max_length',
        'max_length': 512,
    }
    return tokenizer(examples['text'], **encode_options)


# batched=True hands tokenize_function a batch of rows per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

## Save the tokenized dataset to S3

In [5]:
s3_output_bucket = os.environ.get('S3_OUT_BUCKET')
if not s3_output_bucket:
    # Fail before doing any work rather than deep inside the upload loop.
    raise ValueError('Set the S3_OUT_BUCKET environment variable to the destination bucket name.')

s3_key_prefix = 'tokenized-datasets'

# S3 URI used as the training channel. When S3_OUT_PATH is unset, default to
# the location this cell uploads to instead of leaving it as None.
s3_output_path = os.environ.get('S3_OUT_PATH') or f's3://{s3_output_bucket}/{s3_key_prefix}'

local_dataset_dir = './tokenized-datasets'
tokenized_datasets.save_to_disk(local_dataset_dir)

# Upload the dataset to S3
s3 = boto3.client('s3')
for root, dirs, files in os.walk(local_dataset_dir):
    for file in files:
        local_path = os.path.join(root, file)
        # Keep the path relative to the dataset root in the object key.
        # save_to_disk writes one sub-directory per split containing files with
        # the same names, so keying on the bare file name would make the splits
        # overwrite each other in S3.
        relative_path = os.path.relpath(local_path, local_dataset_dir).replace(os.sep, '/')
        s3.upload_file(local_path, s3_output_bucket, f'{s3_key_prefix}/{relative_path}')

## Fine-tune the LLaMA model

In [6]:
from sagemaker.huggingface import HuggingFace

# Training job definition: runs train.py from the current directory on a single
# GPU instance inside a Hugging Face training container.
huggingface_estimator = HuggingFace(
    entry_point='train.py',
    source_dir='./',
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    # LLaMA support was added in transformers 4.28; the previous
    # 4.6 / PyTorch 1.7.1 / py36 container cannot load LlamaForCausalLM.
    transformers_version='4.28',
    pytorch_version='2.0',
    py_version='py310',
    hyperparameters={
        # NOTE(review): this repo id looks like a tokenizer-only test fixture
        # with no model weights - confirm train.py can actually load a model
        # from it, or point this at a real LLaMA checkpoint.
        'model_name_or_path': 'hf-internal-testing/llama-tokenizer',
        'output_dir': '/opt/ml/model',
        'per_device_train_batch_size': 8,
        'num_train_epochs': 3,
        'save_steps': 10_000,
        'save_total_limit': 2,
    },
)

# The 'train' channel must point at the S3 location of the tokenized dataset.
huggingface_estimator.fit({'train': s3_output_path})

## Deploy the fine-tuned model

In [7]:
# Create an inference endpoint (one ml.m5.large instance) from the trained estimator.
predictor = huggingface_estimator.deploy(instance_type='ml.m5.large', initial_instance_count=1)

# Report the endpoint name so it can be located (and deleted) later.
print(f'Model deployed at endpoint: {predictor.endpoint_name}')

## Test the deployed model

In [8]:
# The default Hugging Face inference handler passes 'inputs' to a transformers
# pipeline, which tokenizes raw text itself, so send the string, not token ids.
# NOTE(review): assumes no custom inference script changes the request format - confirm.
test_input = "This is a test input."
output = predictor.predict({'inputs': test_input})
print(output)

## Clean up

In [9]:
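# Left commented out so "Run All" does not tear down the endpoint by accident.
# Uncomment and run once testing is finished to stop incurring endpoint charges.
# predictor.delete_endpoint()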