# Serve an LLM from HuggingFace on Red Hat OpenShift AI

## Set Default Environment Variables

### Set Model Environment Variables

In [None]:
# The model repository is cloned from
# <MODEL_REGISTRY_URL>/<MODEL_PROVIDER>/<MODEL_NAME>.git
MODEL_REGISTRY_URL = "https://huggingface.co"
MODEL_PROVIDER = "neoxu1688"
MODEL_NAME = "Llama-2-7b-chat-hf-fine-tuned"

# Suffix appended to MODEL_NAME to name the directory holding the converted model.
CONVERTED_PATH_ADDITION = "converted"

# Alternative model (same registry and suffix); swap these two values in to use it:
#   MODEL_PROVIDER = 'ibm'
#   MODEL_NAME = 'merlinite-7b'

### Set the S3 Bucket Environment Variables

# Credentials, region, and bucket name that the upload script reads from the
# environment later in this notebook.
# NOTE(review): these are literal values stored in the notebook; confirm they
# are demo-only defaults and not real secrets.
# NOTE(review): AWS_S3_ENDPOINT is read later with os.environ.get but is not set
# here; presumably the workbench environment provides it -- confirm, otherwise
# endpoint_url will be None.
%env AWS_ACCESS_KEY_ID=minio
%env AWS_SECRET_ACCESS_KEY=minio123
%env AWS_DEFAULT_REGION=us-east-1
%env AWS_S3_BUCKET=my-ai-bucket

## Housekeeping

In [None]:
# Upgrade pip itself before any packages are installed.
!pip install --upgrade pip

## Obtain and install Git LFS, which will be used to download the model

In [None]:
# Download the Git LFS v3.4.0 release tarball for linux-amd64.
!wget https://github.com/git-lfs/git-lfs/releases/download/v3.4.0/git-lfs-linux-amd64-v3.4.0.tar.gz
# Unpack it in the working directory; the installer below is run from ./git-lfs-3.4.0.
!tar -xvzf git-lfs-linux-amd64-v3.4.0.tar.gz
# Run the bundled installer with PREFIX set to /opt/app-root/src/.local
# (presumably a user-writable location so no root access is needed -- confirm).
!PREFIX=/opt/app-root/src/.local ./git-lfs-3.4.0/install.sh

## Obtain and install the Caikit NLP library, which will be used to serve the model

In [None]:
# Clone the caikit-nlp repository and install it from the local checkout.
# NOTE(review): no branch, tag, or commit is specified, so the clone takes
# whatever the repository's default branch currently contains.
!git clone https://github.com/caikit/caikit-nlp.git
!pip install ./caikit-nlp

## Obtain the Convert tool, which will be used to convert the model from .bin to .safetensors, a format that can be served by the Caikit NLP library

In [None]:
# Clone caikit-tgis-serving to get utils/convert.py, then copy that script into
# the working directory, where a later cell runs it as ./convert.py.
!git clone https://github.com/opendatahub-io/caikit-tgis-serving.git
!cp caikit-tgis-serving/utils/convert.py .

---

## Obtain the HuggingFace CLI to log in to the HuggingFace model repository

In [None]:
# Install (or upgrade) huggingface_hub to obtain the huggingface-cli command used below.
!pip install --upgrade huggingface_hub
# Read the access token from the hf_token environment variable, which must
# already be set before this cell runs.
hftokensecret = %env hf_token
# Log in with that token; IPython substitutes $hftokensecret with the Python variable.
# NOTE(review): the token is passed as a command-line argument -- confirm that is acceptable here.
# NOTE(review): the model is fetched with `git clone` in the next step; confirm whether
# this login also needs to store git credentials for private or gated repositories.
!huggingface-cli login --token $hftokensecret

## Obtain the desired model from HuggingFace

In [None]:
# Clone <MODEL_REGISTRY_URL>/<MODEL_PROVIDER>/<MODEL_NAME>.git into ./<MODEL_NAME>.
# IPython substitutes the {…} and $… references with the Python variables set earlier.
!git clone "{MODEL_REGISTRY_URL}/{MODEL_PROVIDER}/{MODEL_NAME}.git" ./$MODEL_NAME

## Convert the model to .safetensors format

In [None]:
# Run the conversion script on the cloned model in ./<MODEL_NAME> and write the
# result to ./<MODEL_NAME>-<CONVERTED_PATH_ADDITION>, the directory uploaded to S3 later.
!./convert.py --model-path ./$MODEL_NAME --model-save-path ./$MODEL_NAME-$CONVERTED_PATH_ADDITION

---

## Move the converted model to S3 storage

## Install necessary packages for publishing the model to S3

In [None]:
# Upgrade pip, then install boto3 and botocore, which the upload script imports.
! pip install --upgrade pip
! pip install boto3 botocore

## Prepare the script to publish the model to S3 by importing the necessary libraries

In [None]:
import os
import boto3
import botocore

## Obtain the necessary environment variables for the script

In [None]:
# Pull the S3 connection settings from the process environment.
# Any variable that is not set comes back as None.

# Credentials
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# Endpoint, region, and target bucket
endpoint_url = os.getenv("AWS_S3_ENDPOINT")
region_name = os.getenv("AWS_DEFAULT_REGION")
bucket_name = os.getenv("AWS_S3_BUCKET")

## Create the S3 session, resource, and bucket handle used to publish the model

In [None]:
# Session carrying the access key pair read from the environment.
session = boto3.session.Session(
    aws_secret_access_key=aws_secret_access_key,
    aws_access_key_id=aws_access_key_id,
)

# S3 resource bound to the configured endpoint and region, using the
# "s3v4" signature version.
s3_resource = session.resource(
    service_name="s3",
    region_name=region_name,
    endpoint_url=endpoint_url,
    config=botocore.client.Config(signature_version="s3v4"),
)

# Handle for the target bucket; the upload and listing helpers use it.
bucket = s3_resource.Bucket(bucket_name)

## Define the function to publish the model to S3

In [None]:
def upload_directory_to_s3(local_directory, s3_prefix):
    """Upload every file under ``local_directory`` to the module-level ``bucket``.

    The directory tree is walked recursively. Each file is stored under
    ``s3_prefix`` followed by the file's path relative to ``local_directory``,
    and each ``local path -> key`` pair is printed before the upload.

    Args:
        local_directory: Path of the local directory to upload.
        s3_prefix: Key prefix under which the files are stored.

    Raises:
        NotADirectoryError: If ``local_directory`` is not an existing directory.
    """
    import posixpath  # local import so the cell does not depend on other cells' imports

    # os.walk silently yields nothing for a missing path, which would make a
    # failed earlier step look like a successful (empty) upload.
    if not os.path.isdir(local_directory):
        raise NotADirectoryError(f"Not a directory: {local_directory!r}")

    for root, _dirs, files in os.walk(local_directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)
            # Object keys use "/" regardless of the local OS separator, so build
            # the key with posixpath instead of os.path.
            s3_key = posixpath.join(s3_prefix, *relative_path.split(os.sep))
            print(f"{file_path} -> {s3_key}")
            bucket.upload_file(file_path, s3_key)

## Define the function to list the contents of the S3 bucket for validation

In [None]:
def list_objects(prefix):
    """Print the key of each object in ``bucket`` selected by ``Prefix=prefix``."""
    matching_objects = bucket.objects.filter(Prefix=prefix).all()
    for s3_object in matching_objects:
        print(s3_object.key)

## Perform the publishing of the model to S3

In [None]:
# Upload the converted model directory, keeping its name under the "model/" prefix.
upload_directory_to_s3(
    local_directory=f"{MODEL_NAME}-{CONVERTED_PATH_ADDITION}",
    s3_prefix=f"model/{MODEL_NAME}-{CONVERTED_PATH_ADDITION}",
)

## Validate the model has been published to S3

In [None]:
# Print the keys stored under the model's prefix so the upload can be checked.
list_objects(prefix=f"model/{MODEL_NAME}-{CONVERTED_PATH_ADDITION}")