In [None]:
!pip install python-dotenv

In [None]:
import sagemaker
import boto3
# Open a SageMaker session, resolve the notebook's IAM execution role, and
# look up the session's default S3 bucket.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
sagemaker_session_bucket = sess.default_bucket()

# Echo the resolved role, bucket, and region, one per line.
print(
    f"sagemaker role arn: {role}",
    f"sagemaker bucket: {sess.default_bucket()}",
    f"sagemaker session region: {sess.boto_region_name}",
    sep="\n",
)

In [None]:
from sagemaker.huggingface import HuggingFace
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Access the Hugging Face API token. Fail fast here instead of submitting a
# training job with a missing token.
hugging_face_api_token = os.getenv('HUGGING_FACE_API_TOKEN')
if not hugging_face_api_token:
    raise RuntimeError(
        "HUGGING_FACE_API_TOKEN is not set; define it in the environment or in a "
        ".env file before launching the training job."
    )

# NOTE(review): SageMaker stores hyperparameters in plain text with the training
# job, so "hf_token" is readable by anyone allowed to describe the job. Consider
# having train.py fetch the token from a secrets store instead.
hyperparameters = {
    "epochs": 2,                                         # number of training epochs
    "batch_size": 1,                                     # training batch size
    "model_name": "mistralai/Mistral-7B-Instruct-v0.2",  # name of pretrained model
    "hf_token": hugging_face_api_token,                  # Hugging Face access token
    "bucket_name": sess.default_bucket(),                # S3 bucket passed to the job
    "data_key": "zephyrus/data/airflow_dataset.csv",     # key passed alongside bucket_name
    "max_length": 256,
}


In [None]:
# Show the job configuration, masking the Hugging Face token so the secret is
# not written into the saved notebook output. A missing (falsy) token is shown
# as-is so that the problem stays visible.
print({
    key: ("<redacted>" if key == "hf_token" and value else value)
    for key, value in hyperparameters.items()
})
# Configure the SageMaker Hugging Face estimator for the fine-tuning job.
# NOTE(review): the hyperparameters request a Mistral model, and Mistral support
# appears to have landed in transformers releases newer than 4.26.0 — confirm
# that ./training_job upgrades transformers (e.g. via a requirements.txt).
huggingface_estimator = HuggingFace(
    # What to run: the fine-tuning script and the directory that contains it.
    source_dir="./training_job",
    entry_point="train.py",
    hyperparameters=hyperparameters,
    # Where to run it: a single instance with an 80 GB volume.
    instance_type="ml.p3.8xlarge",
    instance_count=1,
    volume_size=80,
    # IAM role used in the training job to access AWS resources (S3).
    role=role,
    # Framework versions that select the training container.
    transformers_version="4.26.0",
    pytorch_version="1.13.1",
    py_version="py39",
)

In [None]:
# Launch the training job. No input channels are passed to fit(); the dataset
# location travels in the "bucket_name" / "data_key" hyperparameters instead
# (presumably train.py reads the CSV from S3 itself — confirm against the script).
huggingface_estimator.fit()