In [4]:
# !pip -q install huggingface transformers boto3

In [1]:
# !pip install aws_lambda_powertools

In [4]:
# Print the installed SageMaker SDK version so it is recorded in the cell output.
import sagemaker
print(sagemaker.__version__)

2.155.0


In [5]:
import os
import sys
import shutil
import tarfile
import argparse
import boto3
import torch

In [8]:
# Staging directory whose contents are later packed into the model archive.
folder_name = "Model_Folder"
try:
    os.mkdir(folder_name)
    print("OK")
except FileExistsError:
    # FileExistsError is raised for ANY existing path (a regular file too),
    # so confirm it really is a directory before reporting it as reusable.
    if os.path.isdir(folder_name):
        print("Folder already exists")
    else:
        print(f"Error creating directory: '{folder_name}' exists but is not a directory")
except OSError as e:
    print(f"Error creating directory: {e}")


OK


In [18]:
def store_folder_files(source_folder, destination_folder):
    """Copy the files directly inside ``source_folder`` into ``destination_folder``.

    Only regular files at the top level of ``source_folder`` are copied
    (metadata preserved via ``shutil.copy2``). Sub-directories are skipped and
    reported, instead of aborting the copy half-way with an error.

    Args:
        source_folder: Directory to read files from.
        destination_folder: Directory to copy into; created if it is missing.

    Raises:
        OSError: If ``source_folder`` cannot be listed or a file cannot be copied.
    """
    # Make sure the target exists so the first copy does not fail.
    os.makedirs(destination_folder, exist_ok=True)

    for entry_name in os.listdir(source_folder):
        source_path = os.path.join(source_folder, entry_name)

        # shutil.copy2 only handles files; passing it a directory raises.
        if not os.path.isfile(source_path):
            print(f"Skipping non-file entry: {entry_name}")
            continue

        shutil.copy2(source_path, os.path.join(destination_folder, entry_name))

    # Print a success message
    print("Files copied successfully!")

In [19]:
# Stage the saved model files in the same folder that is archived later
# (use `folder_name` rather than repeating the literal so the two cannot drift).
store_folder_files('BERT_MODEL', folder_name)

Files copied successfully!


In [20]:
# Stage the tokenizer files next to the model files
# (use `folder_name` rather than repeating the literal so the two cannot drift).
store_folder_files('tokenizer', folder_name)

Files copied successfully!


In [21]:
def compress(tar_dir=None, output_file="model.tar.gz"):
    """Write the contents of ``tar_dir`` to a gzip-compressed tar archive.

    Args:
        tar_dir: Directory to archive. The default of ``None`` is not usable;
            callers are expected to pass a real path.
        output_file: Path of the ``.tar.gz`` file to create (overwritten if present).

    Returns:
        None. The archive is written to ``output_file``.
    """
    # arcname is the path separator rather than the folder name; presumably
    # so the files sit at the archive root instead of under the folder name
    # (TODO confirm against the consumer of the archive).
    with tarfile.open(name=output_file, mode="w:gz") as archive:
        archive.add(tar_dir, arcname=os.path.sep)

In [None]:
# boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')

In [5]:
def upload_file_to_s3(bucket_name=None, file_name="model.tar.gz", key_prefix=""):
    """Upload a local file to S3 and return its ``s3://`` URI.

    Args:
        bucket_name: Name of the destination bucket (required).
        file_name: Local path of the file to upload; it is also appended to
            ``key_prefix`` to form the object key.
        key_prefix: Optional key prefix inside the bucket.

    Returns:
        str: ``s3://<bucket_name>/<object key>`` of the uploaded object.

    Raises:
        ValueError: If ``bucket_name`` is empty or ``None``.
    """
    # Function-scope import: keeps this block self-contained.
    import posixpath

    if not bucket_name:
        raise ValueError("bucket_name is required")

    # S3 object keys always use "/" as separator. os.path.join would insert
    # the local OS separator (a backslash on Windows) and produce a wrong key.
    object_key = posixpath.join(key_prefix, file_name)

    s3 = boto3.resource("s3")
    s3.Bucket(bucket_name).upload_file(file_name, object_key)
    return f"s3://{bucket_name}/{object_key}"

In [6]:
# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime
# from sagemaker.predictor import csv_serializer

# IAM role returned by get_execution_role(); passed as `role` to HuggingFaceModel below.
role = get_execution_role()
# S3 key prefix under which the model archive is uploaded.
prefix = 'ai-title-docs-model'
my_region = boto3.session.Session().region_name # region of the default boto3 session; used when creating the bucket

Matplotlib is building the font cache; this may take a moment.


In [4]:
# S3 bucket that receives the model archive.
bucket_name = 'documentapplication' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
# Show which region the boto3 session resolved to.
print(my_region)

eu-north-1


In [26]:
from botocore.exceptions import ClientError

s3 = boto3.resource('s3')
# Check if the bucket already exists.
# head_bucket raises ClientError (code "404") for a missing bucket, whereas
# Bucket.load() only lists the caller's buckets and does not signal a miss,
# so the create branch below would never be reached with load().
try:
    s3.meta.client.head_bucket(Bucket=bucket_name)
    print('Bucket already exists')
except ClientError as e:
    error_code = e.response['Error']['Code']
    # If the bucket does not exist, create it in the session's region
    if error_code in ("404", "NoSuchBucket"):
        create_kwargs = {'Bucket': bucket_name}
        # us-east-1 is the default location and must NOT be passed as a
        # LocationConstraint; every other region has to be named explicitly.
        if my_region and my_region != 'us-east-1':
            create_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': my_region}
        try:
            s3.create_bucket(**create_kwargs)
            print('Bucket created successfully')
        except Exception as create_error:
            print('S3 error: ', create_error)
    else:
        # e.g. 403: the bucket exists but this role may not access it
        print('S3 error: ', e)

Bucket already exists


In [27]:
# Create the archive: pack the contents of `folder_name` into model.tar.gz
# (compress() is called with its default output_file).
print("creating `model.tar.gz` archive")
compress(folder_name)

creating `model.tar.gz` archive


In [29]:

# Push the archive to S3 and keep the resulting URI for later cells.
print("uploading `model.tar.gz` archive to s3")

model_uri = upload_file_to_s3(bucket_name=bucket_name, key_prefix=prefix)

print(f"Successfully uploaded to {model_uri}")

uploading `model.tar.gz` archive to s3
Successfully uploaded to s3://documentapplication/ai-title-docs-model/model.tar.gz


In [31]:
# Echo the uploaded archive's S3 URI as the cell output.
model_uri

's3://documentapplication/ai-title-docs-model/model.tar.gz'

In [7]:
from sagemaker.huggingface.model import HuggingFaceModel

In [None]:
# Describe the model to SageMaker: the uploaded archive plus the framework
# versions requested for serving it.
huggingface_model = HuggingFaceModel(
    model_data="s3://documentapplication/ai-title-docs-model/model.tar.gz",  # S3 path of the packaged model
    role=role,  # IAM role with permissions to create an endpoint
    transformers_version="4.26",  # Transformers version used
    pytorch_version="1.13",  # PyTorch version used
    py_version="py39",  # Python version used
)

# Deploy the model to SageMaker Inference on a single ml.c5.xlarge instance.
predictor = huggingface_model.deploy(initial_instance_count=1, instance_type="ml.c5.xlarge")

--

In [None]:
import re

In [None]:
class CleanText:
    """Normalise raw text into lowercase, space-separated ASCII words.

    The cleaning steps and their order are significant: the endpoint input is
    built from this output, so the pipeline must not change its results.
    """

    # Regex pattern strings, applied in this order by clean_text().
    _NEWLINE_PATTERN = r'\\n|\n'            # literal "\n" text or a real newline
    _LINK_PATTERN = r"http\S+"              # lowercase http/https links
    _NUL_PATTERN = r'\x00|\\x00'            # NUL character or literal "\x00" text
    _PUNCT_PATTERN = r'[^\w\s]'             # anything that is not a word char or whitespace
    _NUM_PATTERN = r'\d+'                   # runs of digits
    _SPECIAL_PATTERN = r'[^A-Za-z0-9\s]'    # non-ASCII letters, underscore, etc.
    _WHITESPACE_PATTERN = r'\s+'            # raw string: '\s' is not a valid str escape

    # English stop words, built once instead of on every call.
    # Entries containing an apostrophe or shorter than three characters can
    # never match, because punctuation and short words are removed before the
    # stop-word filter runs. They are kept so the set itself stays unchanged.
    _STOP_WORDS = frozenset({
        'some', 'itself', 'these', 'isn', 'over', 'o', 'doesn', 'hadn', "she's", 'same',
        'yourselves', 'had', 'why', 'so', 'she', "doesn't", 'nor', "hasn't", "it's", 'they',
        'have', 'her', 'can', 'ain', 'against', 'ma', 'be', 'was', 'very', 'mightn',
        'because', 'don', 'whom', 'needn', 'who', 'but', "you'd", 'which', 'being', 'both',
        'just', 'won', 'are', 'am', 'below', 'does', 'on', 'y', 'here', 'herself',
        'each', 'hasn', 'our', 'down', 'haven', "didn't", 'too', 'as', 'above', 'more',
        'shouldn', "weren't", 'your', "haven't", "wasn't", 'were', 'theirs', 'from', 'once', 'or',
        "mustn't", 'a', "mightn't", 'do', "you've", 'by', 'with', 'his', 'own', 'other',
        'doing', 'in', 'd', 'few', 'should', 'you', 'most', 'those', 'where', 'through',
        'further', 'will', 'my', 'him', 'during', 're', 'ourselves', 've', 'about', 'out',
        'before', 'mustn', 'now', 'an', "that'll", 'hers', 'how', 'until', 'after', 'didn',
        'again', 'this', 'then', 'yourself', 'has', 'yours', 'having', 'under', 'themselves', 'm',
        "wouldn't", 'weren', 'did', 's', "you're", 'the', 't', 'up', 'all', 'and',
        'at', 'll', 'me', 'into', 'he', 'is', 'to', 'only', 'i', 'when',
        'of', "shan't", 'what', 'them', "aren't", 'ours', 'wasn', "don't", 'couldn', "you'll",
        'off', 'than', 'its', 'between', "couldn't", "hadn't", 'shan', 'if', 'wouldn', 'for',
        'we', "won't", 'been', 'their', 'not', 'there', 'no', 'such', 'myself', "shouldn't",
        'himself', "needn't", 'any', "isn't", 'that', "should've", 'it', 'while', 'aren',
    })

    def __init__(self) -> None:
        pass

    def clean_text(self, text):
        """Return ``text`` cleaned for classification.

        Steps, in order: replace newlines with spaces, drop links and NUL
        markers, lowercase, strip punctuation, digits and non-ASCII/special
        characters, collapse whitespace, drop words of one or two characters,
        then drop stop words.

        Args:
            text: The raw string to clean.

        Returns:
            str: The remaining words joined by single spaces ("" if none remain).
        """
        # Newlines (real or escaped) become word separators
        text = re.sub(self._NEWLINE_PATTERN, ' ', text)
        # Remove links (done before lowercasing, so only lowercase "http" matches)
        text = re.sub(self._LINK_PATTERN, "", text)
        text = re.sub(self._NUL_PATTERN, '', text)
        # Convert to lowercase
        text = text.lower()
        # Remove punctuation
        text = re.sub(self._PUNCT_PATTERN, '', text)
        # Remove integers
        text = re.sub(self._NUM_PATTERN, '', text)
        # Remove special characters
        text = re.sub(self._SPECIAL_PATTERN, '', text)
        # Collapse all whitespace runs (including any newline) to a single space
        text = re.sub(self._WHITESPACE_PATTERN, ' ', text).strip()
        # Keep only words longer than two characters, then drop stop words
        words = [word for word in text.split() if len(word) > 2]
        return ' '.join(word for word in words if word not in self._STOP_WORDS)

In [None]:
# NOTE(review): `text` is not defined in any visible cell; it must be assigned
# (presumably the raw document string to classify) before this cell runs.
cls = CleanText()
ctext = cls.clean_text(text)
# Wrap the cleaned text in literal "<start>" / "<end>" markers.
prepare_text = "<start> " + ctext + " <end>"
prepare_text

In [None]:
# Request payload: the prepared text under the 'inputs' key.
data = {
    'inputs':prepare_text,
}
# Send the payload to the deployed endpoint and display the response.
predictor.predict(data)

In [60]:
# Calls the endpoint again and keeps only the 'label' field of the first result.
predictor.predict(data)[0]['label']

'LABEL_1'

In [62]:
# predictor.delete_endpoint(delete_endpoint_config=True)

In [12]:
import boto3

# Create an Amazon SageMaker client
sm_client = boto3.client('sagemaker')

# # Get the list of available instance types
# response = sm_client.describe_notebook_instance_types()

# # Iterate through the instance types and check if GPU is available
# for instance_type in response['NotebookInstanceTypes']:
#     if 'AcceleratorTypes' in instance_type:
#         for accelerator in instance_type['AcceleratorTypes']:
#             if accelerator['Type'] == 'ml.p3':
#                 print(f"GPU-based instance type: {instance_type['Name']}")


In [13]:
sm_client.

<bound method ClientCreator._create_api_method.<locals>._api_call of <botocore.client.SageMaker object at 0x7fee1d7a9840>>