In [18]:
# !pip install transformers
# !pip install PyPDF2
# !pip install boto3

In [17]:
# !pip install sagemaker -U

In [4]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF to read; the file is opened in binary mode.

    Returns:
        One string holding the extracted text of all pages, joined in page
        order with no separator inserted between pages. A page whose
        extraction yields nothing contributes an empty string.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Iterate the pages directly and join once instead of growing a
        # string with ``+=`` inside an index loop. The ``or ''`` is
        # defensive: should a page's extraction return None instead of a
        # string, it is treated as empty rather than raising TypeError.
        return ''.join(page.extract_text() or '' for page in reader.pages)

In [5]:
# Path of the PDF to read; it is relative, so it resolves against the
# notebook's current working directory.
# NOTE(review): the name looks like a typo of "2024-2025 Budget.pdf" — confirm
# it matches the file on disk before renaming anything.
pdf_path = '2024-2015 Buduget.pdf'
# Extract the full text of the PDF; it is passed as the QA "context" later on.
document_text = extract_text_from_pdf(pdf_path)

In [6]:
# Show the extracted text as the cell output to sanity-check the extraction.
document_text

'Budget 2024 -2025 Speech of Nirmala Sitharaman Minister of Finance July 23, 2024 Hon’ble Speaker, I \npresent the Budget for 2024 -25. Introduction 1. The people of India have reposed their faith in the \ngovernment led by the Hon’ble Prime Minister Shri Narendra Modi and re -elected it for a historic \nthird term under his leadership. We are grateful for their support, faith and trust in our policies. We \nare determined to ensure that all Indians, regardless of religion, caste, gender and age, make \nsubstantial progress in realising their life goals and aspirations.  '

In [8]:
import sagemaker
import boto3
# Bootstrap session: used below to look up the default bucket.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
# Replace None with a bucket name to use a specific bucket instead.
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the ARN of the IAM role passed to the model below.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError — presumably because the code is
    # not running inside SageMaker (TODO confirm). Fall back to looking the
    # role up by its hard-coded name through the IAM API.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Recreate the session, this time bound to the bucket chosen above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Report the resolved role and region so the cell output documents the setup.
print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::061039773386:role/service-role/AmazonSageMaker-ExecutionRole-20240901T051572
sagemaker session region: eu-north-1


In [9]:
from sagemaker.huggingface.model import HuggingFaceModel

# Hugging Face Hub settings, later passed as ``env`` to HuggingFaceModel.
# Candidate models can be browsed at <https://huggingface.co/models>.
hub = dict(
    HF_MODEL_ID='distilbert-base-uncased-distilled-squad',  # model id on the Hub
    HF_TASK='question-answering',                           # task used for predictions
)


In [10]:
# Describe the model to serve: which Hub model/task to load (``hub``), the IAM
# role used to create the endpoint, and the framework versions to run with.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    py_version='py39',
    pytorch_version="1.13",
    transformers_version="4.26",
)

# Deploy the model to SageMaker Inference on a single ml.m5.2xlarge instance.
# NOTE(review): no visible cell calls predictor.delete_endpoint(); confirm the
# endpoint is torn down elsewhere once it is no longer needed.
predictor = huggingface_model.deploy(
    instance_type="ml.m5.2xlarge",
    initial_instance_count=1,
)

------!

In [16]:
# Build the request payload: the question and its context both live under the
# mandatory "inputs" key.
data = dict(
    inputs=dict(
        question="Who is Prime Minister Of India?",
        context=document_text,
    )
)

# Send the payload to the deployed endpoint; the response is the cell output.
predictor.predict(data)

{'score': 0.9950273036956787,
 'start': 248,
 'end': 266,
 'answer': 'Shri Narendra Modi'}