In [1]:
# %pip installs into the running kernel's environment. Versions are pinned to
# the ones this notebook was last run with (bitsandbytes is left unpinned).
%pip install transformers==4.33.0 einops==0.6.1 accelerate==0.22.0 bitsandbytes

Collecting transformers
  Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/e1/9d/4d9fe5c3b820db10773392ac5f4a0c8dab668f70b245ce2ce09785166128/transformers-4.33.0-py3-none-any.whl.metadata
  Downloading transformers-4.33.0-py3-none-any.whl.metadata (119 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.9/119.9 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Obtaining dependency information for accelerate from https://files.pythonhosted.org/packages/4d/a7/05c67003d659a0035f2b3a8cf389c1d9645865aee84a73ce99ddab16682f/accelerate-0.22.0-py3-none-any.whl.metadata
  Downloading accelerate-0.22.0-py3-none-any.whl.metadata (17 kB)
Collecting bitsandbytes
  Obtaining dependency information for bitsandbytes fro

In [4]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
import torch
import base64

In [5]:
# Hugging Face Hub id of the seq2seq checkpoint loaded by the next cell.
checkpoint = "MBZUAI/LaMini-T5-738M"

In [7]:
# Tokenizer that belongs to the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Seq2seq model weights, loaded as float32 with automatic device mapping.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float32,
    device_map="auto",
)

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [11]:
# %pip installs into the running kernel's environment; pinned to the version
# this notebook was last run with.
%pip install langchain==0.0.281

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/e5/b2/2bb470ae327eb2f0c45943ae0b5ef63e2a4635217c9d6344eb520fb10eb9/langchain-0.0.281-py3-none-any.whl.metadata
  Downloading langchain-0.0.281-py3-none-any.whl.metadata (14 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Obtaining dependency information for SQLAlchemy<3,>=1.4 from https://files.pythonhosted.org/packages/9f/94/8a20e0b867857cacf7e1269efc02aa81866071df844a2598fa34245a50df/SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.4 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Obtaining dependency information for aiohttp<4.0.0,>=3.8.3 from https://files.pythonhosted.org/packages/3e/f6/fcda07dd1e72260989f0b22dde999ecfe80daa744f23ca167083683399bc/aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.

In [15]:
from langchain.llms import HuggingFacePipeline

In [16]:
def llm_pipeline(max_length=256, do_sample=True, temperature=0.3, top_p=0.95):
    """Wrap the notebook's loaded model in a LangChain-compatible LLM.

    Builds a Hugging Face ``text2text-generation`` pipeline from the
    notebook-level ``base_model`` and ``tokenizer`` and wraps it in
    ``HuggingFacePipeline``. The defaults reproduce the settings this
    notebook originally hard-coded, so ``llm_pipeline()`` behaves as before.

    Args:
        max_length: Forwarded to the pipeline as ``max_length``.
        do_sample: Forwarded to the pipeline as ``do_sample``.
        temperature: Forwarded to the pipeline as ``temperature``.
        top_p: Forwarded to the pipeline as ``top_p``.

    Returns:
        A ``HuggingFacePipeline`` instance wrapping the configured pipeline.
    """
    pipe = pipeline(
        'text2text-generation',
        model=base_model,
        tokenizer=tokenizer,
        max_length=max_length,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
    )
    return HuggingFacePipeline(pipeline=pipe)

In [19]:
# Instruction text passed to the local pipeline in the next cell.
input_prompt = 'write an article about advanced algorithms'

In [20]:
# Build the LangChain-wrapped pipeline and run the prompt through it.
# The bare name on the last line makes the notebook display the result.
model = llm_pipeline()
generated_text = model(input_prompt)
generated_text

'Advanced algorithms are a set of techniques and techniques that are used in computer science and engineering to solve complex problems. These algorithms are designed to perform complex calculations and solve problems that would be difficult or impossible to solve using traditional methods. One of the most significant applications of advanced algorithms is in the field of computer science. Algorithms are used in a wide range of applications, from data analysis and machine learning to robotics and cybersecurity. One of the most common applications of advanced algorithms is in the field of artificial intelligence. Algorithms are used in various fields, including computer science, robotics, and artificial'

In [28]:
# Remove the preinstalled sagemaker SDK so a pinned version can be installed
# in the next cell; %pip targets the running kernel's environment.
%pip uninstall -y sagemaker

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Found existing installation: sagemaker 2.183.0
Uninstalling sagemaker-2.183.0:
  Successfully uninstalled sagemaker-2.183.0


In [29]:
# Install the pinned sagemaker SDK into the running kernel's environment.
%pip install sagemaker==2.168.0

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting sagemaker==2.168.0
  Using cached sagemaker-2.168.0-py2.py3-none-any.whl
Installing collected packages: sagemaker
Successfully installed sagemaker-2.168.0


In [31]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role for the deployment: try the SDK's execution-role lookup
# first, and on ValueError fetch the role ARN by name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_details = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_details['Role']['Arn']

# Hub model configuration, passed to the container as its environment.
# https://huggingface.co/models
# NOTE(review): 'device_map' and 'torch_dtype' are sent as plain environment
# variables here -- confirm the serving container actually reads them.
hub = {
    'HF_MODEL_ID': 'MBZUAI/LaMini-T5-738M',
    'HF_TASK': 'text2text-generation',
    'device_map': 'auto',
    'torch_dtype': 'torch_float32',
}

# Container image for the "huggingface" LLM backend, version 0.8.2.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="0.8.2")

# Describe the model (image + environment + role) to SageMaker.
huggingface_model = HuggingFaceModel(
    image_uri=llm_image_uri,
    env=hub,
    role=role,
)

# Deploy to a SageMaker endpoint backed by a single ml.g4dn.xlarge instance.
predictor = huggingface_model.deploy(
    instance_type="ml.g4dn.xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
)

# Send a first request; as the cell's last expression, the reply is displayed.
first_request = {
    "inputs": "write an article on the university of texas at dallas",
}
predictor.predict(first_request)

------------!

[{'generated_text': 'The University of Texas at Dallas is a prestigious institution that offers a wide range of academic'}]

In [34]:
prompt = "write an article about batman"

# Generation settings sent alongside the prompt.
generation_parameters = {
    "do_sample": True,
    "top_p": 0.7,
    "temperature": 0.3,
    "top_k": 50,
    "max_new_tokens": 512,
    "repetition_penalty": 1.03,
}

# Request body: the prompt text plus the generation settings.
payload = {"inputs": prompt, "parameters": generation_parameters}

# Query the deployed endpoint through the SDK predictor and print the reply.
response = predictor.predict(payload)
print(response)

[{'generated_text': 'Batman is a superhero character who is widely regarded as one of the greatest superheroes of all time. He is a skilled fighter, skilled in the art of combat, and has become a symbol of hope and inspiration for millions of people around the world. One of the most iconic superheroes in history is Batman. He is a superhero who has been around since the dawn of the modern era. He is known for his incredible speed, agility, and strength, and has been a symbol of the superhero genre for centuries. One of the most iconic superheroes in history is Superman. He'}]


In [35]:
# Name of the endpoint to invoke through the low-level runtime client.
# Prefer the endpoint deployed in this session: the recorded name below carries
# a deployment timestamp, so it goes stale once the model is deployed again.
# Fall back to the recorded name when no `predictor` exists in the kernel.
try:
    ENDPOINT = predictor.endpoint_name
except NameError:
    ENDPOINT = "huggingface-pytorch-tgi-inference-2023-09-05-17-57-42-061"

In [36]:
import boto3

In [37]:
# Low-level boto3 client, used below to call the endpoint by name instead of
# going through the SageMaker SDK predictor object.
runtime = boto3.client('runtime.sagemaker')

In [38]:
# Invoke the endpoint through boto3, sending the same payload serialised as JSON.
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT,
    ContentType="application/json",
    Body=json.dumps(payload),
)

In [39]:
# Print the raw boto3 response; the generated text is inside the 'Body' entry,
# which is read and parsed in the next cell.
print(response)

{'ResponseMetadata': {'RequestId': 'd54daff7-e1ab-485e-85b6-144b56b992b7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd54daff7-e1ab-485e-85b6-144b56b992b7', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Tue, 05 Sep 2023 18:13:52 GMT', 'content-type': 'application/json', 'content-length': '561', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7feee21af220>}


In [40]:
# Read the response body, decode it as UTF-8 and parse the JSON it contains.
raw_body = response['Body'].read()
prediction = json.loads(raw_body.decode('utf-8'))

In [41]:
# Display the parsed prediction.
prediction

[{'generated_text': "Batman is a superhero character who is widely recognized for his exceptional skills and abilities. He is a skilled fighter, skilled in the art of batman, and has become a beloved figure in popular culture. One of the most iconic characters in Batman's history is the Joker. Joker is a renowned villain who is known for his twisted personality, twisted mind, and unwavering loyalty to his family. He is a symbol of strength, courage, and unwavering loyalty. Joker is a master of the baton, and he is known for his incredible speed, agility"}]

In [42]:
# Pull the generated text out of the first result.
prediction[0]['generated_text']

"Batman is a superhero character who is widely recognized for his exceptional skills and abilities. He is a skilled fighter, skilled in the art of batman, and has become a beloved figure in popular culture. One of the most iconic characters in Batman's history is the Joker. Joker is a renowned villain who is known for his twisted personality, twisted mind, and unwavering loyalty to his family. He is a symbol of strength, courage, and unwavering loyalty. Joker is a master of the baton, and he is known for his incredible speed, agility"

In [43]:
# Reference: https://www.youtube.com/watch?v=A9Pu4xg-Nas
# This video was used as a guide to the SageMaker tooling.