In [7]:
!pip -q  install transformers einops accelerate  bitsandbytes langchain langchain-community

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline 
import torch 
import base64


In [9]:
# Hugging Face Hub checkpoint id. NOTE: the name `model` is rebound further down
# to the LangChain wrapper returned by llm_pipeline().
model = "MBZUAI/LaMini-T5-738M"

# Load the tokenizer and the seq2seq weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    model,
    device_map="auto",
    torch_dtype=torch.float32,
)

In [10]:
def llm_pipeline():
    """Build a LangChain LLM backed by the locally loaded seq2seq model.

    Wraps the module-level ``base_model`` and ``tokenizer`` in a transformers
    ``text2text-generation`` pipeline and returns it as a
    ``HuggingFacePipeline``. ``HuggingFacePipeline`` must already be imported
    when this function is *called*.

    Returns:
        HuggingFacePipeline: wrapper around the configured pipeline.
    """
    # Generation settings passed through to the transformers pipeline.
    generation_kwargs = dict(
        max_length=256,
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
    )
    text2text = pipeline(
        "text2text-generation",
        model=base_model,
        tokenizer=tokenizer,
        **generation_kwargs,
    )
    return HuggingFacePipeline(pipeline=text2text)
    
    
        

In [12]:
# HuggingFacePipeline is provided by langchain-community (installed at the top of
# this notebook). The legacy `langchain.llms` path is deprecated, so it is kept
# only as a fallback for older LangChain installs.
try:
    from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
except ImportError:
    from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# Prompt sent to the local model below.
input_prompt=""" Write an article on AI"""


In [13]:
# Rebinds `model` (until now the checkpoint id string) to the LangChain LLM wrapper.
model = llm_pipeline()


In [14]:
# Run the prompt through the local LLM. `.invoke()` is the supported Runnable entry
# point; calling the LLM object directly (`model(prompt)`) is deprecated in LangChain.
generated_text = model.invoke(input_prompt)

In [15]:
# Display the generated text as this cell's output.
generated_text

'AI, or artificial intelligence, is a rapidly growing field that has the potential to revolutionize various industries. It is a technology that allows computers to perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI is used in a variety of applications, including healthcare, finance, and transportation. One of the most significant applications of AI is in healthcare. AI systems are designed to analyze large amounts of data to identify patterns and make predictions. This information can then be used to optimize medical treatments, improve efficiency, and enhance patient care. AI is also used in industries'

In [16]:
!pip show sagemaker

Name: sagemaker
Version: 2.224.4
Summary: Open source library for training and deploying models on Amazon SageMaker.
Home-page: https://github.com/aws/sagemaker-python-sdk/
Author: Amazon Web Services
Author-email: 
License: Apache License 2.0
Location: /home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages
Requires: attrs, boto3, cloudpickle, docker, google-pasta, importlib-metadata, jsonschema, numpy, packaging, pandas, pathos, platformdirs, protobuf, psutil, PyYAML, requests, schema, smdebug-rulesconfig, tblib, tqdm, urllib3
Required-by: 


In [20]:
#deployment script from huggingface

import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role used by SageMaker. If get_execution_role() raises
# ValueError, look the role up by name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# Hub model configuration, passed to the container as environment variables.
# https://huggingface.co/models
# NOTE(review): 'device_map' and 'torch_dtype' look like from_pretrained kwargs
# rather than container env vars -- presumably ignored by the image; confirm.
hub = {
    "HF_MODEL_ID": "MBZUAI/LaMini-T5-738M",
    "HF_TASK": "text2text-generation",
    "device_map": "auto",
    "torch_dtype": "torch.float32",
}

# Model definition: Hugging Face LLM container image + env config + role.
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="0.6.0"),
    env=hub,
    role=role,
)

# Deploy to a SageMaker real-time inference endpoint.
# NOTE: the endpoint stays up after this cell finishes; nothing in this
# notebook deletes it.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    container_startup_health_check_timeout=300,
)

# Smoke-test request; the response is shown as the cell output.
predictor.predict({"inputs": "Write an ariticle on AI?"})

------------!

[{'generated_text': 'AI is a rapidly growing field that involves the development of computer systems that can perform tasks that typically'}]

In [19]:
!pip install -U sagemaker

Collecting sagemaker
  Downloading sagemaker-2.227.0-py3-none-any.whl.metadata (15 kB)
Downloading sagemaker-2.227.0-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.224.4
    Uninstalling sagemaker-2.224.4:
      Successfully uninstalled sagemaker-2.224.4
Successfully installed sagemaker-2.227.0


In [21]:
# Name of the endpoint to invoke. Prefer the endpoint created by the deploy cell,
# because each deploy generates a new timestamped name and a hard-coded one goes
# stale on re-run. Fall back to the previously deployed endpoint when no
# `predictor` exists in this kernel.
try:
    ENDPOINT = predictor.endpoint_name
except NameError:
    ENDPOINT = "huggingface-pytorch-tgi-inference-2024-08-04-22-18-25-753"

In [24]:
# Prompt for the deployed endpoint (leading/trailing spaces are part of the value).
prompt = " Write an article on GenAI? "

In [27]:
# Request body for the text-generation endpoint: the prompt plus sampling parameters.
payload = {
    'inputs': prompt,
    'parameters': {
        'max_new_tokens': 256,
        'do_sample': True,
        'temperature': 0.3,
        'top_p': 0.4,
        'top_k': 50,
        # Fixed key spelling (was 'repitition_penalty'), so the penalty is actually
        # sent under the parameter name the server recognises.
        'repetition_penalty': 1.03,
    },
}

# Send the request through the SageMaker predictor and show the decoded response.
response = predictor.predict(payload)
print(response)
    

[{'generated_text': 'GenAI, or Generative Artificial Intelligence, is a field of study that focuses on developing artificial intelligence systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. GenAI is a subset of AI that uses machine learning algorithms to analyze large amounts of data and identify patterns that can be used to make predictions or decisions. One of the most significant applications of GenAI is in the fields of healthcare, finance, and transportation. GenAI systems are used in a variety of industries, including healthcare, finance, and transportation. They'}]


In [28]:
import boto3

# Endpoint to call through the low-level runtime API.
endpoint_name = ENDPOINT

# boto3 client for invoking SageMaker endpoints (region is fixed to us-east-1).
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name="us-east-1")

In [None]:
# Get an inference by calling the endpoint directly with the JSON-encoded payload.
request_body = json.dumps(payload)
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=request_body,
)

# The response body is a stream: read it once and decode it as UTF-8 text.
raw_bytes = response["Body"].read()
print(raw_bytes.decode("utf-8"))