In [1]:
import sagemaker
import boto3
# Bucket override: leave as None to fall back to the session's default bucket.
sagemaker_session_bucket = None

# First session: only used to look up the default bucket when no override is set.
sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role ARN. If get_execution_role() raises ValueError,
# fall back to looking the role up by name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(
        RoleName='AmazonSageMaker-ExecutionRole-20231030T210397'
    )['Role']['Arn']

# Final session, bound explicitly to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Report the resolved role, bucket and region.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/ravi.tej/Library/Application Support/sagemaker/config.yaml


Couldn't call 'get_role' to get Role ARN from role name ravi_tej to get Role path.


sagemaker role arn: arn:aws:iam::005418323977:role/service-role/AmazonSageMaker-ExecutionRole-20231030T210397
sagemaker bucket: sagemaker-ap-south-1-005418323977
sagemaker session region: ap-south-1


In [2]:
import pandas as pd
import numpy as np
import json

import re
from transformers import AutoTokenizer
from random import randint
import sys
sys.path.append("../utils")
from pack_dataset import pack_dataset
from datasets import Dataset

import requests

In [3]:
# Identifier of the pre-trained base model; passed to the training job as 'model_id'.
model_id = "NousResearch/Nous-Capybara-34B"

In [36]:
from huggingface_hub import HfFolder


# Hyperparameters handed to the training job (insertion order kept as before).
hyperparameters = dict(
    # -- model and data --
    model_id=model_id,                            # pre-trained model
    dataset_path='/opt/ml/input/data/training',   # where SageMaker places the training dataset
    # -- optimisation schedule --
    num_train_epochs=1,                           # number of training epochs
    per_device_train_batch_size=1,                # batch size for training
    gradient_accumulation_steps=4,                # number of update steps to accumulate
    gradient_checkpointing=True,                  # saves memory, slower backward pass
    bf16=True,                                    # use bfloat16 precision
    tf32=True,                                    # use tf32 precision
    learning_rate=2e-4,                           # learning rate
    max_grad_norm=0.3,                            # maximum norm for gradient clipping
    warmup_ratio=0.03,                            # warmup ratio
    lr_scheduler_type="cosine_with_restarts",     # learning rate scheduler
    # -- checkpointing and logging --
    save_strategy="epoch",                        # checkpoint save strategy
    logging_steps=10,                             # log every x steps
    # -- model post-processing / attention --
    merge_adapters=True,                          # whether to merge LoRA into the model (needs more memory)
    use_flash_attn=True,                          # whether to use Flash Attention
    # Working directory for assets written during training; could be used for
    # checkpointing. The final trained model is saved to S3 at the end of training.
    output_dir='/tmp/run',
)

# Forward the locally stored Hugging Face token, when one exists, so the job can
# access gated models (e.g. llama 2).
if HfFolder.get_token() is not None:
    hyperparameters.update(hf_token=HfFolder.get_token())

In [37]:
from datetime import datetime

In [38]:
# Identifier of this fine-tuning run.
finetune_id = 'PinkSparrowOnTV'

# Metadata describing the fine-tuning dataset; 'date' is today's date as YYYY-MM-DD.
finetune_dataset_config = dict(
    finetune_id=f"search_capybara_fp16_{finetune_id}_latest",
    date=datetime.today().strftime('%Y-%m-%d'),
    num_datapoints=2480,
    data_source='gpt4',
)

In [39]:
from sagemaker.huggingface import HuggingFace

# Training job name: "huggingface-qlora", the model id with "/" and "." turned
# into "-", and the fine-tune id, all joined by "-".
job_name = "-".join([
    "huggingface-qlora",
    hyperparameters["model_id"].replace("/", "-").replace(".", "-"),
    finetune_dataset_config["finetune_id"],
])

# Estimator describing the training job.
huggingface_estimator = HuggingFace(
    # --- training code ---
    entry_point='run_qlora-original.py',      # training script
    source_dir='../utils/',                   # directory with all files needed for training
    hyperparameters=hyperparameters,          # hyperparameters passed to the training job
    # --- compute ---
    instance_type='ml.g5.24xlarge',           # instance type used for the training job
    instance_count=1,                         # number of training instances
    volume_size=50,                           # EBS volume size in GB
    max_run=13 * 60 * 60,                     # maximum runtime in seconds (13 hours)
    # --- identity ---
    base_job_name=job_name,                   # base name of the training job
    role=role,                                # IAM role the job uses to access AWS resources, e.g. S3
    # --- container versions ---
    transformers_version='4.28',              # transformers version in the training container
    pytorch_version='2.0',                    # PyTorch version in the training container
    py_version='py310',                       # Python version in the training container
    # --- environment / output ---
    environment={"HUGGINGFACE_HUB_CACHE": "/tmp/.cache"},  # cache downloaded models under /tmp
    disable_output_compression=True,          # skip output compression to save training time and cost
)

In [40]:
# S3 prefix of the prepared fine-tuning dataset used as the 'training' input.
training_input_path = "s3://sagemaker-ap-south-1-005418323977/fine_tuning_datasets/2024-02-10-search_capybara_fp16_MangoPurpleValley"

In [41]:
# Input mapping for the job: the 'training' entry points at the uploaded S3 dataset.
data = dict(training=training_input_path)

# Launch the training job with that input; wait=True is passed so the call waits on the job.
huggingface_estimator.fit(data, wait=True)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: huggingface-qlora-NousResearch-Nous-Cap-2024-02-12-12-36-30-574


2024-02-12 12:36:33 Starting - Starting the training job...
2024-02-12 12:36:57 Starting - Preparing the instances for training.........
2024-02-12 12:38:29 Downloading - Downloading input data...
2024-02-12 12:38:54 Downloading - Downloading the training image.....................
2024-02-12 12:42:30 Training - Training image download completed. Training in progress........[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2024-02-12 12:43:47,318 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2024-02-12 12:43:47,399 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-02-12 12:43:47,408 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2024-02-12 12:43:47,410 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2024-02-12



Training seconds: 2540
Billable seconds: 2540


In [13]:
# S3 URI of the trained model, read from the estimator's model_data
# (the "S3Uri" entry of its "S3DataSource" mapping).
model_s3_path = huggingface_estimator.model_data["S3DataSource"]["S3Uri"]

In [14]:
model_s3_path

's3://sagemaker-ap-south-1-005418323977/huggingface-qlora-teknium-OpenHermes-2--2024-02-10-07-44-31-275/output/model/'

### Deployment

In [10]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Resolve the container image URI for the "huggingface" LLM backend, version 1.1.0,
# using the current SageMaker session.
llm_image = get_huggingface_llm_image_uri("huggingface", session=sess, version="1.1.0")

# Show the resolved ECR image URI.
print(f"llm image uri: {llm_image}")

INFO:sagemaker.image_uris:Defaulting to only available Python version: py39
INFO:sagemaker.image_uris:Defaulting to only supported image scope: gpu.


llm image uri: 763104351884.dkr.ecr.ap-south-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04


In [11]:
import boto3
import os

# boto3 client for S3.
s3 = boto3.client("s3")

In [15]:
import json
from sagemaker.huggingface import HuggingFaceModel
# Endpoint sizing.
instance_type = "ml.g5.xlarge"
number_of_gpu = 1
health_check_timeout = 300  # later passed to deploy() as container_startup_health_check_timeout

# Environment variables for the inference container; numeric values are JSON-encoded strings.
config = dict(
    HF_MODEL_ID="/opt/ml/model",              # path where SageMaker stores the model
    SM_NUM_GPUS=json.dumps(number_of_gpu),    # number of GPUs used per replica
    MAX_INPUT_LENGTH=json.dumps(3584),        # max length of the input text
    MAX_TOTAL_TOKENS=json.dumps(4096),        # max length of the generation (including input text)
)

# Model object: container image plus uncompressed model files under an S3 prefix.
llm_model = HuggingFaceModel(
    image_uri=llm_image,
    role=role,
    env=config,
    model_data={
        'S3DataSource': {
            'S3Uri': model_s3_path,
            'S3DataType': 'S3Prefix',
            'CompressionType': 'None',
        }
    },
)

In [16]:
finetune_id

'WhiteBatInMeadow'

In [17]:
# Deploy llm_model to a named endpoint.
# API reference: https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.model.Model.deploy
llm = llm_model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name="OpenHermes-Search-PinkSparrowOnTV",
    # Startup health-check timeout that gives the container time to load the model;
    # the value comes from health_check_timeout, set in the configuration cell.
    container_startup_health_check_timeout=health_check_timeout,
)

INFO:sagemaker:Creating model with name: huggingface-pytorch-tgi-inference-2024-02-10-14-19-59-239
INFO:sagemaker:Creating endpoint-config with name OpenHermes-Search-PinkSparrowOnTV
INFO:sagemaker:Creating endpoint with name OpenHermes-Search-PinkSparrowOnTV


--------!

### Inference

In [1]:
# Root of the project checkout; added to sys.path below so `src.*` modules resolve.
parent_folder = '/Users/ravi.tej/Desktop/ML/Recommendations/hubble/'
from hydra import compose, initialize
import os

import xml.etree.ElementTree as ET

# Load environment variables from the <env name="..." value="..."/> entries found
# under ./configuration/envs in the XML file.
tree = ET.parse('../../conf/application.run.xml')
root = tree.getroot()

envs_element = root.find('./configuration/envs')
if envs_element is None:
    # find() returns None when nothing matches; fail with a message that names
    # the missing element instead of an AttributeError on None.
    raise ValueError(
        "No './configuration/envs' element found in ../../conf/application.run.xml"
    )
for variable in envs_element.findall('env'):
    name = variable.get('name')
    value = variable.get('value')
    os.environ[name] = value

import sys
# Reuse parent_folder rather than repeating the literal path, and skip the append
# when the entry is already present so re-running the cell does not add duplicates.
if parent_folder not in sys.path:
    sys.path.append(parent_folder)

In [2]:
from src.SummaryService import SummaryService
from src.HybridSearchService import HybridSearchService

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# arts = HybridSearchService.perform_hybrid_reranked_search(query_text = 'how to save taxes')

In [16]:
# Build the summary prompt request for a sample query and three article ids.
params = SummaryService.create_prompt_request_params_summary(
    query='what are the benefits of CBDC',
    top_article_ids=[
        '6571c28425cc9a343f0393d7',
        '65929b494b25ce4814fb4339',
        '659657944b25ce4814fb4bdd',
    ],
)

# Optional request overrides, currently disabled:
# params['parameters']['adapter_id'] = 'lorax/OpenHermes_adapter_Search_mid_PinkSparrowOnTV'
# params['parameters']['adapter_source'] = 's3'
# params['parameters']['stream'] = True
# params['parameters']['temperature'] = 0.2
# params['parameters']['top_p'] = 0.7

In [11]:
class TokenIterator:
    """Iterate over the token texts carried by a streamed endpoint response.

    ``stream`` is an iterable of events shaped like
    ``{"PayloadPart": {"Bytes": b"..."}}``. The bytes of successive events are
    appended to an internal buffer and consumed one newline-terminated line at
    a time, so a line may be split across several events. Blank lines are
    skipped. For every other line an optional leading ``data:`` prefix is
    removed, the remainder is parsed as JSON and ``["token"]["text"]`` of the
    parsed object is yielded.

    Raises:
        StopIteration: when the underlying stream is exhausted.
        json.JSONDecodeError: when a non-blank line is not valid JSON.
        KeyError: when an event lacks ``PayloadPart``/``Bytes`` or a parsed
            line lacks ``token``/``text``.
    """

    # Exact prefix removed from the front of a line before JSON parsing.
    _DATA_PREFIX = "data:"

    def __init__(self, stream):
        self.byte_iterator = iter(stream)
        self.buffer = io.BytesIO()
        # Offset in ``buffer`` of the first byte that has not been consumed yet.
        self.read_pos = 0

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            self.buffer.seek(self.read_pos)
            line = self.buffer.readline()
            if line.endswith(b"\n"):
                # Advance by exactly the bytes consumed; separator lines are
                # handled by the blank-line check below rather than by assuming
                # one extra byte follows every line.
                self.read_pos += len(line)
                text = line.decode("utf-8").strip()
                if not text:
                    continue  # blank separator line (also covers "\r\n")
                # Remove the prefix as a prefix; str.lstrip("data:") would strip
                # any run of the characters 'd', 'a', 't', ':' instead.
                if text.startswith(self._DATA_PREFIX):
                    text = text[len(self._DATA_PREFIX):]
                return json.loads(text)["token"]["text"]
            # No complete line buffered yet: append the next event's bytes at
            # the end of the buffer and try again. StopIteration from the
            # exhausted stream ends this iterator.
            chunk = next(self.byte_iterator)
            self.buffer.seek(0, io.SEEK_END)
            self.buffer.write(chunk["PayloadPart"]["Bytes"])

In [12]:
from botocore.config import Config
import boto3

In [13]:
# botocore client configuration: region, retry policy and connection-pool size.
my_config = Config(
    region_name='ap-south-1',
    max_pool_connections=40,  # increased pool size
    retries=dict(max_attempts=3, mode='standard'),
)

# SageMaker Runtime client created from a fresh boto3 session with the configuration above.
sess1 = boto3.session.Session()
sagemaker_runtime = sess1.client("sagemaker-runtime", config=my_config)

In [14]:
params

{'inputs': '<|im_start|> system\nYou are the chief editor for an indian finance and business portal. You answer valid inquiries using relevant articles you\'ve found for the query. Valid queries pertain to finance, business, or entities like India, RBI, Reliance, Fed, etc. All other queries are invalid. You encounter 3 scenarios with specific response guidelines:\nCase 1: The query is valid and answerable with available articles.\nResponse instructions:\n* Provide answers with a headline, then 3 to 6 points, each starting with an emoji and a bolded label, followed by the point.\n* Format: <Headline \n\n emoji1 label1: point1 \n\n emoji2 label2: point2 \n\n…>. Begin with a headline, then list points. Separate points with \n\n.\n* Each point must be brief, answer a unique aspect of the query, and highly relevant.\n* Begin with the most important point. Points collectively should fully address the query.\n* Numbers and dates in points must match those in the articles.\n* The Emoji and the

In [17]:
import json

import io

# Call the endpoint in response-streaming mode with the JSON-encoded request.
response = sagemaker_runtime.invoke_endpoint_with_response_stream(
    ContentType="application/json",
    EndpointName='OpenHermes-Search-PinkSparrowOnTV',
    Body=json.dumps(params),
)

# Print each token text as it is produced, with nothing appended after it.
for token in TokenIterator(response["Body"]):
    print(token, end="")

**Benefits of Central Bank Digital Currencies (CBDC)**
💼 **Efficient Transactions**: CBDCs enable faster and cost-effective cross-border payments.
📈 **Growing Adoption**: RBI targets 1 million daily transactions in the retail CBDC by end of 2023.
🔍 **Learning Experience**: RBI has seen significant insights from the pilot program.
🏦 **Bank Incentives**: Banks offer incentives to boost adoption and user base.
🌐 **Global Adoption**: CBDCs are gaining popularity globally, with countries aiming for widespread adoption.
🛡️ **Security Enhancement**: CBDCs are designed to enhance security compared to traditional methods.<|im_end|>

2024-02-12 10:10:18,531| ERROR   | Could not establish connection from local ('127.0.0.1', 3054) to remote ('insight-beta-article-db2.chmaiagjipqy.ap-south-1.docdb.amazonaws.com', 27017) side of the tunnel: open new channel ssh error: Timeout opening channel.
2024-02-12 10:10:38,652| ERROR   | Could not establish connection from local ('127.0.0.1', 3054) to remote ('insight-beta-article-db2.chmaiagjipqy.ap-south-1.docdb.amazonaws.com', 27017) side of the tunnel: open new channel ssh error: Timeout opening channel.
2024-02-12 10:10:58,722| ERROR   | Could not establish connection from local ('127.0.0.1', 3054) to remote ('insight-beta-article-db2.chmaiagjipqy.ap-south-1.docdb.amazonaws.com', 27017) side of the tunnel: open new channel ssh error: Timeout opening channel.
2024-02-12 10:11:18,814| ERROR   | Could not establish connection from local ('127.0.0.1', 3054) to remote ('insight-beta-article-db2.chmaiagjipqy.ap-south-1.docdb.amazonaws.com', 27017) side of the tunnel: open new chan