In [1]:
%pip install peft

Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting transformers (from peft)
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting safetensors (from peft)
  Downloading safetensors-0.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting huggingface-hub>=0.17.0 (from peft)
  Downloading huggingface_hub-0.24.6-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers->peft)
  Downloading regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers<0.20,>=0.19 (from tra

In [2]:
import pickle
import sagemaker
import pandas as pd
import boto3
import io
import os
import torch
import torch.nn.functional as F
import numpy as np

from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import AutoPeftModelForSequenceClassification
from peft import PeftModel  
from sagemaker.huggingface import HuggingFaceModel
from huggingface_hub import notebook_login

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# Number of target classes passed to the sequence-classification model loaders below.
# NOTE(review): presumably this must equal len(category_map) — confirm against the loaded pickle.
NUM_LABELS = 15

## Login to HuggingFace for a gated model

In [4]:
# Shows the Hugging Face login widget. The base checkpoint downloaded further down
# (meta-llama/Meta-Llama-3-8B) is gated, so an authorised token is needed first.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Copy Model from S3

In [4]:
# Download the training job's output artifact (model.tar.gz) from S3 into the current directory.
!aws s3 cp s3://sagemaker-us-east-1-769855604101/huggingface-pytorch-training-2024-08-31-07-27-32-313/output/model.tar.gz ./

download: s3://sagemaker-us-east-1-769855604101/huggingface-pytorch-training-2024-08-31-07-27-32-313/output/model.tar.gz to ./model.tar.gz


## Unzip .tar

In [7]:
!mkdir ./llama_3b_ft

In [8]:
# Extract the downloaded archive into ./llama_3b_ft/.
# --warning=no-unknown-keyword suppresses tar's warnings about unrecognised extended-header keywords.
!tar -xvzf ./model.tar.gz -C ./llama_3b_ft/ --warning=no-unknown-keyword

special_tokens_map.json
tokenizer.json
tokenizer_config.json
adapter_model.safetensors
README.md
checkpoint-29/
checkpoint-29/special_tokens_map.json
checkpoint-29/tokenizer.json
checkpoint-29/tokenizer_config.json
checkpoint-29/adapter_model.safetensors
checkpoint-29/README.md
checkpoint-29/adapter_config.json
checkpoint-29/trainer_state.json
checkpoint-29/optimizer.pt
checkpoint-29/scheduler.pt
checkpoint-29/training_args.bin
checkpoint-29/rng_state.pth
adapter_config.json
training_args.bin


## Load Peft Model

In [5]:
# Load the base model together with the fine-tuned PEFT adapter stored in ./llama_3b_ft/.
# NOTE(review): `adapter_model` is not referenced again in the visible part of this notebook, and
# the same checkpoint is loaded a second time as `model` further down. Keeping both holds two
# copies of the 8B model in memory — confirm whether this cell is still needed.
adapter_model = AutoPeftModelForSequenceClassification.from_pretrained("./llama_3b_ft/",
                                                              num_labels=NUM_LABELS)

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Load category map
Load locally or from s3

In [6]:
# s3 = boto3.resource('s3')
# category_map = pickle.loads(s3.Bucket("cast-ai").Object("job_category.pickle").get()['Body'].read())

In [7]:
# Read the class-id -> class-name mapping from the local pickle file.
# NOTE: unpickling can execute arbitrary code; only load a file you produced yourself.
with open('./job_category.pickle', 'rb') as pickle_file:
    category_map = pickle.loads(pickle_file.read())


## Load Huggingface model

In [8]:
# Local directory that holds the extracted adapter files; used as the load path in the next cell.
PEFT_MODEL_ID = "./llama_3b_ft"

In [9]:
# Load base model + adapter; this is the instance that gets merged below.
# The "score.weight ... newly initialized" warning printed here refers to the base checkpoint.
# NOTE(review): presumably the trained classification head is restored from the adapter files —
# confirm that the adapter was saved with the `score` module included.
model = AutoPeftModelForSequenceClassification.from_pretrained(PEFT_MODEL_ID,  num_labels=NUM_LABELS)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Merging Peft Adapter and Model

In [10]:
# Fold the adapter weights into the base model and strip the PEFT wrappers, leaving a
# standalone transformers model that no longer needs the adapter at load time.
full_model = model.merge_and_unload()

## Setting labels 

In [12]:
# Store both directions of the class mapping on the config so they are saved with the model:
# id2label is the loaded map itself, label2id is its inverse.
full_model.config.id2label = category_map
full_model.config.label2id = {
    label_name: label_id for label_id, label_name in category_map.items()
}

## Save Fine-tuned Model

In [13]:
# Write the merged model (config + weights) into the directory the adapter was extracted to.
# NOTE(review): that directory still contains adapter_config.json / adapter_model.safetensors
# from the training tarball. Confirm that downstream loaders use the merged weights rather than
# re-applying the adapter to the base model, or save into a clean directory instead.
full_model.save_pretrained("./llama_3b_ft")

## Testing Merged Model

In [14]:
# Load the tokenizer that belongs to the base model.
# return_tensors / padding / truncation / max_length are options of the tokenizer *call*,
# not of from_pretrained, so they are given where text is actually tokenized instead of here.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [25]:
inp = """JOB DESCRIPTION:

Strong framework outside of iOS is always a plus

iOS experience and generalist engineers with backgrounds in related technologies is a plus

A disciplined approach to development, documentation and file structure

Strong visual design sense and excellent taste

A constant desire to improve, learn more and take things higher

An excellent understanding of networking, mobile network issues, concurrency and threading

Experience working with internationalized apps

RESPONSIBILITIES
Design and build advanced applications for the iOS platform.
Collaborate with cross-functional teams to define, design, and ship new features..
Work on bug fixing and improving application performance.
Continuously discover, evaluate, and implement new technologies to maximize development efficiency.
Have published one or more iOS apps in the app store.
A deep familiarity with Objective-C and Cocoa Touch.
Experience working with iOS frameworks such as Core Data, Core Animation, Core Graphics and Core Text.
Experience with third-party libraries and APIs.
Working knowledge of the general mobile landscape, architectures, trends, and emerging technologies.
Solid understanding of the full mobile development life cycle.
Responsible for working on different layers of the iOS apps.
Help architect and maintain our set of native mobile applications."""

In [26]:
# Tokenize the single job description, truncating to at most 512 tokens.
inputs = tokenizer(inp, return_tensors="pt", truncation=True, max_length=512)

# Inference only: disable autograd so no computation graph is retained for the forward pass.
with torch.no_grad():
    out = full_model(**inputs)

# Index of the highest-scoring class per input row; look the id up in category_map.
np.argmax(out.logits.cpu().numpy(), axis=1)

array([14])

In [27]:
category_map

{0: 'Backend Developer',
 1: 'Database Administrator',
 2: 'DevOps Engineer',
 3: 'Django Developer',
 4: 'Flutter Developer',
 5: 'Full Stack Developer',
 6: 'Java Developer',
 7: 'JavaScript Developer',
 8: 'Machine Learning',
 9: 'Network Administrator',
 10: 'Node js developer',
 11: 'PHP Developer',
 12: 'Software Engineer',
 13: 'Wordpress Developer',
 14: 'iOS Developer'}

## Create tarball for Deployment
- First, `cd` into the model directory in a terminal
- Create a tarball of its contents
- Upload the tarball to S3

**NOTE**: Better to do it from terminal

In [2]:
!pwd

/home/ec2-user/SageMaker


In [1]:
# asdsad

In [None]:
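# Put --exclude before the file arguments: newer GNU tar versions treat it as positional and
# ignore it when it trails the file list.
# !tar --exclude='checkpoint-*' -zcvf model.tar.gz *
# tar --exclude='checkpoint-*' --checkpoint=1000 -zcvf model.tar.gz *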

### upload to s3

In [4]:
# Upload the packaged model to S3; replace <S3-PATH> with your own bucket/prefix.
!aws s3 cp ./llama_3b_ft/model.tar.gz s3://<S3-PATH>/llama_3b_ft/

upload: llama_3b_ft/model.tar.gz to s3://job-skill-s3/llama_3b_ft/model.tar.gz


In [4]:
# Environment variables for the inference container.
# The Hub token is taken from this notebook's own environment when it is set, so the secret
# never has to be pasted into (and saved with) the notebook; the placeholder remains the fallback.
env = {
    "HF_TASK": "text-classification",
    "HF_TOKEN": os.environ.get("HF_TOKEN", "<SPECIFY-YOUR-HG-TOKEN>"),
}

In [5]:
# Execution role of the current SageMaker session; handed to the model object below.
role = sagemaker.get_execution_role()

Use the same versions of all libraries that were used during training, for consistency.

In [23]:
# Create the Hugging Face model object that describes the inference container.
# NOTE(review): the pip log at the top of this notebook resolved transformers 4.44.2 locally,
# while the container below is pinned to 4.37 — confirm the saved model loads under that version.
huggingface_model = HuggingFaceModel(model_data="s3://<S3-PATH>/llama_3b_ft/model.tar.gz",  # S3 path of the packaged model tarball
                                     role=role, # IAM role with permissions to create an Endpoint
                                     transformers_version="4.37", # transformers version of the container
                                     pytorch_version="2.1", # PyTorch version of the container
                                     py_version="py310", # Python version of the DLC
                                     env=env,
                                )

# Deploy the model to a real-time SageMaker endpoint.
# NOTE(review): the invocation recorded later in this notebook fails with "CUDA out of memory" on
# a GPU reporting 15.78 GiB total. This instance type may not offer enough per-GPU memory for the
# model as saved — consider an instance with larger GPUs or saving the weights in half precision.
predictor = huggingface_model.deploy(initial_instance_count=1,
                                     instance_type="ml.p3.8xlarge",
                                     volume_size=256  ## Specify at least 100 GB, otherwise the model will not load correctly
                                )

--------------!

In [24]:
inp = """Experience: 2-5 years

Job Location:- Aurangabad/Pune

Vacancies:- 02

Note: Fresher Do Not Apply

Job Description

Looking for experienced developers who are passionate to work with an IT / Software Development company.

Basic Requirements:
Having prior working experience on WordPress
Should be proficient verbally and written communication skills.
Should be capable of writing an efficient code using best software development with good coding practices.
Able to integrate data from various back-end services and databases.


â€¢ WordPress
â€¢ Plugin-in development
â€¢ PHP
â€¢ HTML/HTML5
â€¢ Javascript/jQuery
â€¢ Bootstrap
â€¢ MySQL

Qualification:
â€¢ UG: B.Sc (CS/CSC/IT), BCA, BCS, BE, B.Tech (CS/CSE/IT)
â€¢ M.Sc (CS/CSC/IT), MCA, MCS, ME, M.Tech (CS/CSE/IT)"""

In [25]:
# Request payload for the endpoint: the raw text plus the tokenizer options to apply.
# No credentials belong in the request body — the Hub token reaches the container through the
# HF_TOKEN environment variable set at deploy time.
# NOTE: any access token that was previously embedded in this cell should be revoked.
data = {
    "inputs": inp,
    "parameters": {
        "max_length": 512,
        "truncation": True,
    },
}

In [26]:
# Send the payload to the deployed endpoint; a container-side failure surfaces as ModelError.
predictor.predict(data)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "CUDA out of memory. Tried to allocate 224.00 MiB. GPU 1 has a total capacty of 15.78 GiB of which 109.00 MiB is free. Process 14053 has 15.67 GiB memory in use. Of the allocated memory 14.86 GiB is allocated by PyTorch, and 13.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-inference-2024-09-01-13-32-57-306 in account 769855604101 for more information.

In [27]:
# Clean up: remove the SageMaker model created by deploy().
predictor.delete_model()

In [28]:
# Clean up: delete the endpoint so the GPU instance stops running.
predictor.delete_endpoint()

In [233]:
# Run the locally merged model on the current `inp`.
# padding is omitted: a single sequence needs none, and padding=True raises when the
# tokenizer has no pad token configured.
inputs = tokenizer(inp, return_tensors="pt", truncation=True, max_length=512)

# Inference only: disable autograd so no computation graph is retained for the forward pass.
with torch.no_grad():
    out = full_model(**inputs)

# Index of the highest-scoring class per input row; look the id up in category_map.
np.argmax(out.logits.cpu().numpy(), axis=1)

array([6])

In [234]:
category_map

{0: 'A/V|Unverified',
 1: 'A/V|Unvetted',
 2: 'A/V|Verified',
 3: 'CCTV|Unverified',
 4: 'CCTV|Unvetted',
 5: 'CCTV|Verified',
 6: 'Cabling|Unverified',
 7: 'Cabling|Unvetted',
 8: 'Cabling|Verified',
 9: 'Electrical|Unverified',
 10: 'Fiber|Unverified',
 11: 'Imaging (PC/Serv)|Unverified',
 12: 'Imaging (PC/Service)|Unverified',
 13: 'Network/Router|Unverified',
 14: 'Network/Router|Unvetted',
 15: 'Network/Router|Verified',
 16: 'PBX/Phone|Unverified',
 17: 'PBX/Phone|Unvetted',
 18: 'PBX/Phone|Verified',
 19: 'PC Service|Unverified',
 20: 'PC Service|Unvetted',
 21: 'PC Service|Verified',
 22: 'Paging|Unverified',
 23: 'Paging|Unvetted',
 24: 'Printer Service|Unverified',
 25: 'Printer Service|Unvetted',
 26: 'Printer Service|Verified',
 27: 'Security|Unverified',
 28: 'Smart Hands|Unverified'}