In [None]:
# Install the PEFT library into the kernel's environment (needed for the `peft` imports below).
# NOTE(review): the version is not pinned — consider pinning it to the version used for training.
%pip install peft

In [None]:
import pickle
import sagemaker
import pandas as pd
import boto3
import io
import os
import torch
import torch.nn.functional as F
import numpy as np

from sklearn.metrics import f1_score, confusion_matrix, classification_report, balanced_accuracy_score, accuracy_score

from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import AutoPeftModelForSequenceClassification
from peft import PeftModel  
from sagemaker.huggingface import HuggingFaceModel
from huggingface_hub import notebook_login

In [3]:
# Number of target classes; passed as `num_labels` when the classifier is loaded below.
NUM_LABELS = 15

## Login to HuggingFace for a gated model

In [None]:
# Log in to the Hugging Face Hub from the notebook (the base model is gated, per the heading above).
notebook_login()

## Copy Model from S3

In [1]:
# Copy the model archive from S3 into the current working directory.
# Replace <s3-model-path> with the real bucket/prefix before running.
!aws s3 cp s3://<s3-model-path>/output/model.tar.gz ./

## Unzip .tar

In [7]:
!mkdir ./llama_3b_ft

In [None]:
# Extract the gzip-compressed archive (verbose) into ./llama_3b_ft/.
# --warning=no-unknown-keyword silences tar warnings about unknown extended-header keywords.
!tar -xvzf ./model.tar.gz -C ./llama_3b_ft/ --warning=no-unknown-keyword

## Load Peft Model

In [None]:
# Load the fine-tuned PEFT classifier from the extracted directory with NUM_LABELS output classes.
# NOTE(review): `adapter_model` is not referenced again in the visible notebook, and the same
# checkpoint is loaded a second time into `model` further down — confirm whether this load is
# still needed, since it keeps a second copy of the weights in memory.
adapter_model = AutoPeftModelForSequenceClassification.from_pretrained("./llama_3b_ft/",
                                                              num_labels=NUM_LABELS)

## Load category map
The category map can be loaded from a local file or downloaded from S3 first.

In [7]:
# Read the pickled category map from the working directory.
# SECURITY: pickle.load can execute arbitrary code — only load a file you produced yourself.
# presumably maps class id -> category name (it is assigned to id2label below) — TODO confirm
with open('./job_category.pickle', 'rb') as fp:
    category_map = pickle.load(fp)


## Load Huggingface model

In [8]:
# Local directory holding the extracted fine-tuned PEFT checkpoint.
PEFT_MODEL_ID = "./llama_3b_ft"

In [None]:
# Load the PEFT sequence-classification model from PEFT_MODEL_ID with NUM_LABELS classes.
model = AutoPeftModelForSequenceClassification.from_pretrained(PEFT_MODEL_ID,  num_labels=NUM_LABELS)

## Merging Peft Adapter and Model

In [10]:
# Merge the PEFT adapter into the base model; `full_model` is what gets labelled, saved and tested below.
full_model = model.merge_and_unload()

## Setting labels 

In [12]:
# Attach the class-name mappings to the merged model's config.
# assumes category_map maps class id -> category name — TODO confirm
full_model.config.id2label = category_map
# label2id is the inverse mapping: category name -> class id.
full_model.config.label2id = {name: idx for idx, name in category_map.items()}

## Save Fine-tuned Model

In [13]:
# Write the merged model into ./llama_3b_ft — the same directory the archive was extracted into.
full_model.save_pretrained("./llama_3b_ft")

## Testing Merged Model

In [None]:
# Load the tokenizer of the base checkpoint from the Hugging Face Hub.
# return_tensors / padding / truncation / max_length are arguments of the tokenizer *call*,
# not of from_pretrained, so they are supplied where the tokenizer is invoked instead.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")

# Tokenizers without a pad token raise as soon as padding is requested; fall back to the
# end-of-sequence token so that calls using `padding=True` work.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [25]:
# Sample job posting (iOS developer role) used to smoke-test the merged model locally.
inp = """JOB DESCRIPTION:

Strong framework outside of iOS is always a plus

iOS experience and generalist engineers with backgrounds in related technologies is a plus

A disciplined approach to development, documentation and file structure

Strong visual design sense and excellent taste

A constant desire to improve, learn more and take things higher

An excellent understanding of networking, mobile network issues, concurrency and threading

Experience working with internationalized apps

RESPONSIBILITIES
Design and build advanced applications for the iOS platform.
Collaborate with cross-functional teams to define, design, and ship new features..
Work on bug fixing and improving application performance.
Continuously discover, evaluate, and implement new technologies to maximize development efficiency.
Have published one or more iOS apps in the app store.
A deep familiarity with Objective-C and Cocoa Touch.
Experience working with iOS frameworks such as Core Data, Core Animation, Core Graphics and Core Text.
Experience with third-party libraries and APIs.
Working knowledge of the general mobile landscape, architectures, trends, and emerging technologies.
Solid understanding of the full mobile development life cycle.
Responsible for working on different layers of the iOS apps.
Help architect and maintain our set of native mobile applications."""

In [None]:
# Tokenize the sample text (truncated to 512 tokens) and return PyTorch tensors.
inputs = tokenizer(inp, return_tensors="pt", truncation=True, max_length=512)

# Inference only: disable autograd so no computation graph is kept for the forward pass.
with torch.no_grad():
    out = full_model(**inputs)

# Index of the highest logit per input row = predicted class id (look it up in category_map).
np.argmax(out.logits.detach().numpy(), axis=1)

In [None]:
# Display the category map to interpret the class id predicted above.
category_map

## Create tarball for Deployment
- First go into model directory using terminal
- create a tar ball 
- upload to s3

**NOTE**: Better to do it from terminal

In [None]:
# Print the current working directory (useful before packaging from a terminal).
!pwd

In [1]:
# NOTE(review): leftover scratch cell with no code — safe to delete.

In [None]:
# Packaging commands, intentionally left commented out: run them from a terminal inside the
# model directory. The second variant adds --checkpoint=1000.
# NOTE(review): recent GNU tar treats --exclude as positional (it only affects arguments that
# follow it), so placing it before `*` may be required — verify before use.
# !tar zcvf model.tar.gz * --exclude='checkpoint-*'
# tar zcvf model.tar.gz * --exclude='checkpoint-*' --checkpoint=1000

### upload to s3

In [2]:
# Upload the packaged model archive to S3. Replace <S3-PATH> with the real bucket/prefix first.
!aws s3 cp ./llama_3b_ft/model.tar.gz s3://<S3-PATH>/llama_3b_ft/

In [4]:
# Environment variables handed to the inference container.
# HF_TASK is set to the text-classification task.
# HF_TOKEN is read from the notebook's environment so the secret is never written into the
# notebook; the original placeholder is kept as the fallback when the variable is not set.
env = {
    "HF_TASK": "text-classification",
    "HF_TOKEN": os.environ.get("HF_TOKEN", "<SPECIFY-YOUR-HG-TOKEN>"),
}

In [5]:
# IAM execution role of the current SageMaker session; passed to HuggingFaceModel below.
role = sagemaker.get_execution_role()

Use the same library versions that were used during training, for consistency.

In [None]:
# Describe the packaged model to SageMaker as a Hugging Face model.
huggingface_model = HuggingFaceModel(
    model_data="s3://<S3-PATH>/llama_3b_ft/model.tar.gz",  # S3 location of the model archive
    role=role,                      # IAM role allowed to create the endpoint
    transformers_version="4.37",    # transformers version of the container
    pytorch_version="2.1",          # PyTorch version of the container
    py_version="py310",             # Python version of the container
    env=env,                        # container environment variables (task + token)
)

# Create a real-time endpoint on a single instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.p3.8xlarge",
    # Author's note: give the instance at least 100 GB of storage, otherwise the model
    # does not load correctly.
    volume_size=256,
)

In [24]:
# Second sample job posting (WordPress developer role), sent to the deployed endpoint below.
# NOTE(review): the "â€¢" sequences look like mis-decoded bullet characters; they are left
# untouched because this text is model input — confirm whether the training data looked the same.
inp = """Experience: 2-5 years

Job Location:- Aurangabad/Pune

Vacancies:- 02

Note: Fresher Do Not Apply

Job Description

Looking for experienced developers who are passionate to work with an IT / Software Development company.

Basic Requirements:
Having prior working experience on WordPress
Should be proficient verbally and written communication skills.
Should be capable of writing an efficient code using best software development with good coding practices.
Able to integrate data from various back-end services and databases.


â€¢ WordPress
â€¢ Plugin-in development
â€¢ PHP
â€¢ HTML/HTML5
â€¢ Javascript/jQuery
â€¢ Bootstrap
â€¢ MySQL

Qualification:
â€¢ UG: B.Sc (CS/CSC/IT), BCA, BCS, BE, B.Tech (CS/CSE/IT)
â€¢ M.Sc (CS/CSC/IT), MCA, MCS, ME, M.Tech (CS/CSE/IT)"""

In [25]:
# Request payload for the endpoint: the text to classify plus per-request parameters.
# NOTE(review): "hf_token" travels inside the request body — confirm the endpoint actually
# needs it here, since the token is already supplied through the container environment.
data = {
    "inputs": inp,
    "parameters": {
        "max_length": 512,
        "truncation": True,
        "hf_token": "<HF-TOKEN>",
    },
}

In [3]:
# Send the payload to the deployed endpoint and display its response.
predictor.predict(data)

In [27]:
# Clean-up step 1: delete the SageMaker model behind the endpoint (run before deleting the endpoint).
predictor.delete_model()

In [28]:
# Clean-up step 2: delete the endpoint so the instance stops incurring cost.
predictor.delete_endpoint()

In [None]:
# Classify the current `inp` locally with the merged model, for comparison with the endpoint.
# `padding=True` is dropped: a single input needs no padding, and requesting padding raises
# when the tokenizer has no pad token configured.
inputs = tokenizer(inp, return_tensors="pt", truncation=True, max_length=512)

# Inference only: disable autograd so no computation graph is kept for the forward pass.
with torch.no_grad():
    out = full_model(**inputs)

# Index of the highest logit per input row = predicted class id (look it up in category_map).
np.argmax(out.logits.detach().numpy(), axis=1)

#

In [None]:
# Display the category map to interpret the class id predicted above.
category_map