In [1]:
# %%bash
# pip install transformers datasets accelerate sentencepiece protobuf==3.20 py7zr scipy peft bitsandbytes fire torch_tb_profiler ipywidgets tqdm vllm
# TRANSFORM=`python -c "import transformers;print('/'.join(transformers.__file__.split('/')[:-1])+'/models/llama/convert_llama_weighjts_to_hf.py')"`
# python ${TRANSFORM} --input_dir models --model_size 7B --output_dir models_hf/7B

In [2]:
# llama-recipes/src/llama_recipes/utils/dataset_utils.py

## Work Experience (Shell Details)
This notebook is meant to fine-tune Llama2 on Work Experience Details (all work experience minus the job description)

### Step 1: Load the model

Point model_id to model weight folder

In [3]:
from datasets import load_from_disk
train_data = load_from_disk("custom_data/llama2/work_details.hf")

In [4]:
!nvidia-smi

Fri Jan 12 12:15:01 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A10G                    Off | 00000000:00:1E.0 Off |                    0 |
|  0%   29C    P0              58W / 300W |      4MiB / 23028MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [5]:
from huggingface_hub import login
login(token='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')

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/ubuntu/.cache/huggingface/token
Login successful


In [6]:
import time

## Important 

It is important to consider here which model we're using to parse the resume

In [7]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_id="meta-llama/Llama-2-7b-chat-hf"

tokenizer = LlamaTokenizer.from_pretrained(model_id)

model = LlamaForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map='auto', torch_dtype=torch.float16, token='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')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear8bitLt(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear8bitLt(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
 

In [9]:
!nvidia-smi

Fri Jan 12 12:16:11 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A10G                    Off | 00000000:00:1E.0 Off |                    0 |
|  0%   32C    P0              61W / 300W |   7516MiB / 23028MiB |      2%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [10]:
import pandas as pd
import pickle
from datasets import Dataset

In [12]:

import sys
sys.path.append('/home/ec2-user/SageMaker/llama_root/src')
sys.path.append('../llama-recipes/src/llama_recipes/')

### Step 3: Check base model

Run the base model on an example input:

In [13]:
work_prompt = f'''
You are an accurate agent working for a job platform. You will be given the raw 
unstructured text of a user's resume, and the task is to extract specific details about the work experience of the 
user from the resume. The JSON should include a "work_experience" key with an array of objects. 
Each object represents a job and should contain keys for "company", "role", "start_date", "end_date".
Dates should be in "mm/yyyy" format. Please provide the data in a concise JSON format
Ensure the JSON syntax is correct, with proper use of quotes, commas, and braces. Here is an example structure::

Please follow this structure closely and keep the response within the token limit." \n{{query_format}}\n

This is the resume text:\n{{resume_text}}\n
This is the output in the required_format:\n
'''

work_format = '''
[
    {"company":"Example Company 1",
    "role":"Example Role 1",
    "start_date":"mm/yyyy",
    "end_date":"mm/yyyy"},
    {"company":"Example Company 2",
    "role":"Example Role 2","
    start_date":"mm/yyyy",
    "end_date":"mm/yyyy"}
]
'''

In [14]:
import html

df = pd.read_csv('custom_data/latest_work_exp_28dec.csv')
base_model_test_rt = html.unescape(df['resume'].sample().values[0])

In [15]:
eval_prompt = work_prompt.format(resume_text=base_model_test_rt,
                          query_format=work_format)

In [16]:
model_input = tokenizer(eval_prompt,return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=1024)[0], skip_special_tokens=True))


You are an accurate agent working for a job platform. You will be given the raw 
unstructured text of a user's resume, and the task is to extract specific details about the work experience of the 
user from the resume. The JSON should include a "work_experience" key with an array of objects. 
Each object represents a job and should contain keys for "company", "role", "start_date", "end_date".
Dates should be in "mm/yyyy" format. Please provide the data in a concise JSON format
Ensure the JSON syntax is correct, with proper use of quotes, commas, and braces. Here is an example structure::

Please follow this structure closely and keep the response within the token limit." 

[
    {"company":"Example Company 1",
    "role":"Example Role 1",
    "start_date":"mm/yyyy",
    "end_date":"mm/yyyy"},
    {"company":"Example Company 2",
    "role":"Example Role 2","
    start_date":"mm/yyyy",
    "end_date":"mm/yyyy"}
]



This is the resume text:
LAVANYA .B

Mobile: +91 9346399481;

Email:- b

We can see that the base model only repeats the conversation.

### Step 4: Prepare model for PEFT

Let's prepare the model for Parameter Efficient Fine Tuning (PEFT):

In [17]:
model.train()

def create_peft_config(model):
    from peft import (
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_int8_training,
    )

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=64,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules = ["q_proj", "v_proj"]
    )
    
    # peft_config = LoraConfig(
    #     task_type=TaskType.CAUSAL_LM,
    #     inference_mode=False,
    #     r=8,
    #     lora_alpha=32,
    #     lora_dropout=0.05,
    #     target_modules = ["q_proj", "v_proj"]
    # )

    # prepare int-8 model for training
    model = prepare_model_for_int8_training(model)
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    return model, peft_config

# create peft config
model, lora_config = create_peft_config(model)





trainable params: 33,554,432 || all params: 6,771,970,048 || trainable%: 0.49548996469513035


### Step 5: Define an optional profiler

In [18]:
from transformers import TrainerCallback
from contextlib import nullcontext
enable_profiler = False
output_dir = "tmp/llama-output"

config = {
    'lora_config': lora_config,
    'learning_rate': 1e-4,
    'num_train_epochs': 3,
    'gradient_accumulation_steps': 2,
    'per_device_train_batch_size': 2,
    'gradient_checkpointing': False,
}

# Set up profiler
if enable_profiler:
    wait, warmup, active, repeat = 1, 1, 2, 1
    total_steps = (wait + warmup + active) * (1 + repeat)
    schedule =  torch.profiler.schedule(wait=wait, warmup=warmup, active=active, repeat=repeat)
    profiler = torch.profiler.profile(
        schedule=schedule,
        on_trace_ready=torch.profiler.tensorboard_trace_handler(f"{output_dir}/logs/tensorboard"),
        record_shapes=True,
        profile_memory=True,
        with_stack=True)
    
    class ProfilerCallback(TrainerCallback):
        def __init__(self, profiler):
            self.profiler = profiler
            
        def on_step_end(self, *args, **kwargs):
            self.profiler.step()

    profiler_callback = ProfilerCallback(profiler)
else:
    profiler = nullcontext()

In [19]:
!nvidia-smi

Fri Jan 12 12:39:02 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A10G                    Off | 00000000:00:1E.0 Off |                    0 |
|  0%   33C    P0              61W / 300W |  10258MiB / 23028MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### Step 6: Fine tune the model

Here, we fine tune the model for a single epoch which takes a bit more than an hour on a A100.

In [20]:
from transformers import default_data_collator, Trainer, TrainingArguments

# Define training args
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    bf16=True,  # Use BF16 if available
    # logging strategies
    logging_dir=f"{output_dir}/logs",
    logging_strategy="steps",
    logging_steps=5,
    save_strategy="no",
    optim="adamw_torch_fused",
    max_steps=total_steps if enable_profiler else -1,
    **{k:v for k,v in config.items() if k != 'lora_config'}
)

with profiler:
    # Create Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data,
        data_collator=default_data_collator,
        callbacks=[profiler_callback] if enable_profiler else [],
    )

    # Start training
    trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
5,1.9454
10,1.8834
15,1.9827
20,1.9311
25,1.7921
30,1.8097
35,1.6954
40,1.7271
45,1.7259
50,1.6421


In [None]:
print('done, on the 26th of december, the year of our lord 2023')

### Step 7:
Save model checkpoint

In [None]:
model.save_pretrained('tmp/llama2/work-details')

### Step 8:
Try the fine tuned model on the same example again to see the learning progress:

In [15]:
eval_df = pd.read_csv('custom_data/model_eval_df.csv')

In [16]:
eval_df.shape

(161, 5)

In [19]:
import html 
rt = eval_df.sample()['resume'].values[0]
rt = html.unescape(rt)
print(rt)

 Anurag Kundra
 SOFTWARE ENGINEER 2
https://www.linkedin.com/in/anurag-kundra-955b081b6/ | +91 9999305968 | anurag.kundra21@gmail.com

Summary

• Skilled and dedicated Software Development Engineer working in fast paced startup environment for
 last 2 years.
• Experienced in developing multiple applications with Agile methodologies with a strong focus on
 following best practices like writing optimized and clean code.
• Proficient in multiple programming languages, frameworks, and technologies including Python,
 MySQL, Django and Django REST.
• Objective is to obtain a creative and challenging position in an organization that gives an opportunity
 for self-improvement and leadership, while contributing to the symbolic growth of the organization
 with technical, innovative, and logical skills.



Skills

 • Django • Django Elasticsearch • Celery
 • Django REST DSL DRF • Redis
 • Elasticsearch • Python • RabbitMq
 • Django Elasticsearch • MySql • Node.js
 DSL • Postgres • Git



Experien

In [20]:
# eval_prompt = f'''
# You are a helpful language model working for a job platform. You will be given the raw 
#  unstructured text of a user's resume, and the task is to extract the work experience of the 
#  user from the raw text in the following format: \n{{work_format}}\n

#  This is the resume text:\n{{resume_text}}\n
#  This is the output in the required format:\n
# '''

In [21]:
# work_format = '''{
#     'work_experience': [{'company': 'company Name 1',
#                          'role': 'job designation 1',
#                          'start_date': 'mm/yyyy',
#                          'end_date': 'mm/yyyy',
#                          'description': 'complete Job description taken from resume'},
#                         {'company': 'company name 2',
#                          'role': 'job designation 2',
#                          'start_date': mm/yyyy',
#                          'end_date': 'mm/yyyy',
#                          'description': 'complete Job description taken from resume'}]
# }'''

In [None]:
from string import Template
edu_eval_prompt = f'''
You are a helpful language model working for a job platform. You will be given the raw 
 unstructured text of a user's resume, and the task is to extract the 
 educational details (undergraduate/postgraduate degrees, name of programs, certifications) of the 
 user from the raw text in the following format: \n {{edu_format}}\n
 If the information for a certain field is not available, return 'NA'
 This is the resume text:\n{{rt}}\n
 This is the output in the required_format:'''

In [None]:
edu_format = '''[
    {
        "institution": "put name of educational institution here"
        "program" : "name of degree/certification/diploma as given in the resume"
        "start_date" : "start date in dd/mm/yyyy format"
        "end_date" : "end date in dd/mm/yyyy format"
        
    },
    {
        "institution": "put name of educational institution here"
        "program" : "name of degree/certification/diploma as given in the resume"
        "start_date" : "start date in dd/mm/yyyy format"
        "end_date" : "end date in dd/mm/yyyy format"
        
    }
]'''

In [None]:
# eval_prompt = edu_prompt.substitute(
#         edu_format=edu_format,
#         resume_text=rt
#         )

In [None]:


# eval_prompt = edu_eval_prompt.substitute(
#             edu_format=edu_format,
#             resume_text=rt)


eval_prompt = edu_eval_prompt.format(
            edu_format=edu_format,
            rt=rt)

sample_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

In [None]:
sample_input['input_ids'].shape[1]

In [None]:
import ast

In [None]:
start_time = time.time()
model.eval()
with torch.no_grad():
    full_document = tokenizer.decode(model.generate(**sample_input, max_new_tokens=1000)[0], skip_special_tokens=True)



In [None]:
print(full_document)

In [None]:
out_str = full_document.replace(eval_prompt,"")
print(f'Time taken :{time.time()-start_time}')

## Work Ex Evaluation

In [23]:
rt = eval_df.sample()['resume'].values[0]
print(rt)

PRADEEP KUMAR
Project Manager- IT Infra
 pkindians@gmail.com
 9910393860
 1097/2 FF Pinewood Enclave Sec-2 Wave City Ghaziabad U.P.-201002




Professional Experience
Project Manager-IT Infra, 04/2022 &ndash; present | Noida, India
iBoss Tech Solutions Private Limited
 &bull;Work closely with IT Director to leverage IT for business benefit.
 &bull;Build system and process for smooth operations.
 &bull;Run projects to ensure that they meet deadline, customer requirements and organizational goals in
 efficient manner.
 &bull;Plan, schedule and supervise the work of each tech team to ensure the services are provided on time and
 in efficient manner
 &bull;Evaluate, plan and procure, operationalize and retire appropriate technology solutions.
 &bull;Mange relevant contracts and ensure compliance and governance.
 &bull;Control cost and budgeting regarding IT systems.
 &bull;Responsible for managing Operations and Projects ensure highest uptime of IT services. Service call
 closure to meet b

In [24]:
work_format = '''{
    'work_experience': [{'company': 'company Name 1',
                         'role': 'job designation 1',
                         'start_date': 'mm/yyyy',
                         'end_date': 'mm/yyyy',
                         'description': 'complete Job description taken from resume'},
                        {'company': 'company name 2',
                         'role': 'job designation 2',
                         'start_date': mm/yyyy',
                         'end_date': 'mm/yyyy',
                         'description': 'complete Job description taken from resume'}]
}'''


In [25]:
eval_prompt = f'''
You are a helpful language model working for a job platform. You will be given the raw 
 unstructured text of a user's resume, and the task is to extract the work experience of the 
 user from the raw text in the following format: \n{{work_format}}\n

 This is the resume text:\n{{resume_text}}\n
 This is the output in the required format:\n
'''

In [26]:


eval_prompt = eval_prompt.format(
            work_format=work_format,
            resume_text=rt)

sample_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

In [27]:
start_time = time.time()
model.eval()
with torch.no_grad():
    full_document = tokenizer.decode(model.generate(**sample_input, max_new_tokens=2000)[0], skip_special_tokens=True)



In [28]:
print(full_document)


You are a helpful language model working for a job platform. You will be given the raw 
 unstructured text of a user's resume, and the task is to extract the work experience of the 
 user from the raw text in the following format: 
{
    'work_experience': [{'company': 'company Name 1',
                         'role': 'job designation 1',
                         'start_date': 'mm/yyyy',
                         'end_date': 'mm/yyyy',
                         'description': 'complete Job description taken from resume'},
                        {'company': 'company name 2',
                         'role': 'job designation 2',
                         'start_date': mm/yyyy',
                         'end_date': 'mm/yyyy',
                         'description': 'complete Job description taken from resume'}]
}


 This is the resume text:
PRADEEP KUMAR
Project Manager- IT Infra
 pkindians@gmail.com
 9910393860
 1097/2 FF Pinewood Enclave Sec-2 Wave City Ghaziabad U.P.-201002




Profess

In [29]:
model.push_to_hub('lakshay/work-peft',token='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', )

adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/lakshay/work-peft/commit/a3673b3eb8a49faac85c10153da3e43b6323e623', commit_message='Upload model', commit_description='', oid='a3673b3eb8a49faac85c10153da3e43b6323e623', pr_url=None, pr_revision=None, pr_num=None)

## Personal Information Evaluation

In [None]:
from string import Template
pi_eval_prompt = Template('''You are a helpful language model working for a job platform. You will be given the raw 
 unstructured text of a user's resume, and the task is to extract the personal information (name, phone number, email ID and the location) of the 
 user from the raw text in the following format: \n${pi_format}\n
 If the information is not available, return 'NA'
 This is the resume text:\n${resume_text}\n
 This is the output in the required_format:
''')

In [None]:
pi_format = '''{
    'personal_information': {'name': "Name",
                         'email_id': "Valid Email ID",
                         'phone_number': "10 Digit phone number",
                         'location': "User's current location"}
}'''

In [None]:
rt = eval_df.sample()['resume'].values[0]
print(rt)

In [None]:


eval_prompt = pi_eval_prompt.substitute(
            pi_format=pi_format,
            resume_text=rt)

sample_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

In [None]:
model.eval()
with torch.no_grad():
    full_document = tokenizer.decode(model.generate(**sample_input, max_new_tokens=200)[0], skip_special_tokens=True)



In [None]:
print(full_document)

In [23]:
model.push_to_hub('lakshay/llama2-test',token='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', max_shard_size='2GB')

adapter_model.safetensors:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/lakshay/llama2-test/commit/9460af41bdcca6c6b9cafac27d3ee09a4bd6c36a', commit_message='Upload model', commit_description='', oid='9460af41bdcca6c6b9cafac27d3ee09a4bd6c36a', pr_url=None, pr_revision=None, pr_num=None)

## PI validation loop

In [None]:
validation_data = pd.read_csv('custom_data/validation_dataset.csv')

In [None]:
validation_data.sample()

In [None]:
validation_data.resume.values[:1]

In [None]:
from tqdm.notebook import tqdm
import ast

In [None]:

error_list = list()
correct_list = list()

for uid,rt in tqdm(validation_data[['id','resume']].sample(frac=1).values[:200]):

    eval_prompt = pi_eval_prompt.substitute(
                pi_format=pi_format,
                resume_text=rt)

    sample_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
    try:
        model.eval()
        with torch.no_grad():
            full_document = tokenizer.decode(model.generate(**sample_input, max_new_tokens=200)[0], skip_special_tokens=True)
    except:
        print('feck')
        continue
    
    try:
        out_str = full_document.replace(eval_prompt,'').replace('$','')
        out_json = ast.literal_eval(out_str)
        u_info = {}
        u_info[uid] = out_json
        correct_list.append(u_info)
    except:
        error_list.append(full_document)
        continue

In [None]:
# correct_list

In [None]:
'hello there, $, yes'.replace('there,','').replace('$','')

In [None]:
len(correct_list)

In [None]:
# correct_list

with open('custom_data/validation_output.pkl','wb') as f:
    pickle.dump(correct_list,f)

In [25]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_id="lakshay/llama2-test"

# tokenizer = LlamaTokenizer.from_pretrained(model_id)

model = LlamaForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map='auto', torch_dtype=torch.float16, token='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')

adapter_config.json:   0%|          | 0.00/581 [00:00<?, ?B/s]

ValueError: 
                        Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
                        the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
                        these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
                        `device_map` to `from_pretrained`. Check
                        https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                        for more details.
                        