# Fine-tune Llama 2

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/244.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m116.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m75.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# Central configuration cell: every tunable used by the cells below is set here.

# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"

# The instruction dataset to use
# NOTE(review): in the cells visible here this name is only referenced by a
# commented-out load_dataset call; the training data is read from a CSV instead.
dataset_name = "mlabonne/guanaco-llama2-1k"

# Fine-tuned model name (also the local directory the trained model is saved to
# and later reloaded from when merging)
new_model = "llama-2-7b-test"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models (name of a torch dtype, resolved with getattr later)
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 10

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
# NOTE(review): None is passed straight to SFTTrainer, which presumably applies its
# own default limit — confirm it covers the resume prompts used here (the
# pre-training test run logged a 1537-token input).
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

# Data Loading and preparation

In [None]:
# Mount Google Drive at /content/drive so the resume CSV and the model output
# folder referenced below are reachable (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas

# Location of the parsed-resume CSV on the mounted Drive.
path = "/content/drive/MyDrive/Colab Notebooks/Onfocus/Recruit GPT/Parsed_Resume_v2.csv"
# reading the CSV file
# NOTE(review): the preview below shows "Unnamed: 0" / "Unnamed: 0.1" columns, which
# looks like a saved index being re-read as data — confirm and drop if unneeded.
df = pandas.read_csv(path)

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,File_Name,File_Path,Resume_Text,Prompt,Resume_Json,text
0,0,Naukri_AbinashRath[7y_0m] (1).pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_AbinashRath[7y_0m] (1).txt\n \n ABINASH...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_AbinashRath[7y_0m] (...",[INST] <<SYS>>\n You are a language model tra...
1,1,Naukri_AbinashRath[7y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_AbinashRath[7y_0m].txt\n \n ABINASH RAT...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_AbinashRath[7y_0m].t...",[INST] <<SYS>>\n You are a language model tra...
2,2,Naukri_AkashSahu[3y_1m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_AkashSahu[3y_1m].txt\nMay 2021 - Presen...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Naukri_AkashSahu[3y_1m].txt""...",[INST] <<SYS>>\n You are a language model tra...
3,3,Arjuntej_resume.pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Arjuntej_resume.txt\nMOTAMARRI MALLIKHARJUNARA...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Arjuntej_resume.txt"",\n""Name...",[INST] <<SYS>>\n You are a language model tra...
4,4,Naukri_AkankshaKumari[7y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_AkankshaKumari[7y_0m].txt\nAkanksha Kum...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_AkankshaKumari[7y_0m...",[INST] <<SYS>>\n You are a language model tra...


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the resumes for testing. The split is seeded so that a kernel
# restart reproduces the same train/test partition (and therefore the same
# fine-tuning data); without `random_state` every run shuffles differently.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
print(train_df.shape, test_df.shape)

(108, 7) (28, 7)


In [None]:
type(train_df)

pandas.core.frame.DataFrame

In [None]:
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset


### convert to Huggingface dataset
# Use the documented `Dataset.from_pandas` constructor instead of handing a raw
# pyarrow table to `Dataset(...)`: it wraps the table in the type the library
# expects and infers the feature schema. The shuffled DataFrame index is still
# carried over as the `__index_level_0__` column, as before.
# The intermediate `ds.dataset(...)` object that used to be built here was never
# read, so it is no longer created.
train = Dataset.from_pandas(train_df)

In [None]:
train

Dataset({
    features: ['Unnamed: 0', 'File_Name', 'File_Path', 'Resume_Text', 'Prompt', 'Resume_Json', 'text', '__index_level_0__'],
    num_rows: 108
})

In [None]:
### convert to Huggingface dataset
# Use the documented `Dataset.from_pandas` constructor for the held-out split
# rather than passing a raw pyarrow table to `Dataset(...)`. The unused
# intermediate `ds.dataset(...)` object is no longer created.
test = Dataset.from_pandas(test_df)

In [None]:
type(train)

datasets.arrow_dataset.Dataset

In [None]:
# system_message = "Given a puzzle-like reasoning-heavy question in English, please provide a well-reasoned, step-by-step thought out response in English.`. Feel free to re-run this cell if you want a better result"
# # Preprocess datasets
# train_dataset_mapped = train.map(lambda examples: {'text': [f'[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\n' + prompt + ' [/INST] ' + response for prompt, response in zip(examples['prompt'], examples['response'])]}, batched=True)
# valid_dataset_mapped = test.map(lambda examples: {'text': [f'[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\n' + prompt + ' [/INST] ' + response for prompt, response in zip(examples['prompt'], examples['response'])]}, batched=True)


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:
# The prompt-formatting map step above is commented out, so the "mapped" names
# are plain aliases of the converted datasets (which already have a `text` column).
train_dataset_mapped=train
test_dataset_mapped=test

In [None]:
# Preview the first training rows by materialising the dataset as a DataFrame.
d1 = pd.DataFrame(train_dataset_mapped)
d1.head()

Unnamed: 0.1,Unnamed: 0,File_Name,File_Path,Resume_Text,Prompt,Resume_Json,text,__index_level_0__
0,51,Naukri_HariPrasadPuligindlapalli[6y_0m].docx,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_HariPrasadPuligindlapalli[6y_0m].txt\n ...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_HariPrasadPuligindla...",[INST] <<SYS>>\n You are a language model tra...,51
1,133,Naukri_VShivKumar[9y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_VShivKumar[9y_0m].txt\nV.SHIV KUMAR\nE-...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Naukri_VShivKumar[9y_0m].txt...",[INST] <<SYS>>\n You are a language model tra...,133
2,14,Naukri_Anuragkush[14y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_Anuragkush[14y_0m].txt\nAnurag Kush \nA...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_Anuragkush[14y_0m].t...",[INST] <<SYS>>\n You are a language model tra...,14
3,75,Naukri_PrasannaVenkatesh[3y_0m].docx,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_PrasannaVenkatesh[3y_0m].txt\nPrasanna ...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_PrasannaVenkatesh[3y...",[INST] <<SYS>>\n You are a language model tra...,75
4,106,Naukri_SanketSuryawanshi[3y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_SanketSuryawanshi[3y_0m].txt\nSanket Su...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_SanketSuryawanshi[3y...",[INST] <<SYS>>\n You are a language model tra...,106


In [None]:
# Load dataset (you can process it here)
# dataset = load_dataset(dataset_name, split="train")


Downloading readme:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/967k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
#dataset
train_dataset_mapped

Dataset({
    features: ['Unnamed: 0', 'File_Name', 'File_Path', 'Resume_Text', 'Prompt', 'Resume_Json', 'text', '__index_level_0__'],
    num_rows: 108
})

In [None]:
# Preview the first held-out rows; note this rebinds `d1`, replacing the
# training preview created earlier.
d1 = pd.DataFrame(test_dataset_mapped)
d1.head()

Unnamed: 0.1,Unnamed: 0,File_Name,File_Path,Resume_Text,Prompt,Resume_Json,text,__index_level_0__
0,128,Naukri_VirendraKumar[10y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_VirendraKumar[10y_0m].txt\n \nVirendra ...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Naukri_VirendraKumar[10y_0m]...",[INST] <<SYS>>\n You are a language model tra...,128
1,108,Naukri_Shubham[2y_9m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_Shubham[2y_9m].txt\nSHUBHAM \nAddress :...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Naukri_Shubham[2y_9m].txt"",\...",[INST] <<SYS>>\n You are a language model tra...,108
2,64,Naukri_MANDARVISHNUHARKARE[19y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_MANDARVISHNUHARKARE[19y_0m].txt\nMandar...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_MANDARVISHNUHARKARE[...",[INST] <<SYS>>\n You are a language model tra...,64
3,36,Naukri_GADRENIKHILSHRIKANT[4y_0m].pdf,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_GADRENIKHILSHRIKANT[4y_0m].txt\n \nClas...,\n [INST] <<SYS>>\n You are a language model...,"{\n""File_Name"": ""Naukri_GADRENIKHILSHRIKANT[4...",[INST] <<SYS>>\n You are a language model tra...,36
4,18,Naukri_BabasahebAsaramMore[4y_6m].docx,/content/drive/MyDrive/Colab Notebooks/Onfocus...,Naukri_BabasahebAsaramMore[4y_6m].txt\nBabasah...,\n [INST] <<SYS>>\n You are a language model...,"{\n ""File_Name"": ""Naukri_BabasahebAsaramMore[...",[INST] <<SYS>>\n You are a language model tra...,18


# Model and tokenizer setup

In [None]:
# Load tokenizer and model with QLoRA configuration
# Resolve the dtype name from the configuration cell (e.g. "float16") to the
# corresponding attribute on the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

In [None]:
# Quantization settings handed to the base-model loader; every value comes from
# the bitsandbytes section of the configuration cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [None]:
# Check GPU compatibility with bfloat16
# Only relevant when the 4-bit base model computes in float16: a device whose
# compute-capability major version is 8 or higher gets a hint to enable bf16.
if compute_dtype == torch.float16 and use_4bit:
    capability_major = torch.cuda.get_device_capability()[0]
    if capability_major >= 8:
        rule = "=" * 80
        print(
            rule,
            "Your GPU supports bfloat16: accelerate training with bf16=True",
            rule,
            sep="\n",
        )

In [None]:
# Load base model
# The checkpoint named by `model_name` is loaded with the quantization settings
# in `bnb_config` and placed according to `device_map` from the configuration cell.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Turn off the generation cache flag on the model config before training.
model.config.use_cache = False
# NOTE(review): `pretraining_tp` presumably selects the tensor-parallel emulation
# path of the Llama implementation, with 1 meaning "off" — confirm against the
# transformers Llama config documentation.
model.config.pretraining_tp = 1

In [None]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Testing before training the model

In [None]:
#Resume-1
# Baseline check: run one held-out resume through the base model before any
# fine-tuning, so the result can be compared with the post-training output.
# `prompt` is assigned here because no earlier cell defines it; relying on a
# leftover kernel variable raises NameError on Restart & Run All.
prompt = test_dataset_mapped[0]["Resume_Text"]  # change to your desired prompt

# Budget the *generated* tokens rather than the total length: resume prompts are
# much longer than 200 tokens (a previous run logged a 1537-token input), so
# `max_length=200` left no room for any completion.
gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_new_tokens=200)
result = gen(prompt)
print(result[0]['generated_text'])

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
Input length of input_ids is 1537, but `max_length` is set to 200. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.



Naukri_KAVITAUDDHAVJADHAV[10y_0m].txt
KAVITA UDDHAV JADHAV Email: Jadhavkavita756@gmail.comPhone:  +91- 9892078163, +91-8097101563A multi-faceted professional accustomed with proven technical and management skills; targeting challenging and rewarding opportunities as Scrum Master/Business Analyst with an organization of reputeProfile SummaryAn accomplished technocrat offering over 10 years of versatile experience in Scrum Master, Agile Coaching/Transformation, Project Management, Testing, People Management, Quality Assurance, Test Management (Manual & Automation) across various DomainPlayed a key role as Agile Team Facilitator & Scrum Master for steering the entire projects & project planning, scheduling, monitoring, controlling & configuration; actively participated in applying scrum principles/practices/theory and coordinated for Agile adoption for teams across multipleExpertise in implementing Scrum methodology to add value to the deliverables; working with consultants in identifyi

In [None]:
# Load LoRA configuration
# Rank, scaling and dropout come from the QLoRA section of the configuration
# cell; the adapter is declared for a causal language-modelling task.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Set training parameters
# Every value is taken from the TrainingArguments section of the configuration
# cell. `per_device_eval_batch_size` and `gradient_checkpointing` are defined
# there as well but were previously never forwarded, so editing them had no
# effect; they are now passed through like the other settings.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [None]:
# Set supervised fine-tuning parameters
# Trains on the `text` column of the training dataset with the LoRA adapter
# configuration defined above.
# NOTE(review): only the training split is supplied; the held-out dataset built
# earlier is not passed to the trainer in this cell.
# NOTE(review): `max_seq_length` is None here — confirm the limit the trainer
# falls back to is long enough for the resume samples.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset_mapped,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/108 [00:00<?, ? examples/s]

In [None]:
# Train model
# Runs the fine-tuning loop; the returned TrainOutput (step count, loss and
# runtime metrics) is displayed as the cell result.
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,2.4713
50,1.6533
75,1.0787
100,1.0925
125,1.0008
150,1.021
175,1.0821
200,0.9931
225,1.0523
250,0.9326


TrainOutput(global_step=1080, training_loss=0.8089163861892842, metrics={'train_runtime': 4615.5293, 'train_samples_per_second': 0.234, 'train_steps_per_second': 0.234, 'total_flos': 2.24493090373632e+16, 'train_loss': 0.8089163861892842, 'epoch': 10.0})

In [None]:
# Save trained model
# Written to the local directory named by `new_model`; the merge step later
# reloads the adapter from this same location.
trainer.model.save_pretrained(new_model)

In [None]:
# Specify the path where you want to save the model
# (a folder on the mounted Google Drive, so the copy outlives the Colab session)
save_path = "/content/drive/MyDrive/Colab Notebooks/Onfocus/Recruit GPT/Llama Fine Tuned Model"

# Save the model to that path
trainer.model.save_pretrained(save_path)


In [None]:
# NOTE(review): this writes the same trained model a third time, under a
# hard-coded directory name — confirm the extra copy is needed.
trainer.model.save_pretrained('llama-2-7b-custom')

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir results/runs

In [None]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = """
Naukri_KAVITAUDDHAVJADHAV[10y_0m].txt
KAVITA UDDHAV JADHAV Email: Jadhavkavita756@gmail.comPhone:  +91- 9892078163, +91-8097101563A multi-faceted professional accustomed with proven technical and management skills; targeting challenging and rewarding opportunities as Scrum Master/Business Analyst with an organization of reputeProfile SummaryAn accomplished technocrat offering over 10 years of versatile experience in Scrum Master, Agile Coaching/Transformation, Project Management, Testing, People Management, Quality Assurance, Test Management (Manual & Automation) across various DomainPlayed a key role as Agile Team Facilitator & Scrum Master for steering the entire projects & project planning, scheduling, monitoring, controlling & configuration; actively participated in applying scrum principles/practices/theory and coordinated for Agile adoption for teams across multipleExpertise in implementing Scrum methodology to add value to the deliverables; working with consultants in identifying process improvements in deliverables; developing methods to achieve delivery of desired results; acting as the quality control point for consultant deliverablesExcellence in gathering and understanding requirements of clients & other multiple stakeholders, followed by translation into functional specifications as well as provisioning of suitable software solutionsPossess in-depth expertise in working on various tools and technologies such as JIRA, Confluence, Kanban, Scrum XP, waterfall methodology, ERP, SQL & so onProven expertise in spearheading entire gamut of sprint activities inclusive of sprint planning, release planning, backlog grooming, task management, scrum-of-scrum, retrospectives, sprint retrospective and sprint demonstrationLed proven initiatives in extending support towards:Product Owners and Development Team for managing product backlogs, resolving customer queriesScrum Team on how to execute Agile/Scrum practices and values to delight the customers Hands-on experience in technical and business writing, 
business process flow, business process modelling and testing with knowledge of writing user stories, creating use cases, use case diagrams, workflow diagrams, burn down charts, sequence diagrams & so onCore CompetenciesReport Development		India Payment stack			New Product Development Process Improvements		Data Modeling				Cost Estimations Reporting & Documentation	Vendor Management			Project Management Work ExperienceJuly'21 â€“ Present  Atos India Pvt Ltd, Pune as Consultat â€“ Business Analyst	  Client â€“ Deutsche Bank, UK as Scrum Master and Technical AnalystRole: Managing end-to-end process of Business Analysis entailing As-Is / To-Be Analysis, mapping & finalizing requirements, developing and documenting functional specifications, determining the business scenario and mapping them into solutionsGathering business requirements from multiple stakeholders of different business units and developing functional specification documentsDetailed understanding of UPI payments business and technical architecture, UPI transaction interface and customer onboarding, data modeling report development and analytics experience is added plus. 
Advocating for timely and high quality delivery of respective projects/programs while managing stakeholder expectationsManaging projects in Agile/Scrum environment, including sprint planning with Product Owner, resource allocation, managing Agile Scrum ceremonies (daily Scrum Meeting, Scrum of Scrum) and preparation of Release Note as per SprintFacilitating Sprint Planning & Review meetings, facilitating Sprint Retrospective meetings, managing the Product and the Sprint backlog and authoring Sprint artifacts and processesDelivering internal/client demos and signing off all deliverables as per sprint scopeProviding guidance and leadership to fellow team members and conducting annual performance reviews of teamFollowing project lifecycle methodology defined under the process framework; ensuring that Project Team understands all aspects of the prime contract relating to responsibilitiesShowcasing excellence in transforming the Project Teams from Traditional to Agile Way of working; establishing Agile practices and building culture within distributed scrum teams Analysing current processes, identifying areas of improvement & executing improvements in coordination with stakeholders; providing training to the team on the agile processCreating and refining the User Stories, Backlog Presentations by working in close coordination with Product Owners Building & maintaining strong business relations with Product Owners & other Stakeholders to facilitate teams interactionMonitoring teams adherence to agile process and SLAs during project execution, identifying issues & assisting team to resolve the sameFront-leading delivery of projects from the stage of initiation till monitoring & control including planning, estimation & scheduling, integrating change control, planning risk responses and contingency planningExhibiting skills in empowering team to be self-organize and grow cross-functionally in terms of Technologies and Protect Team from over-commitment, sprint backlog and 
prioritize resolution of defects Leading initiatives in managing the Agile testing process along with automated and manual software testing, writing and executing the test cases  Previous ExperienceMay'19 â€“ Juneâ€™21 HDFC Bank Ltd., Pune as Area Head â€“ Ecommerce Business AnalystJul'15 â€“ Nov'17 American Institute of Chemical Engineering (AIChE), USA/ Center for Chemical Process Safety (CCPS), Mumbai as Staff Consultant - Co-ordinator- Asia Pacific Region Nov'13 â€“ Jun'14 CGC Converse Technology Pvt Ltd, Hyderabad as Process Safety EngineerJul'12 â€“ Nov'13 Chemplant Project Consultant Pvt. Ltd, Mumbai as Project EngineerEducation & CredentialsPost-Graduation Diploma in Process Engineering (P.G.D) from Suvidya Institute of Technology, Santa cruise, Mumbai in 2012Degree in Chemical Engineering (B.E) from Datta Meghe College of Engineering, Navi Mumbai in 2012Certifications:SAFe 4 Certified Agilest (SAFe*) 			Azure fundamentals - access via Microsoft Training PlatformMicrosoft Certified: Azure DevOps Engineer Expert	ITIL 4 Foundation					Leading SAFe* 5.1 SAPersonal DetailsDate of Birth: June 09, 1989Address: Pune - 411014Languages known: English, Marathi & HindiPassport: M0407562"""
# Build a text-generation pipeline around `model` with max_length=2000.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=2000)
# Wrap the raw resume text in instruction markers before generating.
# NOTE(review): the training `text` samples previewed earlier begin with
# "[INST] <<SYS>>" (a system prompt), which this inference prompt omits — confirm
# the two formats are meant to differ.
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
from transformers import AutoModel  # retained: other cells outside this view may still use the name
device_map={"":0}
# Load the published checkpoint as a causal LM. `AutoModel` returns the bare
# transformer without the language-model head, which cannot be used for text
# generation; `AutoModelForCausalLM` (imported in the first cell) keeps the head.
# Half precision matches the FP16 reload used for merging below and halves the
# memory needed compared with the default full-precision load.
# NOTE(review): this cell runs before the "Empty VRAM" cell, so the fine-tuned
# model is presumably still resident on GPU 0 at this point — confirm there is
# enough memory or move this cell after the cleanup.
model = AutoModelForCausalLM.from_pretrained(
    "Sudhee1997/Llama-2-7b-Custom-Recruit",
    torch_dtype=torch.float16,
    device_map=device_map,
)


In [None]:
# Load the tokenizer published alongside the custom model; this rebinds the
# `tokenizer` name used by the earlier cells.
tokenizer = AutoTokenizer.from_pretrained("Sudhee1997/Llama-2-7b-Custom-Recruit")

In [None]:
# Empty VRAM
import gc

# Drop every notebook-level reference that keeps model weights alive. `gen` (the
# pre-training pipeline) is included because it holds the model too; deleting
# only `pipe` and `trainer` left that reference behind and the memory was never
# released. `pop(..., None)` keeps the cell re-runnable, whereas `del` raises
# NameError once a name is already gone.
for _name in ("model", "pipe", "gen", "trainer"):
    globals().pop(_name, None)

# Collect twice so objects released by the first pass are reclaimed too.
gc.collect()
gc.collect()

# Hand PyTorch's cached CUDA blocks back to the driver; garbage collection alone
# frees the tensors but leaves the memory reserved by the caching allocator.
torch.cuda.empty_cache()

19965

In [None]:
# Reload model in FP16 and merge it with LoRA weights
# The base checkpoint is loaded again without quantization (float16) so the
# adapter can be merged into regular weights.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
# Attach the adapter saved under `new_model` to the freshly loaded base model.
model = PeftModel.from_pretrained(base_model, new_model)
# Merge the adapter into the base model and rebind `model` to the merged result.
model = model.merge_and_unload()

# Reload tokenizer to save it
# (from the base checkpoint, with the same padding settings used for training)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]