### Install libraries

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.33.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/244.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m86.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m81.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

### Imports and Setup

In [None]:
import gc
import os

import torch
import transformers
import pandas as pd
import torch.nn as nn
import bitsandbytes as bnb
import matplotlib.pyplot as plt

from trl import SFTTrainer
from huggingface_hub import login
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging

### Parameters

In [None]:
# ---------------------------------------------------------------------------
# Model and dataset identifiers
# ---------------------------------------------------------------------------
BASE_MODEL_NAME = "NousResearch/Llama-2-7b-hf"  # checkpoint that gets fine-tuned
FINETUNED_MODEL = "Hawk28/Llama-7B-RP"          # name used when saving / pushing the result
DATASET = "Hawk28/spider-sql-prompts"           # handed to load_dataset()
DEVICE_MAP = {"": 0}                            # passed as device_map when loading the model

# ---------------------------------------------------------------------------
# QLoRA (LoraConfig) parameters
# ---------------------------------------------------------------------------
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
LORA_BIAS = "none"
LORA_TASK_TYPE = "CAUSAL_LM"
LORA_TARGET_MODULES = ['q_proj','k_proj','v_proj','o_proj']  # attention projections only

# ---------------------------------------------------------------------------
# bitsandbytes (BitsAndBytesConfig) parameters
# ---------------------------------------------------------------------------
BNB_USE_4_BIT = True
BNB_4_BIT_QUANT_TYPE = "nf4"
BNB_4_BIT_COMPUTE_DTYPE = "float16"  # dtype *name*; a later cell resolves it via getattr(torch, ...)
BNB_USE_NESTED_QUANT = False

# ---------------------------------------------------------------------------
# Trainer (TrainingArguments) parameters
# ---------------------------------------------------------------------------
TRAINER_OUTPUT_DIR = "./results"

# Precision flags. TRAINER_BF16 may be switched to True by the GPU capability check cell.
TRAINER_FP16 = False
TRAINER_BF16 = False

# Schedule
TRAINER_MAX_STEPS = 300
TRAINER_NUM_TRAIN_EPOCHS = 1  # NOTE(review): defined here but not passed to TrainingArguments
TRAINER_EVAL_STEPS = 25
TRAINER_LOGGING_STEPS = 25
TRAINER_EVALUATION_STRATEGY = "steps"

# Optimisation
TRAINER_OPTIM = "paged_adamw_32bit"
TRAINER_LEARNING_RATE = 2e-4
TRAINER_LR_SCHEDULER_TYPE = "linear"
TRAINER_WARMUP_RATIO = 0.05
TRAINER_WEIGHT_DECAY = 0.001
TRAINER_MAX_GRAD_NORM = 0.3

# Batching / memory
TRAINER_PER_DEVICE_TRAIN_BATCH_SIZE = 4
TRAINER_GRADIENT_ACCUMULATION_STEPS = 2
TRAINER_GROUP_BY_LENGTH = True
TRAINER_GRADIENT_CHECKPOINTING = True  # NOTE(review): defined here but not passed to TrainingArguments

# ---------------------------------------------------------------------------
# SFTTrainer parameters
# ---------------------------------------------------------------------------
SFT_MAX_SEQ_LENGTH = 1024
SFT_PACKING = False

### Data

In [None]:
# Load the dataset named by DATASET; the result holds a "train" and a "validation" split
dataset = load_dataset(DATASET)

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/10.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/989k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [None]:
# Display the DatasetDict summary: split names, column names and row counts
dataset

DatasetDict({
    train: Dataset({
        features: ['db_name', 'prompt'],
        num_rows: 8659
    })
    validation: Dataset({
        features: ['db_name', 'prompt'],
        num_rows: 1034
    })
})

In [None]:
# Print one training example (row 100) to inspect the "prompt" text the trainer consumes
print(dataset["train"][100]["prompt"])

### Complete SQL query only and with no explanation
### SQL tables followed by foreign key information:
#
# Addresses ( address_id, line_1, line_2, city, zip_postcode, state_province_county, country )
# People ( person_id, first_name, middle_name, last_name, cell_mobile_number, email_address, login_name, password )
# Students ( student_id, student_details )
# Courses ( course_id, course_name, course_description, other_details )
# People_Addresses ( person_address_id, person_id, address_id, date_from, date_to )
# Student_Course_Registrations ( student_id, course_id, registration_date )
# Student_Course_Attendance ( student_id, course_id, date_of_attendance )
# Candidates ( candidate_id, candidate_details )
# Candidate_Assessments ( candidate_id, qualification, assessment_date, asessment_outcome_code )
# 
# Students.student_id can be joined with People.person_id
# People_Addresses.address_id can be joined with Addresses.address_id
# People_Addresses.person_id can be joined with People.pe

### Bits and Bytes Configuration

In [None]:
# Turn the dtype *name* from the Parameters cell (e.g. "float16") into the actual
# torch.dtype object. The isinstance guard keeps this cell idempotent: once the name has
# been rebound to a torch.dtype, calling getattr(torch, <dtype>) again would raise
# "TypeError: attribute name must be string" on a second run of the cell.
if isinstance(BNB_4_BIT_COMPUTE_DTYPE, str):
    BNB_4_BIT_COMPUTE_DTYPE = getattr(torch, BNB_4_BIT_COMPUTE_DTYPE)
BNB_4_BIT_COMPUTE_DTYPE

torch.float16

In [None]:
# Quantization settings handed to from_pretrained() when the base model is loaded.
# Every value comes from the BNB_* constants in the Parameters cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=BNB_USE_4_BIT,
    bnb_4bit_use_double_quant=BNB_USE_NESTED_QUANT,
    bnb_4bit_compute_dtype=BNB_4_BIT_COMPUTE_DTYPE,
    bnb_4bit_quant_type=BNB_4_BIT_QUANT_TYPE,
)

In [None]:
# bfloat16 check: only relevant when the model is loaded in 4-bit with a float16 compute
# dtype. If the current CUDA device reports a major compute capability of 8 or higher,
# announce it and switch the trainer's bf16 flag on.
if BNB_USE_4_BIT and BNB_4_BIT_COMPUTE_DTYPE == torch.float16:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print(
            "=" * 80,
            "Your GPU supports bfloat16: accelerate training with bf16=True",
            "=" * 80,
            sep="\n",
        )
        TRAINER_BF16 = True

### Load Base Model and Tokenizer

In [None]:
# Load the base checkpoint with the quantization settings from bnb_config, placed
# according to DEVICE_MAP.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    quantization_config=bnb_config,
    device_map=DEVICE_MAP,
    trust_remote_code=True,
)

# Config overrides applied before training.
# NOTE(review): use_cache=False is presumably to keep the KV cache out of the training
# forward passes; pretraining_tp=1 presumably selects the plain (non tensor-parallel
# emulated) linear layers -- confirm against the Llama config documentation.
model.config.pretraining_tp = 1
model.config.use_cache = False

(…)h/Llama-2-7b-hf/resolve/main/config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

(…)esolve/main/model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

(…)b-hf/resolve/main/generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



In [None]:
# Tokenizer that matches the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_NAME,
    trust_remote_code=True,
)
# Pad on the right (fixes a weird overflow issue seen with fp16 training) and
# reuse the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

(…)7b-hf/resolve/main/tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

(…)lama-2-7b-hf/resolve/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

(…)-hf/resolve/main/special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [None]:
# Baseline: run the *un-tuned* base model on one schema + question prompt so its
# output can be compared with the fine-tuned model's output later in the notebook.
batch = tokenizer(
    """### Complete SQL query only and with no explanation
### SQL tables followed by foreign key information:
#
# continents ( ContId, Continent )
# countries ( CountryId, CountryName, Continent )
# car_makers ( Id, Maker, FullName, Country )
# model_list ( ModelId, Maker, Model )
# car_names ( MakeId, Model, Make )
# cars_data ( Id, MPG, Cylinders, Edispl, Horsepower, Weight, Accelerate, Year )
#
# countries.Continent can be joined with continents.ContId
# car_makers.Country can be joined with countries.CountryId
# model_list.Maker can be joined with car_makers.Id
# car_names.Model can be joined with model_list.Model
# cars_data.Id can be joined with car_names.MakeId
#
### Question:
#
# For each continent, list its id, name, and how many countries it has?
#
### SQL:
""",
    return_tensors='pt',
)

# The token ids have to live on the GPU alongside the model
input_ids = batch['input_ids'].to('cuda')

# Generate up to 100 new tokens under CUDA autocast
with torch.cuda.amp.autocast():
    output_tokens = model.generate(max_new_tokens=100, input_ids=input_ids)

# Decode the single returned sequence (prompt + continuation) and show it
print(
    '\n\n',
    tokenizer.decode(output_tokens[0], skip_special_tokens=True),
)





 ### Complete SQL query only and with no explanation
### SQL tables followed by foreign key information:
#
# continents ( ContId, Continent )
# countries ( CountryId, CountryName, Continent )
# car_makers ( Id, Maker, FullName, Country )
# model_list ( ModelId, Maker, Model )
# car_names ( MakeId, Model, Make )
# cars_data ( Id, MPG, Cylinders, Edispl, Horsepower, Weight, Accelerate, Year )
#
# countries.Continent can be joined with continents.ContId
# car_makers.Country can be joined with countries.CountryId
# model_list.Maker can be joined with car_makers.Id
# car_names.Model can be joined with model_list.Model
# cars_data.Id can be joined with car_names.MakeId
#
### Question:
#
# For each continent, list its id, name, and how many countries it has?
#
### SQL:
#
# SELECT ContId, Continent, COUNT(CountryId) AS Countries
# FROM continents
# GROUP BY ContId, Continent
#
### Explanation:
#
# The SQL query above is a GROUP BY query.
#
# The GROUP BY clause is used to group the results o

In [None]:
# batch = tokenizer("""Below is an instruction that describes a task, paired with an input that provides further context. Write an SQL query that appropriately completes the request. Make sure to use the table relationships when joining two or more tables.

# ### Instruction:

# For each continent, list its id, name, and how many countries it has?

# ### Input:

# CREATE TABLE continents (ContId NUMBER PRIMARY KEY, Continent TEXT); CREATE TABLE countries (CountryId NUMBER PRIMARY KEY, CountryName TEXT, Continent NUMBER); CREATE TABLE car_makers (Id NUMBER PRIMARY KEY, Maker TEXT, FullName TEXT, Country TEXT); CREATE TABLE model_list (ModelId NUMBER PRIMARY KEY, Maker NUMBER, Model TEXT); CREATE TABLE car_names (MakeId NUMBER PRIMARY KEY, Model TEXT, Make TEXT); CREATE TABLE cars_data (Id NUMBER PRIMARY KEY, MPG TEXT, Cylinders NUMBER, Edispl NUMBER, Horsepower TEXT, Weight NUMBER, Accelerate NUMBER, Year NUMBER);

# ### Table Relationships:

# countries Continent RELATES TO continents ContId; car_makers Country RELATES TO countries CountryId; model_list Maker RELATES TO car_makers Id; car_names Model RELATES TO model_list Model; cars_data Id RELATES TO car_names MakeId

# ### SQL:
# """, return_tensors='pt')

# with torch.cuda.amp.autocast():
#     input_ids = batch['input_ids'].to('cuda')  # Move input_ids to the CUDA device
#     output_tokens = model.generate(input_ids=input_ids, max_new_tokens=100)

# print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

### LoRA Configuration

In [None]:
# LoRA adapter definition; every value comes from the LORA_* constants in the
# Parameters cell. This object is later passed to SFTTrainer as peft_config.
peft_config = LoraConfig(
    task_type=LORA_TASK_TYPE,
    target_modules=LORA_TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias=LORA_BIAS,
)

### Training Configuration

In [None]:
# Set training parameters.
#
# Checkpointing note: `load_best_model_at_end` can only restore a checkpoint that was
# actually written. The previous `save_steps = 0` disabled step-based saving entirely, so
# no "best" checkpoint ever existed and the weights from the final step were kept even
# when an earlier evaluation had a lower eval_loss. Checkpoints are now written on the
# same schedule as evaluation so the best one can be reloaded when training finishes.
training_arguments = TrainingArguments(
    # Output location and training budget
    output_dir = TRAINER_OUTPUT_DIR,
    max_steps = TRAINER_MAX_STEPS,

    # Precision and optimiser
    fp16 = TRAINER_FP16,
    bf16 = TRAINER_BF16,
    optim = TRAINER_OPTIM,
    learning_rate = TRAINER_LEARNING_RATE,
    lr_scheduler_type = TRAINER_LR_SCHEDULER_TYPE,
    warmup_ratio = TRAINER_WARMUP_RATIO,
    weight_decay = TRAINER_WEIGHT_DECAY,
    max_grad_norm = TRAINER_MAX_GRAD_NORM,

    # Batching
    per_device_train_batch_size = TRAINER_PER_DEVICE_TRAIN_BATCH_SIZE,
    gradient_accumulation_steps = TRAINER_GRADIENT_ACCUMULATION_STEPS,
    group_by_length = TRAINER_GROUP_BY_LENGTH,

    # Logging and evaluation
    logging_steps = TRAINER_LOGGING_STEPS,
    evaluation_strategy = TRAINER_EVALUATION_STRATEGY,
    eval_steps = TRAINER_EVAL_STEPS,

    # Checkpointing: must match the evaluation schedule for best-model selection
    save_strategy = TRAINER_EVALUATION_STRATEGY,
    save_steps = TRAINER_EVAL_STEPS,
    save_total_limit = 2,  # bound disk usage under TRAINER_OUTPUT_DIR
    load_best_model_at_end = True,
    metric_for_best_model = 'eval_loss',
)

### SFT Trainer

In [None]:
# Build the supervised fine-tuning trainer: base model + LoRA config, trained on the
# "prompt" column of the train split and evaluated on the validation split.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset['validation'],
    dataset_text_field="prompt",
    max_seq_length=SFT_MAX_SEQ_LENGTH,
    packing=SFT_PACKING,
)



Map:   0%|          | 0/8659 [00:00<?, ? examples/s]

Map:   0%|          | 0/1034 [00:00<?, ? examples/s]

### Model Training

In [None]:
# Train model: runs the fine-tuning loop configured above and returns a TrainOutput
# (global step, average training loss and runtime metrics), shown as the cell result.
trainer.train()

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
25,0.861,0.85275
50,0.732,0.764827
75,0.516,0.749388
100,0.6286,0.722037
125,0.4484,0.750181
150,0.515,0.735155
175,0.3889,0.773427
200,0.4152,0.772995
225,0.3282,0.750583
250,0.3956,0.781284


TrainOutput(global_step=300, training_loss=0.4891168228785197, metrics={'train_runtime': 2313.3001, 'train_samples_per_second': 1.037, 'train_steps_per_second': 0.13, 'total_flos': 1.978501022436557e+16, 'train_loss': 0.4891168228785197, 'epoch': 0.28})

In [None]:
# Save trained model under the path given by FINETUNED_MODEL.
# NOTE(review): since the trainer was built with a peft_config, this presumably writes
# only the LoRA adapter weights (not a full model) -- confirm. The merge cell later
# passes the same FINETUNED_MODEL value to PeftModel.from_pretrained().
trainer.model.save_pretrained(FINETUNED_MODEL)

### Inference

In [None]:
# Post-training check: same prompt as the baseline cell, now answered by the
# fine-tuned model.
#
# The training loop switches the network between train and eval mode, so the mode it is
# left in depends on the last thing the Trainer did. Put it in eval mode explicitly so
# that LoRA dropout is disabled while generating.
model.eval()

batch = tokenizer("""### Complete SQL query only and with no explanation
### SQL tables followed by foreign key information:
#
# continents ( ContId, Continent )
# countries ( CountryId, CountryName, Continent )
# car_makers ( Id, Maker, FullName, Country )
# model_list ( ModelId, Maker, Model )
# car_names ( MakeId, Model, Make )
# cars_data ( Id, MPG, Cylinders, Edispl, Horsepower, Weight, Accelerate, Year )
#
# countries.Continent can be joined with continents.ContId
# car_makers.Country can be joined with countries.CountryId
# model_list.Maker can be joined with car_makers.Id
# car_names.Model can be joined with model_list.Model
# cars_data.Id can be joined with car_names.MakeId
#
### Question:
#
# For each continent, list its id, name, and how many countries it has?
#
### SQL:
""", return_tensors='pt')

with torch.cuda.amp.autocast():
    input_ids = batch['input_ids'].to('cuda')  # Move input_ids to the CUDA device
    output_tokens = model.generate(input_ids=input_ids, max_new_tokens=100)

# Decode the single returned sequence (prompt + continuation) and show it
print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))



 ### Complete SQL query only and with no explanation
### SQL tables followed by foreign key information:
#
# continents ( ContId, Continent )
# countries ( CountryId, CountryName, Continent )
# car_makers ( Id, Maker, FullName, Country )
# model_list ( ModelId, Maker, Model )
# car_names ( MakeId, Model, Make )
# cars_data ( Id, MPG, Cylinders, Edispl, Horsepower, Weight, Accelerate, Year )
#
# countries.Continent can be joined with continents.ContId
# car_makers.Country can be joined with countries.CountryId
# model_list.Maker can be joined with car_makers.Id
# car_names.Model can be joined with model_list.Model
# cars_data.Id can be joined with car_names.MakeId
#
### Question:
#
# For each continent, list its id, name, and how many countries it has?
#
### SQL:
#
# SELECT ContId ,  Continent ,  COUNT(*) FROM countries GROUP BY Continent
#
### End.
#
### SQL:
#
# SELECT ContId ,  Continent FROM continents GROUP BY Continent
#
### End.
#
### SQL:
#
# SELECT ContId ,  Continent FROM co

In [None]:
# batch = tokenizer("""Below is an instruction that describes a task, paired with an input that provides further context. Write an SQL query that appropriately completes the request. Make sure to use the table relationships when joining two or more tables.

# ### Instruction:

# For each continent, list its id, name, and how many countries it has?

# ### Input:

# CREATE TABLE continents (ContId NUMBER PRIMARY KEY, Continent TEXT); CREATE TABLE countries (CountryId NUMBER PRIMARY KEY, CountryName TEXT, Continent NUMBER); CREATE TABLE car_makers (Id NUMBER PRIMARY KEY, Maker TEXT, FullName TEXT, Country TEXT); CREATE TABLE model_list (ModelId NUMBER PRIMARY KEY, Maker NUMBER, Model TEXT); CREATE TABLE car_names (MakeId NUMBER PRIMARY KEY, Model TEXT, Make TEXT); CREATE TABLE cars_data (Id NUMBER PRIMARY KEY, MPG TEXT, Cylinders NUMBER, Edispl NUMBER, Horsepower TEXT, Weight NUMBER, Accelerate NUMBER, Year NUMBER);

# ### Table Relationships:

# countries Continent RELATES TO continents ContId; car_makers Country RELATES TO countries CountryId; model_list Maker RELATES TO car_makers Id; car_names Model RELATES TO model_list Model; cars_data Id RELATES TO car_names MakeId

# ### Response:
# """, return_tensors='pt')

# with torch.cuda.amp.autocast():
#     input_ids = batch['input_ids'].to('cuda')  # Move input_ids to the CUDA device
#     output_tokens = model.generate(input_ids=input_ids, max_new_tokens=100)

# print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

### Save Model on HuggingFace

In [None]:
# Empty VRAM before the base model is reloaded in FP16 for merging.
# Step 1: drop the notebook-level references to the training objects.
del model
del trainer
del batch

# Step 2: collect the now-unreachable objects, then hand PyTorch's cached CUDA blocks
# back to the driver. gc.collect() by itself leaves the freed memory inside PyTorch's
# caching allocator, so the GPU would still look full from the outside.
gc.collect()
torch.cuda.empty_cache()

0

In [None]:
# Display the base checkpoint name that the merge cell reloads
BASE_MODEL_NAME

'NousResearch/Llama-2-7b-hf'

In [None]:
# Display the name used both to load the saved adapter and as the push_to_hub target
FINETUNED_MODEL

'Hawk28/Llama-7B-RP'

In [None]:
# Reload the base checkpoint un-quantized, in FP16
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.float16,
    device_map=DEVICE_MAP,
    return_dict=True,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Attach the saved LoRA adapter to it, then fold the adapter weights into the base
# weights so that `model` is a plain, standalone model again.
model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL).merge_and_unload()

# Fresh tokenizer with the same padding setup as during training, so it can be
# published next to the merged model.
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_NAME,
    trust_remote_code=True,
)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# The generation config inherited from the base checkpoint sets a sampling temperature
# while `do_sample` is False. Saving it in that state fails validation (this cell used to
# emit "`do_sample` is set to `False`. However, `temperature` is set to `0.9`").
# Reset the sampling-only fields to their neutral value of 1.0 so the uploaded config is
# valid and decoding stays greedy. top_p is reset as well because it is likewise only
# used in sample-based generation.
model.generation_config.temperature = 1.0
model.generation_config.top_p = 1.0

# Upload the merged model weights, then the tokenizer files, to the same Hub repo
model.push_to_hub(FINETUNED_MODEL)
tokenizer.push_to_hub(FINETUNED_MODEL)


Thrown during validation:
`do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Hawk28/Llama-7B-RP/commit/8705ea02bfebfb206b117b0c4e91b3a8c39ee082', commit_message='Upload tokenizer', commit_description='', oid='8705ea02bfebfb206b117b0c4e91b3a8c39ee082', pr_url=None, pr_revision=None, pr_num=None)