In [None]:
!pip install datasets
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceTB/SmolLM-135M-Instruct"



In [None]:
import torch

# Use the GPU when one is visible and fall back to CPU otherwise, so this
# cell also runs on CPU-only runtimes instead of failing in `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

# Pretrained SmolLM

In [None]:
def chat_template(question, context):
    """Build the plain-text inference prompt for one question/schema pair.

    The prompt is an instruction line, a ``Schema:`` line and a ``question:``
    line. Leading whitespace is stripped from every line of the assembled
    text (including lines that come from ``context`` or ``question``), and
    the result ends with a newline.
    """
    indented = (
        "    You are a SQL expert. Given the Schema, generate a SQL query for the question asked.\n"
        f"    Schema:{context}\n"
        f"    question:{question}\n"
        "    "
    )
    # Drop the indentation from each line before re-joining.
    return "\n".join(piece.lstrip() for piece in indented.splitlines())

In [None]:
def model_response(question, context):
  """Generate, print and return the model's output for one question/schema pair.

  Args:
    question: Natural-language question to be answered with SQL.
    context: Schema text shown to the model (e.g. a CREATE TABLE statement).

  Returns:
    The decoded text of ``outputs.sequences[0]`` with special tokens
    removed. The same text is printed.
  """
  # A single prompt needs neither padding nor truncation; requesting
  # truncation without a max_length only triggered a tokenizer warning.
  inputs = tokenizer(
      chat_template(question, context),
      return_tensors="pt",
  ).to(device)

  # Pass tensors by keyword so the call works regardless of how the model
  # wrapper declares `generate`.
  outputs = model.generate(
      input_ids=inputs["input_ids"],
      attention_mask=inputs["attention_mask"],
      max_new_tokens=50,
      temperature=0.1,
      top_p=0.99,
      do_sample=True,
      return_dict_in_generate=True,
  )

  # Decode, print, and also return the text (as `finetune_response` does)
  # so callers can reuse it.
  response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
  print(response)
  return response

In [None]:
# Baseline probe: ask the model loaded above a "how many" question over a
# one-column schema and print what it produces.
question = "How many heads of the departments are older than 56 ?"
context = "CREATE TABLE head (age INTEGER)"
model_response(question, context)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


SELECT SUM(age) FROM head WHERE age > 56


In [None]:
# Baseline probe: multi-column selection with an ordering requirement.
question = "List the name, born state and age of the heads of departments ordered by age."
context = "CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)"
model_response(question, context)

SELECT name FROM head WHERE born_state = "d departments" AND age = "order"


In [None]:
# Baseline probe: single-column selection sorted ascending by another column.
question = "What are the themes of farm competitions sorted by year in ascending order?"
context = "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)"
model_response(question, context)

SELECT theme FROM farm_competition WHERE YEAR = " ascending order"


# Finetuning SmolLM

In [None]:
from datasets import load_dataset, Dataset
# Define the dataset for fine-tuning
dataset_id = "b-mc2/sql-create-context"

# Download the train split and convert it to a pandas DataFrame; later cells
# read its "question", "context" and "answer" columns.
data = load_dataset(dataset_id, split="train")
df = data.to_pandas()

README.md:   0%|          | 0.00/4.43k [00:00<?, ?B/s]

sql_create_context_v4.json:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/78577 [00:00<?, ? examples/s]

In [None]:
def chat_template_for_training(context, answer, question):
    """Render one training example as a two-turn chat transcript.

    The user turn carries the instruction, the schema and the question; the
    assistant turn carries the reference answer. Each turn is wrapped in
    ``<|im_start|>`` / ``<|im_end|>`` markers. Leading whitespace is removed
    from every line of the assembled text, and the result ends with a newline.
    """
    indented_lines = [
        "    <|im_start|>user",
        "    You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.",
        f"    Schema:{context}",
        f"    question:{question}",
        "    <|im_end|>",
        "    <|im_start|>assistant",
        f"    {answer}",
        "    <|im_end|>",
        "    ",
    ]
    raw = "\n".join(indented_lines)
    # Strip indentation line by line, then stitch the lines back together.
    return "\n".join(piece.lstrip() for piece in raw.splitlines())

In [None]:
# Render every row through chat_template_for_training and keep the result
# in a new "text" column of the dataframe.
df["text"] = [
    chat_template_for_training(ctx, ans, qst)
    for ctx, ans, qst in zip(df["context"], df["answer"], df["question"])
]

# Wrap the augmented dataframe in a Dataset object again.
formatted_data = Dataset.from_pandas(df)

In [None]:
# Load and prepare dataset
dataset_id = "b-mc2/sql-create-context"
data = load_dataset(dataset_id, split="train")
df = data.to_pandas()

# Prepare training data
def prepare_training_features(examples, max_length=512, max_answer_length=128):
    """Tokenize a batch of rows into causal-LM training features.

    A decoder-only model is trained on one sequence, ``prompt + answer``,
    and needs ``labels`` with exactly the same length as ``input_ids``.
    Padding the prompt to 512 tokens and a separate answer to 128 tokens
    (the encoder-decoder recipe) gives mismatched shapes and labels that do
    not line up with the inputs.

    Args:
        examples: Batch mapping with "question", "context" and "answer"
            lists, as passed by ``Dataset.map(batched=True)``.
        max_length: Fixed length of every returned sequence.
        max_answer_length: Maximum number of answer tokens kept (including
            the end-of-sequence token when the tokenizer defines one).

    Returns:
        Dict with "input_ids", "attention_mask" and "labels"; each is a list
        of ``max_length``-long integer lists. Prompt and padding positions
        in "labels" are -100 so only answer tokens contribute to the loss.

    Raises:
        ValueError: If the tokenizer defines neither a pad nor an EOS token.
    """
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else eos_id
    if pad_id is None:
        raise ValueError("tokenizer defines neither pad_token_id nor eos_token_id")

    features = {"input_ids": [], "attention_mask": [], "labels": []}
    rows = zip(examples["question"], examples["context"], examples["answer"])
    for question, context, answer in rows:
        answer_ids = tokenizer(answer, add_special_tokens=False)["input_ids"]
        if eos_id is not None:
            # Terminate the target so the model learns where the query ends.
            answer_ids = answer_ids[: max_answer_length - 1] + [eos_id]
        else:
            answer_ids = answer_ids[:max_answer_length]

        prompt_ids = tokenizer(
            chat_template(question, context), add_special_tokens=False
        )["input_ids"]
        # Trim the prompt (not the answer) so the pair fits into max_length.
        prompt_ids = prompt_ids[: max(max_length - len(answer_ids), 0)]

        input_ids = (prompt_ids + answer_ids)[:max_length]
        labels = ([-100] * len(prompt_ids) + answer_ids)[:max_length]
        n_pad = max_length - len(input_ids)

        # Fixed-length padding keeps all three columns rectangular, so any
        # collator can stack them without knowing how to pad "labels".
        features["input_ids"].append(input_ids + [pad_id] * n_pad)
        features["attention_mask"].append([1] * len(input_ids) + [0] * n_pad)
        features["labels"].append(labels + [-100] * n_pad)

    return features

# Convert dataset to features
formatted_data = Dataset.from_pandas(df)
formatted_data = formatted_data.map(
    prepare_training_features,
    batched=True,
    remove_columns=formatted_data.column_names
)

Map:   0%|          | 0/78577 [00:00<?, ? examples/s]

In [None]:
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import (
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    AutoTokenizer
)

# Configure LoRA for SmolLM: adapt all four attention projections.
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj"
    ]
)

# Load base model and tokenizer. `trust_remote_code` is not needed: the same
# checkpoint is loaded without it earlier in this notebook.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# The KV cache is incompatible with gradient checkpointing (the Trainer
# otherwise disables it with a warning), so switch it off explicitly.
model.config.use_cache = False

# With a frozen base model the embedding output does not require grad, which
# breaks re-entrant gradient checkpointing; this hook makes it require grad.
model.enable_input_require_grads()

# Apply LoRA to model
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Training arguments. Only `max_steps` bounds the run: when it is set it
# overrides `num_train_epochs`, so the latter is intentionally not passed.
training_args = TrainingArguments(
    output_dir="smollm-sql-v1",
    per_device_train_batch_size=4,  # Reduced batch size due to model size
    gradient_accumulation_steps=4,   # Increased for stability
    learning_rate=1e-4,             # Adjusted learning rate
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    max_steps=5000,
    fp16=True,
    gradient_checkpointing=True,    # Added for memory efficiency
    optim="adamw_torch"            # Specified optimizer
)

# Initialize trainer
# NOTE(review): the recorded run of this cell aborted with a CUDA device-side
# assert. Presumably some token/label id in `formatted_data` is out of range
# for this model's vocabulary -- confirm the dataset was tokenized with this
# checkpoint's tokenizer before re-running.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=formatted_data,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Save the final adapter weights
trainer.save_model("smollm-sql-v1-final")

# Reload a clean base model and fold the trained adapter into its weights
base_model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto"
)
peft_model = PeftModel.from_pretrained(
    base_model,
    "smollm-sql-v1-final",
    device_map="auto"
)
model = peft_model.merge_and_unload()

# Save the merged model
model.save_pretrained("smollm-sql-v1-merged")

  trainer = Trainer(
max_steps is given, it will override any value given in num_train_epochs


trainable params: 3,686,400 || all params: 138,201,408 || trainable%: 2.6674


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# The key/value cache only helps generation; switch it off for training.
model.config.use_cache = False

# `pretraining_tp` is the tensor-parallelism degree used during pretraining
# (an integer), not a temperature. 1 selects the standard, non-sliced
# computation path.
model.config.pretraining_tp = 1

In [None]:
from peft import LoraConfig

# Define the PEFT configuration.
peft_config = LoraConfig(
    # Set the rank of the LoRA projection matrix.
    r=64,

    # Set the alpha parameter for the LoRA projection matrix.
    lora_alpha=64,

    # Set the dropout rate for the LoRA projection matrix.
    lora_dropout=0.05,

    # Set the bias term to "none".
    bias="none",

    # SmolLM is loaded with AutoModelForCausalLM (decoder-only), so the task
    # type must be "CAUSAL_LM". "SEQ_2_SEQ_LM" wraps the model in
    # PeftModelForSeq2SeqLM, the class named in the `generate()` TypeError
    # recorded further down this notebook.
    task_type="CAUSAL_LM",
)

In [None]:
from transformers import TrainingArguments

# Define the training arguments.
training_args = TrainingArguments(
    # Set the output directory for the training run.
    output_dir="SmolLM-135M-Instruct-sqllm-v1",

    # Set the per-device training batch size.
    per_device_train_batch_size=6,

    # Set the number of gradient accumulation steps.
    gradient_accumulation_steps=2,

    # Set the optimizer to use (paged AdamW comes from bitsandbytes).
    optim="paged_adamw_32bit",

    # Set the learning rate.
    learning_rate=2e-4,

    # Set the learning rate scheduler type.
    lr_scheduler_type="cosine",

    # Set the save strategy.
    save_strategy="epoch",

    # Set the logging steps.
    logging_steps=10,

    # Set the maximum number of training steps. This alone bounds the run:
    # when `max_steps` is given it overrides `num_train_epochs`, so that
    # argument is intentionally not passed.
    max_steps=500,

    # Enable fp16 training.
    fp16=True,
)

In [None]:
!pip install trl bitsandbytes



In [None]:
pip install --upgrade bitsandbytes



In [None]:
import torch

In [None]:
from trl import SFTTrainer

# Initialize the SFTTrainer.
# `formatted_data` is already tokenized (it holds "input_ids",
# "attention_mask" and "labels"), so no text field is configured here.
# The previous `dataset_text_field="labels"` pointed at token ids rather than
# a text column.
trainer = SFTTrainer(
    # Set the model to be trained.
    model=model,

    # Set the training dataset.
    train_dataset=formatted_data,

    # Set the PEFT configuration.
    peft_config=peft_config,

    # Set the training arguments.
    args=training_args,

    # Set the tokenizer.
    tokenizer=tokenizer,

    # Disable packing.
    packing=False,

#     # Set the maximum sequence length.
#     max_seq_length=1024
)

trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
10,0.0093
20,0.004


KeyboardInterrupt: 

In [None]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# save locally
# Only the object currently bound to `model` is written; these calls do not
# save or upload the tokenizer.
model.save_pretrained("SmolLM-135M-Instruct-sqllm-v1")

# push to the hub
# NOTE(review): the recorded output of this cell reports
# "Upload BartForConditionalGeneration", i.e. `model` was a BART model when
# this last ran -- confirm `model` is the fine-tuned SmolLM before publishing
# under this repository name.
model.push_to_hub("yashyaks/SmolLM-135M-Instruct-sqllm-v1")

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/572M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/yashyaks/SmolLM-135M-Instruct-sqllm-v1/commit/e9490282976b3f9acea6eb7656212435dc0949db', commit_message='Upload BartForConditionalGeneration', commit_description='', oid='e9490282976b3f9acea6eb7656212435dc0949db', pr_url=None, repo_url=RepoUrl('https://huggingface.co/yashyaks/SmolLM-135M-Instruct-sqllm-v1', endpoint='https://huggingface.co', repo_type='model', repo_id='yashyaks/SmolLM-135M-Instruct-sqllm-v1'), pr_revision=None, pr_num=None)

In [None]:
!pip install torch



In [None]:
import torch

In [None]:
# from peft import AutoPeftModelForCausalLM, PeftModel

# # Load the pre-trained model.
# model = AutoModelForCausalLM.from_pretrained(
#     checkpoint,
#     # torch_dtype=torch.float16,
#     # load_in_8bit=False,
#     device_map="auto",
#     # trust_remote_code=True
# )

# # Load the PEFT model from a checkpoint.
# model_path = "/content/SmolLM-135M-Instruct-sqllm-v1/checkpoint-500"
# peft_model = PeftModel.from_pretrained(model, model_path, from_transformers=True, device_map="auto")

# # Wrap the model with the PEFT model.
# model = peft_model.merge_and_unload()

In [None]:
def finetune_response(question, context):
  """Generate the fine-tuned model's answer for one question/schema pair.

  Args:
    question: Natural-language question to be answered with SQL.
    context: Schema text shown to the model (e.g. a CREATE TABLE statement).

  Returns:
    The decoded generation as a string; it is also printed. For a
    decoder-only model the echoed prompt tokens are removed first, so the
    result can be compared directly against a reference query.
  """
  # A single prompt needs neither padding nor truncation; requesting
  # truncation without a max_length only triggers a tokenizer warning.
  inputs = tokenizer(
        chat_template(question, context),
        return_tensors="pt",
    ).to(device)

  # PEFT model wrappers declare `generate(**kwargs)` only, so a positional
  # `input_ids` raises a TypeError. Pass every tensor by keyword.
  outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=50,
        temperature=0.1,
        top_p=0.99,
        do_sample=True,
        return_dict_in_generate=True,
    )

  generated_ids = outputs.sequences[0]
  # Decoder-only models return prompt + continuation; keep the continuation.
  # Encoder-decoder models return only the decoder output, so leave it as is.
  if not getattr(model.config, "is_encoder_decoder", False):
    generated_ids = generated_ids[inputs["input_ids"].shape[-1]:]

  response = tokenizer.decode(generated_ids, skip_special_tokens=True)
  print(response)
  return response

In [None]:
# Re-run the "how many" baseline question through finetune_response so its
# output can be compared with the earlier model_response result.
question = "How many heads of the departments are older than 56 ?"
context = "CREATE TABLE head (age INTEGER)"
finetune_response(question, context)

TypeError: PeftModelForSeq2SeqLM.generate() takes 1 positional argument but 2 were given

In [None]:
# Re-run the multi-column "ordered by age" baseline question through
# finetune_response.
question = "List the name, born state and age of the heads of departments ordered by age."
context = "CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)"
finetune_response(question, context)

TypeError: PeftModelForSeq2SeqLM.generate() takes 1 positional argument but 2 were given

In [None]:
# Re-run the "sorted by year ascending" baseline question through
# finetune_response.
question = "What are the themes of farm competitions sorted by year in ascending order?"
context = "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)"
finetune_response(question, context)

TypeError: PeftModelForSeq2SeqLM.generate() takes 1 positional argument but 2 were given

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from nltk.translate.bleu_score import sentence_bleu
import pandas as pd

# Load the dataset
dataset_id = "b-mc2/sql-create-context"
data = load_dataset(dataset_id, split="train")
df = data.to_pandas()

# # Load the model and tokenizer
# tokenizer = AutoTokenizer.from_pretrained("your-model-name")
# model = AutoModelForCausalLM.from_pretrained("your-model-name").to("cuda")

# Function to format the prompt
def chat_template(question, context):
    """Build the "ONLY the SQL query" inference prompt for one example.

    Note: this definition replaces any earlier ``chat_template`` in the
    notebook namespace once the cell runs. The prompt is an instruction
    line, a ``Schema:`` line and a ``question:`` line; leading whitespace is
    stripped from every line and the result ends with a newline.
    """
    instruction = "You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting."
    body = f"    {instruction}\n    Schema:{context}\n    question:{question}\n    "
    cleaned = []
    for raw_line in body.splitlines():
        cleaned.append(raw_line.lstrip())
    return "\n".join(cleaned)

# Function to calculate BLEU score
def calculate_bleu_score(predictions, references):
    """Return the mean sentence-level BLEU over paired predictions/references.

    Both strings of a pair are split on whitespace and scored with
    ``sentence_bleu`` using the single reference. Pairs are formed with
    ``zip``, so extra items in the longer list are ignored. Returns 0.0 when
    either list is empty.
    """
    if not (predictions and references):
        return 0.0  # nothing to score
    scores = [
        sentence_bleu([reference.split()], prediction.split())
        for prediction, reference in zip(predictions, references)
    ]
    return sum(scores) / len(scores)

# Function to calculate Exact Match (EM) score
def calculate_exact_match(predictions, references):
    """Return the fraction of pairs whose stripped strings are identical.

    Pairs are formed with ``zip``, so extra items in the longer list are
    ignored. Returns 0.0 when either list is empty, matching
    ``calculate_bleu_score``, instead of dividing by zero.
    """
    if not predictions or not references:
        return 0.0
    matches = [pred.strip() == ref.strip() for pred, ref in zip(predictions, references)]
    return sum(matches) / len(matches)

# Process the dataset
# Generating for every row of the 78k-row split takes far too long (the
# recorded run was interrupted before any metric was computed), so score a
# bounded, reproducible sample instead. Set MAX_EVAL_EXAMPLES to None to
# evaluate every row.
MAX_EVAL_EXAMPLES = 200
EVAL_SEED = 42

if MAX_EVAL_EXAMPLES is None:
    eval_df = df
else:
    eval_df = df.sample(n=min(MAX_EVAL_EXAMPLES, len(df)), random_state=EVAL_SEED)

# References and generations come from the same rows, in the same order.
reference_sqls = eval_df["answer"].tolist()
generated_sqls = [
    finetune_response(question, context)
    for question, context in zip(eval_df["question"], eval_df["context"])
]

# Calculate BLEU and Exact Match scores
bleu = calculate_bleu_score(generated_sqls, reference_sqls)
em = calculate_exact_match(generated_sqls, reference_sqls)

# Print results
for i, (gen, ref) in enumerate(zip(generated_sqls, reference_sqls)):
    print(f"Query {i+1}:")
    print(f"Generated: {gen}")
    print(f"Reference: {ref}")
    print("---")

print(f"BLEU score: {bleu:.4f}")
print(f"Exact Match (EM): {em:.4f}")


\ 
user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE head (age INTEGER)
question:How many heads of the departments are older than 56 ?

assistant
SELECT MAX(head) FROM head
#queryset
#question
#markdown formatting
#schema
#reserved
# # question
# # Markdown formatting
# #schema
# #reserved
# # # #
\ 
user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)
question:List the name, born state and age of the heads of departments ordered by age.

assistant
SELECT name, born_state, age FROM head GROUP BY name, born_state
# Lake Tahoe, California
# Lake Tahoe, California

SELECT name, born_state, age FROM head GROUP BY name, born
\ 
user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE

KeyboardInterrupt: 

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Reload model and tokenizer from Hugging Face Hub
# NOTE(review): this rebinds the notebook-wide `model` and `tokenizer` names
# to a different, encoder-decoder checkpoint (the recorded output shows
# BartForConditionalGeneration). Helpers that read those globals will use this
# model from here on -- confirm that is intended.
model = AutoModelForSeq2SeqLM.from_pretrained("Hiyaj/bart-small-text2sql")
tokenizer = AutoTokenizer.from_pretrained("Hiyaj/bart-small-text2sql")

# Ensure the model is on the correct device
device = "cuda" if torch.cuda.is_available() else "cpu"
# As the cell's last expression, this call's return value (the module) is
# what the notebook prints below.
model.to(device)

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/292 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]

BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_lay