<a href="https://colab.research.google.com/github/HiyaJain22/Text2Sql/blob/main/SmolLM_360M.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceTB/SmolLM-360M-Instruct"



In [None]:
import torch

# Use the GPU when one is visible; otherwise fall back to the CPU so that
# loading the model does not fail on a CPU-only runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.59k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/724M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

# Pretrained SmolLM

In [None]:
def chat_template(question, context):
    """Build the inference prompt for a question/schema pair.

    The prompt is a single user turn (instruction, schema, question) followed
    by an opened assistant turn, so the model continues with its answer.

    Args:
        question: Natural-language question to be answered.
        context: Schema text inserted after ``Schema:``.

    Returns:
        The prompt string. Leading whitespace is removed from every line
        (including lines that come from ``question`` or ``context``) and the
        string ends with a newline after ``<|im_start|>assistant``.
    """
    segments = (
        "<|im_start|>user",
        "You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.",
        f"Schema:{context}",
        f"question:{question}",
        "<|im_end|>",
        "<|im_start|>assistant",
    )
    # Re-split after joining so multi-line arguments are normalised line by line.
    cleaned = [piece.lstrip() for piece in "\n".join(segments).splitlines()]
    # An empty final element yields the trailing newline after the assistant tag.
    cleaned.append("")
    return "\n".join(cleaned)

In [None]:
def model_response(question, context, max_new_tokens=50):
    """Generate a completion for a question/schema pair and print it.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device`` objects and
    builds the prompt with ``chat_template``.

    Args:
        question: Natural-language question to be answered.
        context: Schema text placed in the prompt.
        max_new_tokens: Upper bound on the number of generated tokens. The
            default of 50 is the value that used to be hard-coded; pass a
            larger value when the printed query is cut off.

    Returns:
        None. The decoded sequence (prompt followed by the completion, with
        special tokens removed) is printed.
    """
    # Tokenize the prompt; the attention mask is returned alongside the ids.
    encoded = tokenizer(
        chat_template(question, context),
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)

    # Sample with a very low temperature so the output stays close to greedy.
    generation = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],  # pass the mask explicitly
        max_new_tokens=max_new_tokens,
        temperature=0.1,
        top_p=0.99,
        do_sample=True,
        return_dict_in_generate=True,
    )

    # Decode the single returned sequence and print it.
    print(tokenizer.decode(generation.sequences[0], skip_special_tokens=True))

In [None]:
# Baseline check of the pretrained checkpoint on a single-column schema.
context = "CREATE TABLE head (age INTEGER)"
question = "How many heads of the departments are older than 56 ?"
model_response(question=question, context=context)

From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.


user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE head (age INTEGER)
question:How many heads of the departments are older than 56 ?

assistant
Here is the SQL query:

```sql
SELECT COUNT(DISTINCT department.age) AS number_of_heads
FROM department
GROUP BY department.age
HAVING COUNT(DISTINCT


In [None]:
# Baseline check of the pretrained checkpoint on a two-column schema.
context = "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)"
question = "What are the themes of farm competitions sorted by year in ascending order?"
model_response(question=question, context=context)

user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)
question:What are the themes of farm competitions sorted by year in ascending order?

assistant
Here is the SQL query:
```sql
SELECT Theme, Year
FROM farm_competition
GROUP BY Theme, Year
HAVING COUNT(Theme) > 1
ORDER BY Year ASC
LIMIT


In [None]:
# Same prompt as the previous cell, run a second time; generation samples
# (do_sample=True), so the printed output can differ between runs.
context = "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)"
question = "What are the themes of farm competitions sorted by year in ascending order?"
model_response(question=question, context=context)

user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)
question:What are the themes of farm competitions sorted by year in ascending order?

assistant
Here is the SQL query:
```sql
SELECT Theme, Year
FROM farm_competition
GROUP BY Theme, Year
ORDER BY Year ASC
LIMIT 10;
```
Explanation:

* The


# Finetuning SmolLM

In [None]:
from datasets import load_dataset, Dataset

# Hub id of the dataset used for fine-tuning.
dataset_id = "b-mc2/sql-create-context"

# Fetch the train split, then convert it to a pandas DataFrame so the prompt
# text can be built row by row.
data = load_dataset(path=dataset_id, split="train")
df = data.to_pandas()

In [None]:
def chat_template_for_training(context, answer, question):
    """Build one training example: the prompt plus the closed assistant turn.

    Args:
        context: Schema text inserted after ``Schema:``.
        answer: Target text placed in the assistant turn.
        question: Natural-language question inserted after ``question:``.

    Returns:
        The formatted example. Leading whitespace is removed from every line
        (including lines that come from the arguments) and the string ends
        with a newline after the final ``<|im_end|>``.
    """
    segments = (
        "<|im_start|>user",
        "You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.",
        f"Schema:{context}",
        f"question:{question}",
        "<|im_end|>",
        "<|im_start|>assistant",
        f"{answer}",
        "<|im_end|>",
    )
    # Re-split after joining so multi-line arguments are normalised line by line.
    cleaned = [piece.lstrip() for piece in "\n".join(segments).splitlines()]
    # An empty final element yields the trailing newline after the closing tag.
    cleaned.append("")
    return "\n".join(cleaned)

In [None]:
def _row_to_text(row):
    """Format one dataframe row as a training example string."""
    return chat_template_for_training(
        context=row["context"],
        answer=row["answer"],
        question=row["question"],
    )


# Build the "text" column row by row from the context/answer/question columns.
df["text"] = df.apply(_row_to_text, axis=1)

# Wrap the dataframe in a Dataset again so it can be passed to the trainer.
formatted_data = Dataset.from_pandas(df)

In [None]:
# Turn off the generation cache while training.
model.config.use_cache = False

# `pretraining_tp` is the tensor-parallelism degree used during pretraining,
# not a temperature, so it must be an integer. 1 selects the regular
# (non-sliced) computation of the linear layers.
model.config.pretraining_tp = 1

In [None]:
from peft import LoraConfig

# LoRA settings that are passed to the trainer as `peft_config`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",  # the adapted model is used for causal language modelling
    r=64,  # rank of the LoRA matrices
    lora_alpha=64,  # LoRA alpha parameter
    lora_dropout=0.05,  # dropout rate for the LoRA layers
    bias="none",  # bias setting: "none"
)

In [None]:
from transformers import TrainingArguments

# Define the training arguments.
training_args = TrainingArguments(
    # Directory that receives checkpoints and trainer state.
    output_dir="SmolLM-360M-Instruct-sqllm-v1",

    # 6 examples per device x 2 accumulation steps = 12 examples per optimizer
    # step on each device.
    per_device_train_batch_size=6,
    gradient_accumulation_steps=2,

    # Optimizer to use.
    optim="paged_adamw_32bit",

    # Peak learning rate with a cosine schedule.
    learning_rate=2e-4,
    lr_scheduler_type="cosine",

    # Checkpointing and logging cadence.
    save_strategy="epoch",
    logging_steps=10,

    # Training length is set in optimizer steps. `num_train_epochs` is
    # deliberately not passed: when `max_steps` is positive it overrides the
    # epoch count, so the previous `num_train_epochs=10` had no effect and only
    # produced an override warning.
    max_steps=5000,

    # Enable fp16 mixed-precision training.
    fp16=True,
)

In [None]:
!pip install trl bitsandbytes



In [None]:
pip install --upgrade bitsandbytes



In [None]:
import torch

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning trainer: the base model, tokenizer, LoRA settings and
# training arguments defined in the earlier cells are passed in unchanged.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
    train_dataset=formatted_data,
    dataset_text_field="text",  # dataset column that holds the formatted examples
    packing=False,  # packing disabled
    # max_seq_length is not set here (a value of 1024 was tried and left commented out).
)

# Start training; the returned TrainOutput is shown as the cell result.
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/78577 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 37


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 37


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,2.6555
20,2.314
30,1.9631
40,1.4574
50,1.1589
60,1.0722
70,1.0085
80,0.9456
90,0.9802
100,0.9605


TrainOutput(global_step=5000, training_loss=0.6961899001121521, metrics={'train_runtime': 3381.5403, 'train_samples_per_second': 17.743, 'train_steps_per_second': 1.479, 'total_flos': 1.62472442303808e+16, 'train_loss': 0.6961899001121521, 'epoch': 0.7635336336565626})

In [None]:
# save locally
# The trainer applied LoRA to `model` in place, so `model.save_pretrained(...)`
# would write a checkpoint whose weight names include the LoRA wrappers and
# that does not reload cleanly as a plain causal LM. Saving through the trainer
# stores the trained adapter in the standard PEFT layout instead.
trainer.save_model("SmolLM-360M-Instruct-sqllm-v1")

# # push to the hub
# model.push_to_hub("aryanntated/SmolLM-360M-Instruct-sqllm-v1")

In [None]:
!pip install torch



In [None]:
import torch

In [None]:
# from peft import AutoPeftModelForCausalLM, PeftModel

# # Load the pre-trained model.
# model = AutoModelForCausalLM.from_pretrained(
#     checkpoint,
#     # torch_dtype=torch.float16,
#     # load_in_8bit=False,
#     device_map="auto",
#     # trust_remote_code=True
# )

# # Load the PEFT model from a checkpoint.
# model_path = "/content/SmolLM-135M-Instruct-sqllm-v1/checkpoint-500"
# peft_model = PeftModel.from_pretrained(model, model_path, from_transformers=True, device_map="auto")

# # Wrap the model with the PEFT model.
# model = peft_model.merge_and_unload()

In [None]:
# Prepare the prompt.
question = "How many heads of the departments are older than 56 ?"
context = "CREATE TABLE head (age INTEGER)"
prompt = chat_template(question, context)

# Encode the prompt and move the tensors to whichever device the model lives
# on, instead of assuming a CUDA device is present.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate the output.
output = model.generate(**inputs)

# Decode the first returned sequence (special tokens are kept).
text = tokenizer.decode(output[0])

# Print the decoded text: the prompt followed by the generated continuation.
print(text)

<|im_start|>user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE head (age INTEGER)
question:How many heads of the departments are older than 56 ?
<|im_end|>
<|im_start|>assistant
SELECT COUNT(*) FROM head WHERE age > 56
GROUP BY COUNT(*)
HAVING COUNT(*) OVER (PIVOT) MAXES = 


In [None]:
# Prepare the prompt.
question = "List the name, born state and age of the heads of departments ordered by age."
context = "CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)"
prompt = chat_template(question, context)

# Encode the prompt and move the tensors to whichever device the model lives
# on, instead of assuming a CUDA device is present.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate the output.
output = model.generate(**inputs)

# Decode the first returned sequence (special tokens are kept).
text = tokenizer.decode(output[0])

# Print the decoded text: the prompt followed by the generated continuation.
print(text)

<|im_start|>user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE head (name VARCHAR, born_state VARCHAR, age VARCHAR)
question:List the name, born state and age of the heads of departments ordered by age.
<|im_end|>
<|im_start|>assistant
SELECT name, born_state, age FROM head ORDER BY age

Question:List the name, born state and age of the heads of departments ordered by age.

SELECT name,


In [None]:
# Prepare the prompt.
question = "What are the themes of farm competitions sorted by year in ascending order?"
context = "CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)"
prompt = chat_template(question, context)

# Encode the prompt and move the tensors to whichever device the model lives
# on, instead of assuming a CUDA device is present.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate the output.
output = model.generate(**inputs)

# Decode the first returned sequence (special tokens are kept).
text = tokenizer.decode(output[0])

# Print the decoded text: the prompt followed by the generated continuation.
print(text)

<|im_start|>user
You are a SQL expert. Given the Schema, generate ONLY the SQL query without any explanations or markdown formatting.
Schema:CREATE TABLE farm_competition (Theme VARCHAR, YEAR VARCHAR)
question:What are the themes of farm competitions sorted by year in ascending order?
<|im_end|>
<|im_start|>assistant
SELECT Theme FROM farm_competition ORDER BY YEAR

Question:What are the themes of farm competitions sorted by year in ascending order?

SELECT Theme FROM farm_compet


In [None]:
# Save a standalone model. The LoRA weights are folded back into the base
# weights first, so the saved directory holds plain model weights that can be
# reloaded without PEFT (saving the LoRA-wrapped module directly would write
# adapter-specific weight names instead).
merged_model = trainer.model.merge_and_unload()
merged_model.save_pretrained("./SmolLM-360M-Instruct")
tokenizer.save_pretrained("./SmolLM-360M-Instruct")

('./SmolLM-360M-Instruct/tokenizer_config.json',
 './SmolLM-360M-Instruct/special_tokens_map.json',
 './SmolLM-360M-Instruct/vocab.json',
 './SmolLM-360M-Instruct/merges.txt',
 './SmolLM-360M-Instruct/added_tokens.json',
 './SmolLM-360M-Instruct/tokenizer.json')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Specify your model and tokenizer directories
model_dir = "./SmolLM-360M-Instruct"
tokenizer_dir = "./SmolLM-360M-Instruct"

# Load model and tokenizer from saved directories.
# The checkpoint was originally loaded with AutoModelForCausalLM, so it has to
# be reloaded with the same class; AutoModelForSeq2SeqLM is for encoder-decoder
# models and rejects this model's configuration.
model = AutoModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

# Push the model and tokenizer to Hugging Face Hub
model.push_to_hub("Hiyaj/bart-small-text2sql")
tokenizer.push_to_hub("Hiyaj/bart-small-text2sql")