<a href="https://colab.research.google.com/github/sagargowda88/Class-imbalanceness-in-ML/blob/master/Text_To_SQL_Query_Helper_Tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Hugging Face Transformers is an open-source framework for deep learning created by Hugging Face.
# It provides APIs and tools to download state-of-the-art pre-trained models and further tune them to maximize performance.
# These models support common tasks in different modalities, such as natural language processing, computer vision, audio, and multi-modal applications.
# Using pretrained models can reduce your compute costs, carbon footprint,
# and save you the time and resources required to train a model from scratch.

# https://huggingface.co/docs/transformers/index
# https://huggingface.co/docs/hub/index

# Accelerate library to help users easily train a 🤗 Transformers model on any type of distributed setup,
# whether it is multiple GPU's on one machine or multiple GPU's across several machines.

!pip install -q transformers langchain huggingface_hub accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m62.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m101.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m123.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m95.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Authenticate with the Hugging Face Hub so the model files can be downloaded.
# This step requires a free Hugging Face access token.
#
# SECURITY: never hardcode the token in the notebook. It is read from the
# HF_TOKEN environment variable when set; otherwise it is requested through a
# hidden prompt, so it never ends up in the saved file.
# Any token that was previously committed in this notebook must be considered
# leaked and should be revoked in the Hugging Face account settings.
import os
from getpass import getpass

from huggingface_hub import login

hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token.strip())

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
# HuggingFacePipeline is LangChain's wrapper around a Hugging Face Transformers pipeline; it is used below as the LLM of the chain.
from langchain import HuggingFacePipeline

# This line imports the AutoTokenizer class from the transformers library.
# The AutoTokenizer class is used to load tokenizers for various pre-trained language models available in the Hugging Face model hub.
from transformers import AutoTokenizer

# This line imports the entire transformers library, which is a popular library developed by
# Hugging Face for working with various transformer-based models in natural language processing (NLP),
# including both models and tokenizers.
import transformers

# This line imports the torch library, which is the primary library used for deep learning and tensor computations in PyTorch.
import torch

# Hub identifier of the checkpoint used throughout this notebook.
# Model card: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
model = "meta-llama/Llama-2-7b-chat-hf"

# The tokenizer is loaded from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(model)

# Build the text-generation pipeline used by the rest of the notebook.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Loading options: bfloat16 weights, automatic device placement.
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Generation options: sample among the 10 most likely tokens, return a
    # single sequence of at most 512 new tokens, stop at the tokenizer's EOS.
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    max_new_tokens=512,
    eos_token_id=tokenizer.eos_token_id,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [5]:
# Wrap the Transformers pipeline defined earlier in LangChain's
# HuggingFacePipeline so that it can be plugged into a chain as `llm`.
# A temperature of 0 is supplied through `model_kwargs`.
# NOTE(review): the pipeline was created with do_sample=True and top_k=10;
# confirm that temperature=0 given here actually takes effect for an
# already-built pipeline, and that it is compatible with sampling.
llm = HuggingFacePipeline(
    model_kwargs={"temperature": 0},
    pipeline=pipeline,
)

In [6]:
from langchain import PromptTemplate,  LLMChain

# Prompt sent to the model; `{text}` is replaced by the natural-language request.
template = """
             Create a SQL query snippet using the below text:
              ```{text}```
              Just SQL query:
           """

# Declare the template's single placeholder, then bind prompt and LLM into a chain.
prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
)

# Example request that the chain will translate into SQL.
text = ' Extract all the unique values from column "age"\n'



In [7]:
# Run the chain on the example request and show the raw model response.
generated_sql = llm_chain.run(text)
print(generated_sql)

 ```
SELECT DISTINCT age FROM customers;
```
Explanation:

The `SELECT DISTINCT` clause is used to select only the unique values from a column. In this case, we are selecting the `age` column from the `customers` table. The `DISTINCT` keyword tells MySQL to return only the unique values from the `age` column, without duplicates.

Note: The `SELECT DISTINCT` clause can be used with any column(s) in a table, not just the `age` column.

Also, you can use `GROUP BY` clause with `SELECT DISTINCT` to group the data by a column and then select the unique values.

For example:
```
SELECT DISTINCT age, COUNT(*) FROM customers GROUP BY age;
```
This will give you the count of customers in each age group.

You can also use `UNION` operator to combine the results of two or more SELECT statements that have the same number of columns and data types.
```
SELECT DISTINCT age FROM customers UNION SELECT DISTINCT age FROM orders;
```
This will give you the unique values from both `customers` and `orders