In [1]:
from transformers import pipeline
import torch 

# Use the first CUDA device when one is visible; otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Half precision is only worthwhile on a GPU; on the CPU stay in float32.
# (The two branches were previously swapped, which selected float16 on CPU.)
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
gemma = "google/gemma-2-2b-it"
# Chat-style text-generation pipeline shared by every cell below.
# NOTE: recent transformers releases warn that `torch_dtype` is deprecated in
# favour of `dtype`; the old key is kept so older releases keep working.
nlp_model = pipeline("text-generation", model=gemma, model_kwargs={"torch_dtype": torch_dtype}, device=device)


`torch_dtype` is deprecated! Use `dtype` instead!
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


NATURAL LANGUAGE QUERYING

In [None]:
# Plain-English request that the model is asked to translate into SQL.
query = "display contact with foreign prefix numbers"

# One user turn: the database schema followed by the request itself.
sql_prompt = f"""
        Context: You are a SQL generator. 
        Given the following database schema:
            Table: Messages
            Columns:
            - id (integer)
            - Address (text)
            - Date Sent (date)
            - Date Received (date)
            - Type (text)
            - Body (text)
            - Seen (boolean)
            
            Table: Contacts
            Columns:
            - id (integer)
            - name (text)
            - number (number)
            - email (text)
            
            Table: Call Logs
            Columns:
            - id (integer)
            - Owner (text)
            - Date Time (number)
            - Duration (number)
            - Type (text)
            
            Table: Files
            Columns:
            - path (text)
            - name (text)
            - parent (text)
            - size (number)
            - datetime (datetime)
            - ext (text) alias type
            
        Convert the following user question into a correct, safe SQL query. 
        Return only SQL, no explanations.
        Query: create a query to {query}
        """
messages = [{"role": "user", "content": sql_prompt}]

# Generate, then take the content of the last chat turn as the model's reply.
outputs = nlp_model(messages, max_new_tokens=128)
last_turn = outputs[0]["generated_text"][-1]
assistant_response = last_turn["content"].strip()
print(assistant_response)


TEXT CLASSIFICATION

In [None]:
# Text to classify (empty placeholder) and the labels offered to the model.
text = ""
candidate_labels = [
    "Normal",
    "Threat",
    "Identity Attack",
    "Sexually Explicit",
    "Toxicity",
    "Extremism",
    "Scam",
]

# The label list is interpolated with its Python list representation.
classification_prompt = f"""
        Text: {text}
        Query: classify the text into one of the following categories: {candidate_labels}
        """
messages = [{"role": "user", "content": classification_prompt}]

# Generate, then take the content of the last chat turn as the model's reply.
outputs = nlp_model(messages, max_new_tokens=128)
last_turn = outputs[0]["generated_text"][-1]
assistant_response = last_turn["content"].strip()
print(assistant_response)

SUMMARIZATION

In [None]:
# Text to summarize (empty placeholder to be filled in before running).
text = ""
messages = [
    {"role": "user", "content": 
        f"""
        {text}
        Query: Summarize this
        """}
]

# Only the assistant's reply is shown; the raw pipeline result (which repeats
# the whole prompt) is no longer dumped, matching the other cells.
outputs = nlp_model(messages, max_new_tokens=128)
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
print(assistant_response)

CONVERT TO NATURAL LANGUAGE

In [None]:
import sqlite3

DB_LOCATION = "./data/"
# NOTE: this name shadows the builtin id(); it is left unchanged so that any
# other cell relying on it keeps working.
id = "sms.db"
# Full path of the SQLite file; equal to the previously hard-coded "./data/sms.db".
DB_PATH = DB_LOCATION + id


def run_query(query, db_path=DB_PATH):
    """Run a read-only SQL statement and return every resulting row.

    Only statements that start with ``SELECT`` (case-insensitive, ignoring
    surrounding whitespace) are executed; anything else is rejected so that
    generated SQL cannot modify the database through this helper.

    Args:
        query: SQL text to execute. It is converted with ``str()`` first.
        db_path: Path of the SQLite database file. Defaults to ``DB_PATH``.

    Returns:
        A list of row tuples as produced by ``fetchall()``, or ``None`` when
        the statement is not a ``SELECT`` and was therefore not executed.

    Raises:
        sqlite3.Error: If SQLite cannot prepare or execute the statement.
    """
    sql = str(query).strip()
    # Guard: execute SELECT statements only (this check used to be inverted).
    if not sql.lower().startswith("select"):
        return None

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(sql).fetchall()
    finally:
        # Always release the connection, including when execution fails.
        conn.close()

# NOTE(review): `query` is whatever an earlier cell last assigned. The cells
# above assign plain-English requests to it, while run_query() passes its
# argument to SQLite as SQL -- confirm `query` holds a SQL string here.
results = run_query(query)
print(results)

# Single user turn asking the model to describe the fetched rows.
messages = [
    {
        "role": "user", 
        "content": 
        f"""
        Data: {results}
        Convert the following data in human readable format
        """
    }
]

if not results:
    # Nothing to describe (query rejected or no rows returned); sending
    # "None" or "[]" to the model would only produce invented text.
    print("No rows to describe; skipping the model call.")
else:
    try:
        outputs = nlp_model(messages, max_new_tokens=256)
        assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
        print(assistant_response)

    except Exception as e:
        # Best-effort cell: report the failure instead of stopping the notebook.
        print(e)

TESTING

In [2]:
# Only the task itself goes here: the prompt template below already prefixes
# it with "create a query to", so repeating that phrase produced
# "create a query to create a sql query to ...".
query = "list all audio files"
messages = [
    {"role": "user", "content": 
        f"""
        Context: You are a SQL generator. 
        Given the following database schema:
            Table: Messages
            Columns:
            - id (integer)
            - Address (text)
            - Date Sent (date)
            - Date Received (date)
            - Type (text)
            - Body (text)
            - Seen (boolean)
            
            Table: Contacts
            Columns:
            - id (integer)
            - name (text)
            - number (number)
            - email (text)
            
            Table: Call Logs
            Columns:
            - id (integer)
            - Owner (text)
            - Date Time (number)
            - Duration (number)
            - Type (text)
            
            Table: Files
            Columns:
            - path (text)
            - name (text)
            - parent (text)
            - size (number)
            - datetime (datetime)
            - ext (text) alias type
            
        Convert the following user question into a correct, safe SQL query. 
        Return only SQL, no explanations.
        Query: create a query to {query}
        """}
]

try:
    # Generate, then print the content of the last chat turn (the model's reply).
    outputs = nlp_model(messages, max_new_tokens=256)
    assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
    print(assistant_response)
    
except Exception as e:
    # Best-effort cell: report the failure instead of stopping the notebook.
    print(e)

```sql
SELECT * FROM Files WHERE ext ='mp3' OR ext = 'wav';
```
