#### Relevant Imports

In [42]:
from sqlalchemy import create_engine, text
import json
from collections import defaultdict
from typing import List, Dict, Any
import csv

**SQLite DB**  
Let us connect to the SQLite Sample Database what we have  
Note the difference in terms of connection

In [2]:
# There is an engine instance created, which can handle multiple connetions
sql_engine = create_engine("sqlite:///Sample_1 - Copy.db")
conn_1 = sql_engine.connect ()

**Output in a Narrative format**  
Data output from query based on the columns being fetched can be concatenated by natural language to form sentences  
This gives a ability to be passed directlty as context

In [59]:
# Pull out last 10 purchases made with customer details 
result = conn_1.execute (text("""
                                SELECT c.FirstName, c.LastName, i.InvoiceId, i.InvoiceDate, i.Total
                                FROM Customer c
                                JOIN Invoice i ON c.CustomerId = i.CustomerId
                                ORDER BY i.InvoiceDate DESC
                                LIMIT 10; """))

rows = result.fetchall()

# Make each row into a sentence
sentences = []
for row in rows:
    first, last, invoice_id, date, total = row
    sentence = f"{first} {last} made a purchse via invoice (#{invoice_id}) on {date} for a total of {total:.2f}."
    sentences.append(sentence)

for s in sentences:
    print(s)  


Manoj Pareek made a purchse via invoice (#412) on 2011-12-22 00:00:00 for a total of 1.99.
Terhi Hämäläinen made a purchse via invoice (#411) on 2011-12-14 00:00:00 for a total of 13.86.
Madalena Sampaio made a purchse via invoice (#410) on 2011-12-09 00:00:00 for a total of 8.91.
Robert Brown made a purchse via invoice (#409) on 2011-12-06 00:00:00 for a total of 5.94.
Victor Stevens made a purchse via invoice (#408) on 2011-12-05 00:00:00 for a total of 3.96.
Kathy Chase made a purchse via invoice (#406) on 2011-12-04 00:00:00 for a total of 1.98.
John Gordon made a purchse via invoice (#407) on 2011-12-04 00:00:00 for a total of 1.98.
Dan Miller made a purchse via invoice (#405) on 2011-11-21 00:00:00 for a total of 0.99.
Helena Holý made a purchse via invoice (#404) on 2011-11-13 00:00:00 for a total of 25.86.
Diego Gutiérrez made a purchse via invoice (#403) on 2011-11-08 00:00:00 for a total of 8.91.


**Output in a CSV format**  
Data output from query based on the columns being fetched can be made into CSV format by delimiter  
CSV is one of the structure format of providing input to LLM

In [None]:
#Top Customers by Spending on Rock Genre
result = conn_1.execute (text("""
                                SELECT CU.CustomerId, CU.FirstName, CU.LastName, SUM(IL.UnitPrice * IL.Quantity) AS RockTotalSpent
                                FROM Customer CU
                                JOIN Invoice IV ON CU.CustomerId = IV.CustomerId
                                JOIN InvoiceLine IL ON IV.InvoiceId = IL.InvoiceId
                                JOIN Track TR ON IL.TrackId = TR.TrackId
                                JOIN Genre GE ON TR.GenreId = GE.GenreId
                                WHERE GE.Name = 'Rock'
                                GROUP BY CU.CustomerId
                                ORDER BY RockTotalSpent DESC
                                LIMIT 10"""))

rows = result.fetchall()

columns = [desc[0] for desc in conn_1.description]

# Write to CSV
with open("top_rock_customers.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(columns)
    writer.writerows(rows)

**Data into JSON**

The output data from query is being converted into a JSON data.  
This can be stored as JSON file as well.

In [None]:
# Query data for Genere wise total sales from the invoice data
result = conn_1.execute(text("SELECT GN.Name AS Genre, SUM(INV.UnitPrice * INV.Quantity) AS TotalSales\
                                FROM InvoiceLine INV\
                                JOIN Track TR ON INV.TrackId = TR.TrackId\
                                JOIN Genre GN ON TR.GenreId = GN.GenreId\
                                GROUP BY GN.Name ORDER BY TotalSales DESC;"))
# Query output into JSON
rows = [row._asdict () for row in result]

# Convert to JSON
json_output = json.dumps(rows,  indent=2)
print(json_output)



**Multi-level JSON**
If there are complex data to be fetched, the SQL query will eventually return as a table by nature  
This can be be convered to required level of nesting in JSON

> This function does the re-alignment of the output from table format to a specific level of nesting in JSON  
> Practically the complex data can make better meaning with correct JSON nesting

In [50]:
def group_json_by_n_columns(data: List[Dict[str, Any]], n: int) -> Dict:
    
    if not data:
        return {}

    # If n is 1, there is no need for re-aligning
    if n < 1:
        raise ValueError("n must be at least 1")

    # Check if those many columns are there
    column_keys = list(data[0].keys())
    if n > len(column_keys):
        raise ValueError(f"n = {n} exceeds number of columns = {len(column_keys)}")

    group_keys = column_keys[:n]

    def nest(rows: List[Dict[str, Any]], keys: List[str]) -> Dict:
        if not keys:
            return rows

        result = defaultdict(list)
        current_key = keys[0]

        for row in rows:
            group_value = row[current_key]
            remaining_row = {k: v for k, v in row.items() if k != current_key}
            result[group_value].append(remaining_row)

        # Recurse for each group
        return {
            k: nest(v, keys[1:]) for k, v in result.items()
        }

    return nest(data, group_keys)

>For each Album list the tracks and track wise sales

In [None]:
# Query makes the information as 2D output (always table)
data = conn_1.execute (text("""
                                SELECT 
                                    ALB.Title AS AlbumTitle, TRK.TrackId, TRK.Name AS TrackName, il.InvoiceId, il.UnitPrice, il.Quantity, (il.UnitPrice * il.Quantity) AS LineTotal
                                FROM Album ALB
                                JOIN Track TRK ON ALB.AlbumId = TRK.AlbumId
                                LEFT JOIN InvoiceLine il ON TRK.TrackId = il.TrackId
                                ORDER BY ALB.Title, TRK.Name, il.InvoiceId;
                                """))


# Re-align the JSON level
rows = [row._asdict () for row in data]
result = group_json_by_n_columns(rows, 1)
print(json.dumps(result, indent=2))


> Fetch Genere wise Tracks the sales

In [None]:
# For each Album list the tracks and track wise sales
# Query makes the information as 2D output (always table)
data = conn_1.execute (text("""
                                SELECT GN.Name AS GenreName, AR.Name AS ArtistName, TR.Name AS TrackName, TR.UnitPrice
                                FROM Genre GN
                                JOIN Track TR ON GN.GenreId = TR.GenreId
                                JOIN Album al ON TR.AlbumId = al.AlbumId
                                JOIN Artist AR ON al.ArtistId = AR.ArtistId
                                ORDER BY GN.Name, AR.Name, TR.Name;
                            """))


# Re-align the JSON level
rows = [row._asdict () for row in data]
result = group_json_by_n_columns(rows, 1)
print(json.dumps(result, indent=2))

# Write output to a file
with open ('Output.json', 'w') as File:
    json.dump (result, File, indent=2)

In [None]:
# Consolidated information from 3 tables
result = conn_1.execute (text(
                                "SELECT fn.rfam_id, fn.ncbi_id, t.species, f.rfam_id AS family_rfam_id, f.auto_wiki, f.description\
                                FROM family_ncbi fn\
                                JOIN taxonomy t ON fn.ncbi_id = t.ncbi_id\
                                JOIN family f ON fn.rfam_acc = f.rfam_acc\
                                WHERE fn.rfam_acc = 'RF01530'"))

rows = result.fetchall ()
print (rows)
  