In [1]:
import sqlite3
import urllib.request
from pathlib import Path

import pandas as pd

# Chinook sample database (SQLite build) from the official repo, and the
# local file it is stored in.
url = "https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
DB_PATH = Path("Chinook_Sqlite.sqlite")

# Download only when no local copy exists, so re-running the notebook does
# not need the network or overwrite the file. The download goes to a
# temporary name first and is renamed once complete, so an interrupted
# transfer is never mistaken for a finished database on the next run.
if not DB_PATH.exists():
    partial_path = DB_PATH.with_name(DB_PATH.name + ".part")
    urllib.request.urlretrieve(url, partial_path)
    partial_path.replace(DB_PATH)

# Connect to it; `conn` is shared by the query cells below.
conn = sqlite3.connect(DB_PATH)

# View all tables
tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn)
print("✅ Tables found:")
print(tables)


✅ Tables found:
             name
0           Album
1          Artist
2        Customer
3        Employee
4           Genre
5         Invoice
6     InvoiceLine
7       MediaType
8        Playlist
9   PlaylistTrack
10          Track


**Filtering + Aggregation**

In [2]:
# Only invoices with a total strictly above this value are aggregated.
MIN_INVOICE_TOTAL = 10

# Per billing country: number of qualifying invoices and their average total,
# keeping the 10 countries with the highest average.
# The threshold is bound as a query parameter instead of being written into
# the SQL text, so it can be tuned in one place.
# NOTE(review): rows that tie on AvgSpending have no secondary sort key, so
# their relative order (and which tied row survives LIMIT 10) is left to
# SQLite. Add a tie-breaker such as BillingCountry if stable output matters.
query = '''
SELECT BillingCountry, COUNT(*) AS InvoiceCount, AVG(Total) AS AvgSpending
FROM Invoice
WHERE Total > ?
GROUP BY BillingCountry
ORDER BY AvgSpending DESC
LIMIT 10;
'''
pd.read_sql_query(query, conn, params=(MIN_INVOICE_TOTAL,))


Unnamed: 0,BillingCountry,InvoiceCount,AvgSpending
0,Ireland,1,21.86
1,Hungary,1,21.86
2,Czech Republic,2,21.36
3,Austria,1,18.86
4,Chile,2,15.885
5,Norway,1,15.86
6,USA,15,14.668667
7,France,5,14.46
8,Germany,5,14.07
9,United Kingdom,3,13.86


**Subquery**

In [3]:
# A customer qualifies when at least one of their invoices exceeds this total.
HIGH_VALUE_TOTAL = 15

# Names of customers that have at least one invoice above the threshold.
# The inner SELECT collects the qualifying CustomerIds; the outer query looks
# up the matching names. The threshold is bound as a query parameter, and the
# explicit ORDER BY makes the row order part of the query instead of relying
# on whatever order SQLite happens to scan the table in.
query = '''
SELECT FirstName, LastName
FROM Customer
WHERE CustomerId IN (
    SELECT CustomerId FROM Invoice WHERE Total > ?
)
ORDER BY CustomerId;
'''
pd.read_sql_query(query, conn, params=(HIGH_VALUE_TOTAL,))


Unnamed: 0,FirstName,LastName
0,Bjørn,Hansen
1,František,Wichterlová
2,Helena,Holý
3,Astrid,Gruber
4,Frank,Ralston
5,Victor,Stevens
6,Richard,Cunningham
7,Isabelle,Mercier
8,Ladislav,Kovács
9,Hugh,O'Reilly


Analyze in **Pandas**

In [4]:
# Let SQL do the row filter (invoices with Total above 15), then aggregate
# the result in pandas rather than in the query.
df = pd.read_sql_query(sql="SELECT * FROM Invoice WHERE Total > 15", con=conn)

# Mean invoice total per billing country, highest average first.
(
    df.groupby("BillingCountry")["Total"]
    .mean()
    .sort_values(ascending=False)
)


Unnamed: 0_level_0,Total
BillingCountry,Unnamed: 1_level_1
Ireland,21.86
Hungary,21.86
Czech Republic,21.36
USA,19.526667
Austria,18.86
Chile,17.91
France,16.86
Norway,15.86


Simulate SQL JOIN in **Pandas**

In [5]:
# Load both tables in full; the join is performed in pandas, not in SQL.
customers = pd.read_sql_query(sql="SELECT * FROM Customer", con=conn)
invoices = pd.read_sql_query(sql="SELECT * FROM Invoice", con=conn)

# Inner join on CustomerId: one output row per matching customer/invoice pair.
merged = customers.merge(invoices, how="inner", on="CustomerId")

# Preview the first rows, restricted to name, billing country and invoice total.
merged.loc[:, ['FirstName', 'LastName', 'BillingCountry', 'Total']].head()


Unnamed: 0,FirstName,LastName,BillingCountry,Total
0,Luís,Gonçalves,Brazil,3.98
1,Luís,Gonçalves,Brazil,3.96
2,Luís,Gonçalves,Brazil,5.94
3,Luís,Gonçalves,Brazil,0.99
4,Luís,Gonçalves,Brazil,1.98
