# SQL & Data Integration

## Task 1: Advanced Filtering and Aggregation

### Basic Aggregation with WHERE + GROUP BY

In [39]:
# Setup and Import Libraries
import sqlite3

# Open the Chinook SQLite database and get a cursor for running statements
conn = sqlite3.connect("chinook.db")
cur = conn.cursor()

# Per-country invoice count and average invoice total, limited to invoices
# with Total > 5 billed outside the USA, highest average first.
# The rows are fetched directly from the executed cursor in one chain.
results = cur.execute("""
SELECT BillingCountry AS Region,
       COUNT(*) AS NumInvoices,
       AVG(Total) AS AvgSales
FROM invoices
WHERE Total > 5 AND BillingCountry != 'USA'
GROUP BY BillingCountry
ORDER BY AvgSales DESC;
""").fetchall()

# Print one (Region, NumInvoices, AvgSales) tuple per line
for region_row in results:
    print(region_row)

('Chile', 3, 12.57)
('Ireland', 3, 12.236666666666666)
('Hungary', 3, 12.236666666666666)
('Czech Republic', 6, 12.07)
('Austria', 3, 11.236666666666666)
('Netherlands', 3, 10.57)
('Norway', 3, 10.236666666666666)
('Germany', 12, 10.07)
('Sweden', 3, 9.903333333333334)
('Portugal', 6, 9.903333333333334)
('France', 15, 9.770000000000001)
('Canada', 24, 9.611666666666666)
('United Kingdom', 9, 9.57)
('Spain', 3, 9.57)
('Poland', 3, 9.57)
('Italy', 3, 9.57)
('India', 6, 9.57)
('Denmark', 3, 9.57)
('Brazil', 15, 9.57)
('Belgium', 3, 9.57)
('Australia', 3, 9.57)
('Argentina', 3, 9.57)
('Finland', 4, 9.1675)


## Task 2: Subqueries and Nesting

### Query with Subquery in WHERE clause

In [57]:
# Full names of customers who have at least one invoice with Total > 15,
# selected by matching CustomerId against a subquery on invoices
results = cur.execute("""
SELECT FirstName || ' ' || LastName AS Name
FROM customers
WHERE CustomerId IN (
    SELECT CustomerId
    FROM invoices
    WHERE Total > 15
);
""").fetchall()

# Print each one-element result tuple on its own line
for name_row in results:
    print(name_row)

('Bjørn Hansen',)
('František Wichterlová',)
('Helena Holý',)
('Astrid Gruber',)
('Frank Ralston',)
('Victor Stevens',)
('Richard Cunningham',)
('Isabelle Mercier',)
('Ladislav Kovács',)
("Hugh O'Reilly",)
('Luis Rojas',)


## Task 3: Import SQL Results into Pandas

### Setup and Import Libraries

In [49]:
# Import libraries
import sqlite3
import pandas as pd

# Connect to the Chinook database for the pandas tasks
conn = sqlite3.connect('chinook.db')

# Rebind the cursor to the new connection. Later cells pair `cur.execute(...)`
# with `conn.commit()`, which only works when both refer to the same connection.
cur = conn.cursor()

### Query the Database

#### Chinook has no `orders` table with an `amount` column, so we query the `invoices` table and filter on `Total > 15` instead:

In [47]:
# Read every invoice whose Total exceeds 15 straight into a DataFrame
df = pd.read_sql_query(
    sql="""
SELECT * FROM invoices
WHERE Total > 15;
""",
    con=conn,
)

### Explore the DataFrame

In [31]:
# Show the first five rows as a quick look at the loaded invoices
df.head(n=5)

Unnamed: 0,InvoiceId,CustomerId,InvoiceDate,BillingAddress,BillingCity,BillingState,BillingCountry,BillingPostalCode,Total
0,88,57,2010-01-13 00:00:00,"Calle Lira, 198",Santiago,,Chile,,17.91
1,89,7,2010-01-18 00:00:00,"Rotenturmstraße 4, 1010 Innere Stadt",Vienne,,Austria,1010,18.86
2,96,45,2010-02-18 00:00:00,Erzsébet krt. 58.,Budapest,,Hungary,H-1073,21.86
3,103,24,2010-03-21 00:00:00,162 E Superior Street,Chicago,IL,USA,60611,15.86
4,194,46,2011-04-28 00:00:00,3 Chatham Street,Dublin,Dublin,Ireland,,21.86


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# DataFrame, with the default quartiles spelled out explicitly
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,InvoiceId,CustomerId,Total
count,11.0,11.0,11.0
mean,209.181818,26.181818,19.500909
std,109.400931,19.239637,3.382072
min,88.0,4.0,15.86
25%,99.5,6.5,16.86
50%,201.0,25.0,18.86
75%,302.5,44.0,21.86
max,404.0,57.0,25.86


## Task 4: Modify SQL Data

### 1. UPDATE Example

In [63]:
# Update example: set Country to 'India' for customers where it is NULL
cur.execute("""
UPDATE customers
SET Country = 'India'
WHERE Country IS NULL;
""")

# Commit on the connection that owns this cursor. `conn` is re-assigned by
# another setup cell in this notebook, so `conn.commit()` can end up committing
# a different connection and leave this UPDATE pending.
cur.connection.commit()

# Confirm the update by listing every customer whose Country is 'India'
cur.execute("SELECT * FROM customers WHERE Country = 'India';")
results = cur.fetchall()
for row in results:
    print(row)

(58, 'Manoj', 'Pareek', None, '12,Community Centre', 'Delhi', None, 'India', '110017', '+91 0124 39883988', None, 'manoj.pareek@rediff.com', 3)
(59, 'Puja', 'Srivastava', None, '3,Raj Bhavan Road', 'Bangalore', None, 'India', '560001', '+91 080 22289999', None, 'puja_srivastava@yahoo.in', 3)


### 2. DELETE Example

In [66]:
# DELETE example: remove the customer with CustomerId = 60
cur.execute("DELETE FROM customers WHERE CustomerId = 60;")

# Commit on the cursor's own connection. `conn` is re-assigned by another
# setup cell in this notebook, so `conn.commit()` may not be the connection
# this DELETE ran on.
cur.connection.commit()

# Confirm deletion: the lookup should return an empty list
cur.execute("SELECT * FROM customers WHERE CustomerId = 60;")
result = cur.fetchall()
print("After DELETE:", result)

After DELETE: []


## Task 5: Merge in Pandas (SQL-style)

### 1. Load DataFrames

In [69]:
# Pull the full customers and invoices tables into separate DataFrames
customers_df = pd.read_sql_query(sql="SELECT * FROM customers;", con=conn)
invoices_df = pd.read_sql_query(sql="SELECT * FROM invoices;", con=conn)

### 2. Perform Inner JOIN using pd.merge()

In [73]:
# pandas counterpart of a SQL INNER JOIN: keep only rows whose CustomerId
# appears in both the customers and invoices frames
merged_df = customers_df.merge(invoices_df, how='inner', on='CustomerId')

# Show the first five joined rows
merged_df.head(n=5)

Unnamed: 0,CustomerId,FirstName,LastName,Company,Address,City,State,Country,PostalCode,Phone,...,Email,SupportRepId,InvoiceId,InvoiceDate,BillingAddress,BillingCity,BillingState,BillingCountry,BillingPostalCode,Total
0,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,...,luisg@embraer.com.br,3,98,2010-03-11 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,3.98
1,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,...,luisg@embraer.com.br,3,121,2010-06-13 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,3.96
2,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,...,luisg@embraer.com.br,3,143,2010-09-15 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,5.94
3,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,...,luisg@embraer.com.br,3,195,2011-05-06 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,0.99
4,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,...,luisg@embraer.com.br,3,316,2012-10-27 00:00:00,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,1.98


### 3. Equivalent SQL JOIN (for comparison)

In [76]:
# The same customer-to-invoice join written in SQL, selecting only the
# customer name, invoice id and invoice total
sql_query = """
SELECT c.FirstName, c.LastName, i.InvoiceId, i.Total
FROM customers c
JOIN invoices i ON c.CustomerId = i.CustomerId;
"""

# Run the join in SQLite and load the result into a DataFrame for comparison
sql_join_df = pd.read_sql_query(sql=sql_query, con=conn)
sql_join_df.head(n=5)

Unnamed: 0,FirstName,LastName,InvoiceId,Total
0,Luís,Gonçalves,98,3.98
1,Luís,Gonçalves,121,3.96
2,Luís,Gonçalves,143,5.94
3,Luís,Gonçalves,195,0.99
4,Luís,Gonçalves,316,1.98
