# SQL Practice Problems using sqlite + Pandas + Jupyter notebooks and Northwinds Database 

###### DB: https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/sql/linq/downloading-sample-databases

In [1]:
import sqlite3

# Create a SQL connection to our SQLite database
con = sqlite3.connect("Northwind_large.sqlite")

In [2]:
# check available tables 

cur = con.cursor()
cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
available_table=(cur.fetchall())
available_table

[('Category',),
 ('Customer',),
 ('CustomerCustomerDemo',),
 ('CustomerDemographic',),
 ('Employee',),
 ('EmployeeTerritory',),
 ('Order',),
 ('OrderDetail',),
 ('Product',),
 ('Region',),
 ('Shipper',),
 ('Supplier',),
 ('Territory',)]

In [3]:
# Excerceise 21 Total number of customers per Country and City

import pandas as pd
Q = 'SELECT Country,City,count(*) from Customer group by 1,2 order by 3 desc;'
pd.read_sql( Q , con)


Unnamed: 0,Country,City,count(*)
0,UK,London,6
1,Mexico,México D.F.,5
2,Brazil,Sao Paulo,4
3,Argentina,Buenos Aires,3
4,Brazil,Rio de Janeiro,3
...,...,...,...
64,USA,Walla Walla,1
65,Venezuela,Barquisimeto,1
66,Venezuela,Caracas,1
67,Venezuela,I. de Margarita,1


In [4]:
# Excercise 22 Products that need reordering

Q = 'SELECT * from Product  where UnitsInStock + UnitsOnOrder < ReorderLevel and Discontinued = 0 order by Id;'
pd.read_sql( Q , con)

Unnamed: 0,Id,ProductName,SupplierId,CategoryId,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued
0,30,Nord-Ost Matjeshering,13,8,10 - 200 g glasses,25.89,10,0,15,0
1,70,Outback Lager,7,1,24 - 355 ml bottles,15.0,15,10,30,0


In [5]:
# Excercise 24 Customers sorted by region and ID
Q = 'SELECT * from Customer order by Region, Id;'
pd.read_sql( Q , con)


Unnamed: 0,Id,CompanyName,ContactName,ContactTitle,Address,City,Region,PostalCode,Country,Phone,Fax
0,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,British Isles,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
1,BSBEV,B's Beverages,Victoria Ashworth,Sales Representative,Fauntleroy Circus,London,British Isles,EC2 5NT,UK,(171) 555-1212,
2,CONSH,Consolidated Holdings,Elizabeth Brown,Sales Representative,Berkeley Gardens 12 Brewery,London,British Isles,WX1 6LT,UK,(171) 555-2282,(171) 555-9199
3,EASTC,Eastern Connection,Ann Devon,Sales Agent,35 King George,London,British Isles,WX3 6FW,UK,(171) 555-0297,(171) 555-3373
4,HUNGO,Hungry Owl All-Night Grocers,Patricia McKenna,Sales Associate,8 Johnstown Road,Cork,British Isles,,Ireland,2967 542,2967 3333
...,...,...,...,...,...,...,...,...,...,...,...
86,SUPRD,Suprêmes délices,Pascale Cartrain,Accounting Manager,"Boulevard Tirou, 255",Charleroi,Western Europe,B-6000,Belgium,(071) 23 67 22 20,(071) 23 67 22 21
87,TOMSP,Toms Spezialitäten,Karin Josephs,Marketing Manager,Luisenstr. 48,Münster,Western Europe,44087,Germany,0251-031259,0251-035695
88,VICTE,Victuailles en stock,Mary Saveley,Sales Agent,"2, rue du Commerce",Lyon,Western Europe,69004,France,78.32.54.86,78.32.54.87
89,VINET,Vins et alcools Chevalier,Paul Henriot,Accounting Manager,59 rue de l'Abbaye,Reims,Western Europe,51100,France,26.47.15.10,26.47.15.11


In [None]:
# if there are nulls, you need to createa secondary column and sort

Select
CustomerID
,CompanyName
,Region
,Case
when Region is null then 1
else 0
End
From Customers

In [7]:
# Excercise 25: Three ship countries with the highest freight -- always check for 4th as well could be 4th country has same as 3rd
Q = 'SELECT ShipCountry, avg(Freight) from "Order" group by 1 order by 2 desc limit 3; '
pd.read_sql( Q , con)


Unnamed: 0,ShipCountry,avg(Freight)
0,Poland,261.145588
1,Finland,257.960979
2,Mexico,254.20747


In [18]:
# Excercise 26: Three ship countries with the highest freight in 2015
Q = 'SELECT ShipCountry, avg(Freight) from "Order" where strftime("%Y", ShippedDate) = "2015"  group by 1 order by 2 desc limit 3; '
pd.read_sql( Q , con)

Unnamed: 0,ShipCountry,avg(Freight)
0,Norway,297.414773
1,Finland,274.864362
2,Sweden,265.247573


In [49]:
# this is wrong, and we will see why
Q = 'Select  ShipCountry , avg(Freight) From "Order" Where OrderDate between "2015-01-01" and "2015-12-31" Group By ShipCountry Order By 2 desc limit 3;'
pd.read_sql( Q , con)

Unnamed: 0,ShipCountry,avg(Freight)
0,Norway,293.255682
1,Finland,274.966146
2,Switzerland,268.136842


In [42]:
Q = 'select max(OrderDate) from "order"  Where OrderDate between "2015-01-01" and "2015-12-31" '
pd.read_sql( Q , con)

Unnamed: 0,max(OrderDate)
0,2015-12-30 23:58:05


In [48]:
Q = 'select Id,OrderDate from "order"  Where strftime("%Y", OrderDate) = "2015"  order by OrderDate desc limit 3'
pd.read_sql( Q , con)

Unnamed: 0,Id,OrderDate
0,26589,2015-12-31 21:31:45
1,20595,2015-12-31 21:10:55
2,21058,2015-12-31 19:59:11


In [45]:
Q = 'select max(OrderDate) from "order"  Where OrderDate >= "2015-01-01" and OrderDate < "2016-01-01" '
pd.read_sql( Q , con)

Unnamed: 0,max(OrderDate)
0,2015-12-31 21:31:45


In [59]:
# 28 highest freight charges last 1 year

Q = 'SELECT ShipCountry, avg(Freight) from "Order" where  OrderDate <= (select max(OrderDate) from "Order") and OrderDate > (SELECT date((select max(OrderDate) from "Order"),"-1 year")) group by 1 order by 2 desc limit 3;'


pd.read_sql( Q , con)

Unnamed: 0,ShipCountry,avg(Freight)
0,Finland,279.398352
1,Switzerland,275.240964
2,Venezuela,269.887681


In [21]:
# Excercise 29: Employees, ProductName, Quantity

Q = 'SELECT a.Id as OrderID, b.quantity, c.ProductName, e.LastName,e.Id as EmployeeID from "Order" a  join OrderDetail b on b.OrderId = a.Id join Product c on c.Id = b.ProductId join Employee e on a.EmployeeId = e.Id'
pd.read_sql( Q , con)

Unnamed: 0,OrderID,Quantity,ProductName,LastName,EmployeeID
0,10248,12,Queso Cabrales,Buchanan,5
1,10248,10,Singaporean Hokkien Fried Mee,Buchanan,5
2,10248,5,Mozzarella di Giovanni,Buchanan,5
3,10249,9,Tofu,Suyama,6
4,10249,40,Manjimup Dried Apples,Suyama,6
...,...,...,...,...,...
621878,27065,20,Gustaf's Knäckebröd,Davolio,1
621879,27065,11,Original Frankfurter grüne Soße,Davolio,1
621880,27065,45,Alice Mutton,Davolio,1
621881,27065,7,Grandma's Boysenberry Spread,Davolio,1


In [32]:
# Excercise 30: Customers with orders but no records in Customer table


Q = 'select count(distinct(CustomerId)) from "Order"  Where CustomerId not in (select Id from "Customer" )'
pd.read_sql( Q , con)


Unnamed: 0,count(distinct(CustomerId))
0,4
