In [1]:
from sqlvis import vis
import sqlite3

# Open the SQLite database that the query cells in this notebook run against.
# NOTE(review): the path is relative to the notebook's working directory, and
# sqlite3.connect() creates a new, empty database file when the path does not
# exist yet -- check the path first if queries report missing tables.
conn = sqlite3.connect('../../data/shopping.db')

In [2]:
# Retrieve the schema from the db connection; it is passed to vis.visualize()
# together with each query in the cells that follow.
schema = vis.schema_from_conn(conn)
# Bare expression as the last statement of the cell, so the notebook shows it.
schema

# Missing: Shopping database diagram

## Demonstration: normal query
Give the prices of each purchase made by each customer, and for each of these specify the name of the customer that made that purchase.

In [7]:
# Pair every purchase with the customer that made it (implicit join on cID)
# and return the purchase price together with the customer's name.
query = '''
SELECT p.price, c.cName
FROM customer AS c, purchase AS p
WHERE c.cID = p.cID
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Demonstration 2
Give the total amount of money spent in stores for each individual day. Only include small purchases (fewer than five items bought), and only include days on which many purchases were made (more than fifty purchases).

In [3]:
# Sum the purchase prices per date, keeping only rows with quantity < 5 and
# only dates for which more than 50 tID values are counted.
# NOTE(review): `quantity` and `tID` are unqualified here, and the next
# demonstration counts p.pID instead of tID -- confirm which column is meant.
query = '''
SELECT p.date, SUM(p.price)
FROM purchase AS p
WHERE quantity < 5
GROUP BY p.date
HAVING COUNT(tID) > 50
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Demonstration: query with error
Give the total amount of money spent in stores for each individual day. Only include large-scale purchases (more than ten items bought), and only include days on which few purchases were made (fewer than five purchases).

In [4]:
# Query for the "query with error" demonstration: it groups by p.date but
# selects only SUM(p.price), so the result rows do not show which day each
# sum belongs to.
# NOTE(review): presumably the missing p.date in SELECT is the intended
# error -- confirm before changing the query text.
query = '''
SELECT SUM(p.price)
FROM purchase AS p
WHERE p.quantity > 10
GROUP BY p.date
HAVING COUNT(p.pID) < 5
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 1
For each customer, give both the largest and the smallest amount of money they have spent on a single purchase.

Issues:
- cName is tagged in red, but the cell isn't highlighted or autoexpanded and there is no tooltip?
    - Update: I implemented selection object handling, and now it's not even tagged in red anymore... not better.

In [5]:
# Test input for the visualizer: WHERE is written before FROM, and cName is
# selected next to two aggregates without any GROUP BY.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
SELECT MAX(p.price), MIN(p.price), cName
WHERE c.cID = p.cID
FROM customer AS c, purchase AS p
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject this clause order with a syntax
# error, in which case cur.execute() raises before anything is printed.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 2
On a per-city and per-day basis, report per customer what their smallest and largest purchases on that day were.

Issues here:
- Where in the SELECT the error occurs defines whether it is marked as a problem or not?
- In- or excluding the `p.` changes whether it is visualized (may be because vis doesn't know which table to attach the error to? But the highlight is still made... huh?)

In [8]:
# Active test input: unqualified `date` in the second SELECT position, while
# GROUP BY lists only cName and c.city.
# The commented-out versions below are variations of the same error (position
# of the date column in SELECT, with or without the `p.` prefix), kept for
# debugging; swap one in to compare how the visualizer reacts.
query = '''
SELECT MAX(p.price), date, MIN(p.price), cName
FROM customer AS c, purchase AS p
WHERE c.cID = p.cID
GROUP BY cName, c.city
'''

# Variant: unqualified `date` as the last SELECT item.
# query = '''
# SELECT MAX(p.price), MIN(p.price), cName, date
# FROM customer AS c, purchase AS p
# WHERE c.cID = p.cID
# GROUP BY cName, c.city
# '''

# Variant: qualified `p.date` as the first SELECT item.
# query = '''
# SELECT p.date, MAX(p.price), MIN(p.price), cName
# FROM customer AS c, purchase AS p
# WHERE c.cID = p.cID
# GROUP BY cName, c.city
# '''

# Variant: qualified `p.date` as the last SELECT item.
# query = '''
# SELECT MAX(p.price), MIN(p.price), cName, p.date
# FROM customer AS c, purchase AS p
# WHERE c.cID = p.cID
# GROUP BY cName, c.city
# '''

# Generate the visualization.
vis.visualize(query, schema)

# Run the active query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 3
What is the maximum and minimum amount spent in a single purchase by each customer living in a city that has at least one store? Consider only customers who have made purchases with a quantity greater than 5.

Issues here:
- GROUP BY mistake not highlighting node, also no tooltip (this error will also disappear when the one below is fixed)
- WHERE should be late instead of GROUP BY early (@root level)
- Side error: referencing error in subquery is not detected? (try making it s.cit) May be related to me breaking selections though

In [12]:
# Test input for the visualizer: GROUP BY and HAVING are written before WHERE,
# HAVING filters on the non-aggregated p.quantity, and the subquery puts FROM
# before SELECT.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
SELECT MAX(p.price), MIN(p.price), cName
FROM customer AS c, purchase AS p
GROUP BY cName
HAVING p.quantity > 5
WHERE c.cID = p.cID
AND c.city IN (FROM store AS s
               SELECT s.city)
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject this clause order with a syntax
# error, in which case cur.execute() raises before anything is printed.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 4
Find each customer's number of items purchased per store. Only consider cases where the customer lives in the same city as the store where they purchased the items.

Issues:
- Tooltip boxes are too small, that's the only issue really.

In [12]:
# Test input for the visualizer: three separate WHERE clauses are written in a
# row (instead of one WHERE joined with AND), and SUM(p.quantity) is selected
# next to sName and cName without any GROUP BY.
# NOTE(review): the query looks deliberately malformed -- do not "fix" the SQL
# without confirming.
query = '''
SELECT s.sName, c.cName, SUM(p.quantity)
FROM customer AS c, purchase AS p, store AS s
WHERE c.cID = p.cID
WHERE s.city = c.city
WHERE s.sID = p.sID
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject the repeated WHERE with a syntax
# error, in which case cur.execute() raises before anything is printed.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 5
Find the total quantity of products purchased per store in Eindhoven, by customers from Eindhoven who made fewer than 10 purchases (transactions) in that store.

Issues:
- The WHERE with aggregation triggers changing the entire WHERE to HAVING, which breaks more things.
- Late keyword detection is triggered incorrectly (I thought that wasn't even reachable?), why? And also, why does that crash this query?

In [14]:
# Test input for the visualizer: the aggregate COUNT(p.pID) is used inside
# WHERE, and the subquery in HAVING is ordered FROM / SELECT / WHERE.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
SELECT s.sName, c.cName, SUM(p.quantity)
FROM customer AS c, purchase AS p, store AS s
WHERE c.cID = p.cID
AND s.city = c.city
AND COUNT(p.pID) < 10
GROUP BY c.cName, s.sName
HAVING s.sName IN (FROM store AS s2
                   SELECT s2.sName
                   WHERE s2.city = 'Eindhoven'
                  )
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject this query (FROM-first subquery,
# aggregate in WHERE), in which case cur.execute() raises before the print.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 6
Which store names and corresponding purchase dates are associated with purchases made from stores that satisfy the following conditions:
- The store's name must contain the letter 'e'.
- There must be at least two other stores with the letter 'e' in the same city.
- The store must be in a city where at least 20 registered customers live. These customers do not need to ever have bought anything at this particular store.

Issues:
- How is the parsing crashing?????
- Late keyword detection is triggered, and once again crashes the system on insertErrorInfo. How?????

In [15]:
# Test input for the visualizer: both the CTE `confusion` and the outer query
# start with FROM and end with SELECT, the CTE uses COUNT(s1.sID) inside
# WHERE, and its HAVING subquery is also written FROM-first.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
WITH confusion AS (
    FROM store AS s1
    WHERE s1.sName LIKE '%e%'
    AND COUNT(s1.sID) > 2
    GROUP BY s1.city
    HAVING s1.city IN (FROM customer AS c1
                       GROUP BY c1.city
                       HAVING COUNT(c1.cID) > 20
                       SELECT c1.city
                      )
    SELECT s1.sName)

FROM confusion AS co, purchase AS pur, store as s2
WHERE s2.sName = co.sName
AND s2.sID = pur.sID
SELECT co.sName, pur.date
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject the FROM-first clause order with
# a syntax error, in which case cur.execute() raises before the print.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 7
We want to know whether customers from a particular city spend more or less money at our stores. What are the maximum and minimum amounts of money spent in a single purchase, grouped on the city where the customers come from? Only include customer cities from where the inhabitants buy more than 5 products at once on average and at least 2 items at once in a purchase. For some reason, management also wants the customers to specifically come from addresses with 'straat' in the name.

Issues:
- Double HAVING issue is not being shown at all.
- Because of it being double HAVING, the `c.street LIKE '%straat%'` is not visualized?
- The `c.street LIKE '%straat%'`, when visualized, is shown as a WHERE component (due to it not containing aggregation).
- city is made orange as if it is being selected... it is not. That is the entire problem for incorrect grouping.

In [19]:
# Test input for the visualizer: three separate HAVING clauses are written in
# a row, and the last one (c.street LIKE ...) contains no aggregate.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
SELECT MAX(p.price), MIN(p.price)
FROM customer AS c, purchase AS p
WHERE c.cID = p.cID
GROUP BY c.city
HAVING AVG(p.quantity) > 5
HAVING MIN(p.quantity) > 2
HAVING c.street LIKE '%straat%'
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject the repeated HAVING with a syntax
# error, in which case cur.execute() raises before anything is printed.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 8
Find the largest and smallest amounts of money spent in a purchase per day.

Issues: no issues, working as intended

In [20]:
# Test input for the visualizer: p.date is selected next to MAX/MIN aggregates
# while the query has no GROUP BY clause at all.
# NOTE(review): the missing GROUP BY p.date is presumably the intended error
# for this problem -- confirm before changing the query text.
query = '''
SELECT MAX(p.price), MIN(p.price), p.date
FROM purchase AS p
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 9
Which products have been purchased by customers in each city, and what is the average quantity of those products for each city?

Issues:
- p.pName is not grouped on, but not visualized as an error
- pur.pID is highlighted as selected but isn't, that's the problem... so don't show it as being selected?

In [27]:
# Test input for the visualizer: two separate WHERE clauses are written in a
# row, and p.pName is selected although GROUP BY lists pur.pID rather than
# p.pName.
# NOTE(review): the query looks deliberately malformed (see the issue notes in
# the text before this cell) -- do not "fix" the SQL without confirming.
query = '''
SELECT c.city, p.pName, AVG(pur.quantity) AS average_quantity
FROM customer AS c, purchase AS pur, product as p
WHERE c.cID = pur.cID
WHERE p.pID = pur.pID
GROUP BY c.city, pur.pID
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print the rows.
# NOTE(review): SQLite is expected to reject the repeated WHERE with a syntax
# error, in which case cur.execute() raises before anything is printed.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)

## Problem 10
For each store, find each individual customer's records for their largest and smallest amounts spent in a single purchase.

Issues:
- (`c.`)`cName` is not being marked as insufficient aggregation.
- s.sName is not being marked as insufficient aggregation.

In [10]:
# Test input for the visualizer: cName and sName (both unqualified) are
# selected next to MAX/MIN aggregates while the query has no GROUP BY clause.
# NOTE(review): the missing grouping is presumably the intended error for
# this problem -- confirm before changing the query text.
query = '''
SELECT MAX(p.price), MIN(p.price), cName, sName
FROM customer AS c, purchase AS p, store AS s
WHERE c.cID = p.cID
AND s.sID = p.sID
'''

# Generate the visualization.
vis.visualize(query, schema)

# Run the same query text against the database and print every returned row.
cur = conn.cursor()
res = cur.execute(query)
output = res.fetchall()
print('Query output:\n', output)