## Week 1 - Data with Danny

In [None]:
#| echo: false

from pathlib import Path

from IPython.display import Markdown, display
from sql_8week_danny.sql_engine import DuckDBEngine

In [None]:
#| echo: false

# Both folders are resolved against the parent of the current working
# directory, i.e. `sql/` and `data/` are expected to be siblings of the folder
# the kernel runs in.
SQL_DIR = Path.cwd().parent.joinpath("sql")
DATA_DIR = Path.cwd().parent.joinpath("data")

# Week 1 files: the table-creation script and the DuckDB database file.
TABLES_SQL = SQL_DIR.joinpath("week1.sql")
DUCK_DB = DATA_DIR.joinpath("week1_duckdb.db")

In [None]:
#| echo: false

def create_database(persist: bool = True):
    """Create the DuckDB engine for this notebook and run the table script.

    Args:
        persist: When true (and ``DUCK_DB`` is set), back the engine with the
            ``DUCK_DB`` file, removing any existing file first. Otherwise
            ``DuckDBEngine()`` is constructed without a path (presumably an
            in-memory database - confirm against ``DuckDBEngine``).

    Returns:
        A ``(db, sql)`` tuple: the engine, and whatever
        ``db.execute_sql_file(TABLES_SQL)`` returned (rendered later in the
        notebook as the table-creation SQL).
    """
    if persist and DUCK_DB:
        # DuckDB cannot open a database file inside a missing folder, so make
        # sure the data directory exists (e.g. on a fresh checkout).
        Path(DUCK_DB).parent.mkdir(parents=True, exist_ok=True)
        db = DuckDBEngine(
            str(DUCK_DB), rm_db=True
        )  # rm the db if it exists to avoid table creation errors
    else:
        db = DuckDBEngine()
    sql = db.execute_sql_file(TABLES_SQL)  # create the tables
    return db, sql

In [None]:
# Build the file-backed database (any existing file is removed first) and keep
# the result of running the table script in `sql` for display below.
db, sql = create_database(persist=True)

In [None]:
# Show the engine's table names as the cell output (sanity check after setup).
db.table_names

In [None]:
#| label: display_tables_sql
#| echo: false

# Render the table-creation SQL as a fenced code block. The fence is closed
# explicitly rather than relying on the renderer to close it at end of input,
# and tagged `sql` so renderers can apply syntax highlighting.
display(Markdown(f"```sql\n{sql}\n```"))

In [None]:
# Load the tables through the engine helper.
# NOTE(review): `tables_df` is not referenced again in the visible cells -
# confirm it is still needed.
tables_df = db.load_tables_to_df()

#### Table information

In [None]:
# Collect per-table info; the result is indexed by table name in the next cell.
df = db.display_all_table_info(display_tables=True)

In [None]:
# Preview the first rows of every table, each under its own heading.
for table_name in db.table_names:
    heading = Markdown(f"# {table_name}")
    display(heading)
    preview = df[table_name].head()
    display(preview)

In [None]:
# Row count of the members table.
db.query("SELECT COUNT(*) FROM members")


In [None]:
# db.close()  # required to remove the DB file lock (.wal)

#### Case Study Questions

Each of the following case study questions can be answered using a single SQL statement:


##### What is the total amount each customer spent at the restaurant?

In [None]:
# Sum the menu price of every sale, grouped per customer.
db.query(
    "SELECT customer_id, SUM(price) "
    "FROM sales as s JOIN menu as m ON s.product_id = m.product_id "
    "GROUP BY customer_id"
)

##### How many days has each customer visited the restaurant?

In [None]:
# Distinct order dates per customer, so several orders on one day count once.
db.query(
    "SELECT customer_id, COUNT(DISTINCT order_date) "
    "FROM sales "
    "GROUP BY customer_id"
)

##### What was the first item from the menu purchased by each customer?

In [None]:
# Earliest order date recorded for each customer.
db.query(
    "SELECT customer_id, MIN(order_date) "
    "FROM sales "
    "GROUP BY customer_id"
)

In [None]:
# Items bought on each customer's own first order date. The date is derived per
# customer instead of being hard-coded, so the result stays correct when
# customers first visit on different days. DISTINCT collapses repeat purchases
# of the same item on that day; several rows per customer remain when more than
# one item was bought on the first visit.
db.query(
    """
    WITH first_visit AS (
        SELECT customer_id, MIN(order_date) AS first_order_date
        FROM sales
        GROUP BY customer_id
    )
    SELECT DISTINCT s.customer_id, s.product_id
    FROM sales AS s
    JOIN first_visit AS f
      ON f.customer_id = s.customer_id
     AND f.first_order_date = s.order_date
    ORDER BY s.customer_id, s.product_id
    """
)

##### What is the most purchased item on the menu and how many times was it purchased by all customers?

In [None]:
# Purchase count per product, most purchased first (top row answers the question).
db.query(
    "SELECT product_id, COUNT(product_id) "
    "FROM sales "
    "GROUP BY product_id "
    "ORDER BY COUNT(product_id) DESC"
)

##### Which item was the most popular for each customer?

In [None]:
# Rank each customer's products by purchase count and keep only the top rank,
# so the result holds the most popular item per customer rather than every
# customer/product pair. RANK() keeps ties: a customer with several equally
# popular items gets one row per item.
db.query(
    """
    SELECT customer_id, product_id, purchase_count
    FROM (
        SELECT
            customer_id,
            product_id,
            COUNT(product_id) AS purchase_count,
            RANK() OVER (
                PARTITION BY customer_id
                ORDER BY COUNT(product_id) DESC
            ) AS popularity_rank
        FROM sales
        GROUP BY customer_id, product_id
    ) AS ranked
    WHERE popularity_rank = 1
    ORDER BY customer_id, product_id
    """
)

##### Which item was purchased first by the customer after they became a member?

In [None]:
# Customer A: earliest sale on or after the membership join date (`>=` includes
# purchases made on the join date itself). LIMIT 1 keeps a single row even if
# several sales share that date.
db.query(
    """
    SELECT * FROM sales JOIN members on sales.customer_id = members.customer_id  WHERE sales.customer_id = 'A' AND order_date >= join_date ORDER BY order_date LIMIT 1
    """
)

In [None]:
# Customer B: earliest sale on or after the membership join date (`>=` includes
# purchases made on the join date itself). LIMIT 1 keeps a single row even if
# several sales share that date.
db.query(
    """
    SELECT * FROM sales JOIN members on sales.customer_id = members.customer_id  WHERE sales.customer_id = 'B' AND order_date >= join_date ORDER BY order_date LIMIT 1
    """
)

##### Which item was purchased just before the customer became a member?

In [None]:
# Customer A: latest purchase strictly before the join date. Sorting
# newest-first makes LIMIT 1 pick the order closest to joining; an ascending
# sort would return the customer's earliest purchase instead. If several items
# share that date, LIMIT 1 keeps an arbitrary one of them.
db.query(
    """
    SELECT *
    FROM sales
    JOIN members ON sales.customer_id = members.customer_id
    WHERE sales.customer_id = 'A' AND order_date < join_date
    ORDER BY order_date DESC
    LIMIT 1
    """
)

In [None]:
# Customer B: latest purchase strictly before the join date. Sorting
# newest-first makes LIMIT 1 pick the order closest to joining; an ascending
# sort would return the customer's earliest purchase instead. If several items
# share that date, LIMIT 1 keeps an arbitrary one of them.
db.query(
    """
    SELECT *
    FROM sales
    JOIN members ON sales.customer_id = members.customer_id
    WHERE sales.customer_id = 'B' AND order_date < join_date
    ORDER BY order_date DESC
    LIMIT 1
    """
)

##### What is the total items and amount spent for each member before they became a member?

In [None]:
# Customer A: number of sales rows dated strictly before the join date.
db.query(
    """
    SELECT count(*) FROM sales JOIN members on sales.customer_id = members.customer_id  WHERE sales.customer_id = 'A' AND order_date < join_date
    """
)

In [None]:
# need to join menu table for this

In [None]:
# Customer A: total price of the items bought strictly before the join date.
# The extra join to `menu` supplies the `price` column.
db.query(
    """
    SELECT SUM(price) FROM sales JOIN members on sales.customer_id = members.customer_id JOIN menu on menu.product_id = sales.product_id  WHERE sales.customer_id = 'A' AND order_date < join_date
    """
)

In [None]:
# Customer B: number of sales rows dated strictly before the join date.
db.query(
    """
    SELECT count(*) FROM sales JOIN members on sales.customer_id = members.customer_id  WHERE sales.customer_id = 'B' AND order_date < join_date
    """
)

In [None]:
# Customer B: total price of the items bought strictly before the join date.
# The extra join to `menu` supplies the `price` column.
db.query(
    """
    SELECT SUM(price) FROM sales JOIN members on sales.customer_id = members.customer_id JOIN menu on menu.product_id = sales.product_id  WHERE sales.customer_id = 'B' AND order_date < join_date
    """
)

##### If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?


##### In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi - how many points do customer A and B have at the end of January?