## Week 1 - Data with Danny

In [1]:
#| echo: false

from pathlib import Path

from IPython.display import Markdown, display
from sql_8week_danny.sql_engine import DuckDBEngine

In [2]:
#| echo: false

SQL_DIR = Path.cwd().parent / "sql"
DATA_DIR = Path.cwd().parent / "data"

TABLES_SQL = SQL_DIR / "week1.sql"
DUCK_DB = DATA_DIR / "week1_duckdb.db"

In [3]:
#| echo: false

def create_database(persist=True):
    if persist and DUCK_DB:
        db = DuckDBEngine(
            str(DUCK_DB), rm_db=True
        )  # rm the db if it exists to avoid table creation errors
    else:
        db = DuckDBEngine()
    sql = db.execute_sql_file(TABLES_SQL)  # create the tables
    return db, sql

In [4]:
db, sql = create_database(persist=True)

In [5]:
db.table_names

['members', 'menu', 'sales']

In [6]:
#| label: display_tables_sql
#| echo: false

display(Markdown(f"```\n{sql}"))

```
CREATE SCHEMA IF NOT EXISTS dannys_diner;

SET search_path = dannys_diner;

CREATE OR REPLACE TABLE sales (
    "customer_id" VARCHAR(1), "order_date" DATE, "product_id" INTEGER
);

INSERT INTO
    sales (
        "customer_id", "order_date", "product_id"
    )
VALUES ('A', '2021-01-01', '1'),
    ('A', '2021-01-01', '2'),
    ('A', '2021-01-07', '2'),
    ('A', '2021-01-10', '3'),
    ('A', '2021-01-11', '3'),
    ('A', '2021-01-11', '3'),
    ('B', '2021-01-01', '2'),
    ('B', '2021-01-02', '2'),
    ('B', '2021-01-04', '1'),
    ('B', '2021-01-11', '1'),
    ('B', '2021-01-16', '3'),
    ('B', '2021-02-01', '3'),
    ('C', '2021-01-01', '3'),
    ('C', '2021-01-01', '3'),
    ('C', '2021-01-07', '3');

CREATE OR REPLACE TABLE menu (
    "product_id" INTEGER, "product_name" VARCHAR(5), "price" INTEGER
);

INSERT INTO
    menu (
        "product_id", "product_name", "price"
    )
VALUES ('1', 'sushi', '10'),
    ('2', 'curry', '15'),
    ('3', 'ramen', '12');

CREATE OR REPLACE TABLE members (
    "customer_id" VARCHAR(1), "join_date" DATE
);

INSERT INTO
    members ("customer_id", "join_date")
VALUES ('A', '2021-01-07'),
    ('B', '2021-01-09');

In [7]:
tables_df = db.load_tables_to_df()

#### Table information

In [8]:
df = db.display_all_table_info(display_tables=True)

# members: 1 records

Unnamed: 0,customer_id,join_date
0,A,2021-01-07
1,B,2021-01-09


# menu: 2 records

Unnamed: 0,product_id,product_name,price
0,1,sushi,10
1,2,curry,15
2,3,ramen,12


# sales: 3 records

Unnamed: 0,customer_id,order_date,product_id
0,A,2021-01-01,1
1,A,2021-01-01,2
2,A,2021-01-07,2
3,A,2021-01-10,3
4,A,2021-01-11,3
5,A,2021-01-11,3
6,B,2021-01-01,2
7,B,2021-01-02,2
8,B,2021-01-04,1
9,B,2021-01-11,1


In [9]:
for table in db.table_names:
    display(Markdown(f"# {table}"))
    display(df[table].head())

# members

Unnamed: 0,customer_id,join_date
0,A,2021-01-07
1,B,2021-01-09


# menu

Unnamed: 0,product_id,product_name,price
0,1,sushi,10
1,2,curry,15
2,3,ramen,12


# sales

Unnamed: 0,customer_id,order_date,product_id
0,A,2021-01-01,1
1,A,2021-01-01,2
2,A,2021-01-07,2
3,A,2021-01-10,3
4,A,2021-01-11,3


In [10]:
db.query(
    "SELECT COUNT(*) FROM members"
)


2

In [11]:
# db.close()  # required to remove the DB file lock (.wal)

#### Case Study Questions

Each of the following case study questions can be answered using a single SQL statement:


##### What is the total amount each customer spent at the restaurant?

In [12]:
db.query(
    "SELECT customer_id, SUM(price) FROM sales as s JOIN menu as m ON s.product_id = m.product_id GROUP BY customer_id"
)

Unnamed: 0,customer_id,sum(price)
0,A,76.0
1,B,74.0
2,C,36.0


##### How many days has each customer visited the restaurant?

In [13]:
db.query(
    "SELECT customer_id, COUNT(DISTINCT order_date) FROM sales GROUP BY customer_id"
)

Unnamed: 0,customer_id,count(DISTINCT order_date)
0,A,4
1,B,6
2,C,2


##### What was the first item from the menu purchased by each customer?

In [14]:
db.query(
    "SELECT customer_id, MIN(order_date) FROM sales GROUP BY customer_id"
)

Unnamed: 0,customer_id,min(order_date)
0,A,2021-01-01
1,B,2021-01-01
2,C,2021-01-01


In [15]:
db.query(
    "SELECT customer_id, product_id FROM sales WHERE order_date = '2021-01-01' GROUP BY customer_id, product_id"
)

Unnamed: 0,customer_id,product_id
0,A,1
1,A,2
2,B,2
3,C,3


##### What is the most purchased item on the menu and how many times was it purchased by all customers?

In [16]:
db.query(
    "SELECT product_id, COUNT(product_id) FROM sales GROUP BY product_id ORDER BY COUNT(product_id) DESC"
)

Unnamed: 0,product_id,count(product_id)
0,3,8
1,2,4
2,1,3


##### Which item was the most popular for each customer?

In [17]:
db.query(
    "SELECT customer_id, product_id, COUNT(product_id) FROM sales GROUP BY customer_id, product_id ORDER BY COUNT(product_id) DESC"
)

Unnamed: 0,customer_id,product_id,count(product_id)
0,A,3,3
1,C,3,3
2,A,2,2
3,B,1,2
4,B,2,2
5,B,3,2
6,A,1,1


##### Which item was purchased first by the customer after they became a member?

##### Which item was purchased just before the customer became a member?

##### What is the total items and amount spent for each member before they became a member?

##### If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?

###### In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi - how many points do customer A and B have at the end of January?