In [0]:
%sql
-- Create the case-study schema and make it the current schema for the session,
-- so the unqualified table names used by the statements below resolve to
-- pizza_runner instead of whatever schema happened to be active.
CREATE SCHEMA IF NOT EXISTS pizza_runner;
USE pizza_runner;

-- runners: runner_id together with its registration_date.
-- Dropped and rebuilt on every run so the cell can be re-executed safely.
DROP TABLE IF EXISTS runners;

CREATE TABLE runners (
  runner_id         INT,
  registration_date DATE
);

INSERT INTO runners (runner_id, registration_date)
VALUES
  (1, DATE '2021-01-01'),
  (2, DATE '2021-01-03'),
  (3, DATE '2021-01-08'),
  (4, DATE '2021-01-15');


-- customer_orders: raw order lines (order, customer, pizza, requested
-- exclusions/extras and the time the order was placed).
-- The exclusions/extras values are loaded exactly as given, including the
-- mix of '', 'null' and NULL; customer_orders_cleaned normalises them later.
DROP TABLE IF EXISTS customer_orders;
CREATE TABLE customer_orders (
  `order_id` INTEGER,
  `customer_id` INTEGER,
  `pizza_id` INTEGER,
  `exclusions` VARCHAR(4),
  `extras` VARCHAR(4),
  `order_time` TIMESTAMP
);

-- Integer and timestamp columns receive typed literals so the load does not
-- depend on implicit string-to-INTEGER / string-to-TIMESTAMP coercion.
INSERT INTO customer_orders
  (`order_id`, `customer_id`, `pizza_id`, `exclusions`, `extras`, `order_time`)
VALUES
  (1, 101, 1, '', '', TIMESTAMP '2020-01-01 18:05:02'),
  (2, 101, 1, '', '', TIMESTAMP '2020-01-01 19:00:52'),
  (3, 102, 1, '', '', TIMESTAMP '2020-01-02 23:51:23'),
  (3, 102, 2, '', NULL, TIMESTAMP '2020-01-02 23:51:23'),
  (4, 103, 1, '4', '', TIMESTAMP '2020-01-04 13:23:46'),
  (4, 103, 1, '4', '', TIMESTAMP '2020-01-04 13:23:46'),
  (4, 103, 2, '4', '', TIMESTAMP '2020-01-04 13:23:46'),
  (5, 104, 1, 'null', '1', TIMESTAMP '2020-01-08 21:00:29'),
  (6, 101, 2, 'null', 'null', TIMESTAMP '2020-01-08 21:03:13'),
  (7, 105, 2, 'null', '1', TIMESTAMP '2020-01-08 21:20:29'),
  (8, 102, 1, 'null', 'null', TIMESTAMP '2020-01-09 23:54:33'),
  (9, 103, 1, '4', '1, 5', TIMESTAMP '2020-01-10 11:22:59'),
  (10, 104, 1, 'null', 'null', TIMESTAMP '2020-01-11 18:34:49'),
  (10, 104, 1, '2, 6', '1, 4', TIMESTAMP '2020-01-11 18:34:49');


-- runner_orders: raw runner assignment per order.
-- pickup_time, distance, duration and cancellation are free text and are
-- loaded exactly as given (mixed units, '', 'null' and NULL);
-- runner_orders_cleaned normalises them later.
DROP TABLE IF EXISTS runner_orders;
CREATE TABLE runner_orders (
  `order_id` INTEGER,
  `runner_id` INTEGER,
  `pickup_time` VARCHAR(19),
  `distance` VARCHAR(7),
  `duration` VARCHAR(10),
  `cancellation` VARCHAR(23)
);

-- order_id / runner_id are INTEGER columns, so they receive integer literals
-- rather than quoted strings that rely on implicit string-to-INTEGER coercion.
INSERT INTO runner_orders
  (`order_id`, `runner_id`, `pickup_time`, `distance`, `duration`, `cancellation`)
VALUES
  (1, 1, '2020-01-01 18:15:34', '20km', '32 minutes', ''),
  (2, 1, '2020-01-01 19:10:54', '20km', '27 minutes', ''),
  (3, 1, '2020-01-03 00:12:37', '13.4km', '20 mins', NULL),
  (4, 2, '2020-01-04 13:53:03', '23.4', '40', NULL),
  (5, 3, '2020-01-08 21:10:57', '10', '15', NULL),
  (6, 3, 'null', 'null', 'null', 'Restaurant Cancellation'),
  (7, 2, '2020-01-08 21:30:45', '25km', '25mins', 'null'),
  (8, 2, '2020-01-10 00:15:02', '23.4 km', '15 minute', 'null'),
  (9, 2, 'null', 'null', 'null', 'Customer Cancellation'),
  (10, 1, '2020-01-11 18:50:20', '10km', '10minutes', 'null');


-- pizza_names: lookup from pizza_id to its display name.
DROP TABLE IF EXISTS pizza_names;

CREATE TABLE pizza_names (
  pizza_id   INT,
  pizza_name STRING
);

INSERT INTO pizza_names (pizza_id, pizza_name)
VALUES
  (1, 'Meatlovers'),
  (2, 'Vegetarian');


-- pizza_recipes: for each pizza_id, a comma-separated list of topping ids
-- held in a single STRING column.
DROP TABLE IF EXISTS pizza_recipes;

CREATE TABLE pizza_recipes (
  pizza_id INT,
  toppings STRING
);

INSERT INTO pizza_recipes (pizza_id, toppings)
VALUES
  (1, '1, 2, 3, 4, 5, 6, 8, 10'),
  (2, '4, 6, 7, 9, 11, 12');


-- pizza_toppings: lookup from topping_id to topping_name.
DROP TABLE IF EXISTS pizza_toppings;

CREATE TABLE pizza_toppings (
  topping_id   INT,
  topping_name STRING
);

INSERT INTO pizza_toppings (topping_id, topping_name)
VALUES
  (1,  'Bacon'),
  (2,  'BBQ Sauce'),
  (3,  'Beef'),
  (4,  'Cheese'),
  (5,  'Chicken'),
  (6,  'Mushrooms'),
  (7,  'Onions'),
  (8,  'Pepperoni'),
  (9,  'Peppers'),
  (10, 'Salami'),
  (11, 'Tomatoes'),
  (12, 'Tomato Sauce');

# **1. Pizza Metrics**

## **Clean 'runner_orders' columns**

In [0]:
%sql
-- Build runner_orders_cleaned, a normalised copy of runner_orders:
--   * pickup_time : blank text and the literal text 'null' become NULL
--   * distance    : everything except digits and '.' is stripped, then cast
--                   to FLOAT and rounded to 2 decimals
--   * duration    : everything except digits and '.' is stripped, then cast
--                   to FLOAT
--   * cancellation: only the two recognised reasons are kept; any other
--                   value ('', 'null', NULL, ...) becomes NULL
DROP TABLE IF EXISTS runner_orders_cleaned;

-- NOTE(review): PRIMARY KEY support depends on the engine/catalog in use; confirm.
CREATE TABLE runner_orders_cleaned (
  order_id INTEGER PRIMARY KEY,
  runner_id INTEGER,
  pickup_time STRING,
  distance FLOAT,
  duration FLOAT,
  cancellation STRING
);

-- Explicit target column list: the load no longer depends on the physical
-- column order of runner_orders_cleaned.
INSERT INTO runner_orders_cleaned
  (order_id, runner_id, pickup_time, distance, duration, cancellation)
SELECT
  order_id,
  runner_id,
  CASE
      WHEN TRIM(pickup_time) IN ('', 'null') THEN NULL
      ELSE TRIM(pickup_time)
  END AS pickup_time,
  -- After stripping non-numeric characters, 'null', '' and unit-only text all
  -- collapse to '', which NULLIF turns into NULL before the cast. A NULL
  -- input stays NULL through REGEXP_REPLACE.
  ROUND(CAST(NULLIF(REGEXP_REPLACE(distance, '[^0-9.]', ''), '') AS FLOAT), 2) AS distance,
  CAST(NULLIF(REGEXP_REPLACE(duration, '[^0-9.]', ''), '') AS FLOAT) AS duration,
  CASE
      WHEN TRIM(cancellation) IN ('Restaurant Cancellation', 'Customer Cancellation')
      THEN TRIM(cancellation)
      ELSE NULL
  END AS cancellation
FROM runner_orders;

## **Clean 'customer_orders' columns**

In [0]:
%sql
-- Build customer_orders_cleaned, a normalised copy of customer_orders in which
-- blank text and the literal text 'null' in exclusions/extras become NULL.
-- The source column order_time is stored here as order_date.
DROP TABLE IF EXISTS customer_orders_cleaned;

CREATE TABLE  customer_orders_cleaned(
  order_id INT,
  customer_id INT,
  pizza_id INT,
  exclusions STRING,
  extras STRING,
  order_date TIMESTAMP
);

-- Explicit target column list: the load no longer depends on the physical
-- column order of customer_orders_cleaned.
INSERT INTO customer_orders_cleaned
  (order_id, customer_id, pizza_id, exclusions, extras, order_date)
SELECT
  order_id,
  customer_id,
  pizza_id,
  -- Both columns use the same rule, comparing the trimmed value so padded
  -- tokens such as ' null' or ' ' are recognised too. A NULL input falls
  -- through to ELSE and stays NULL.
  CASE
      WHEN TRIM(exclusions) IN ('', 'null') THEN NULL
      ELSE exclusions
  END AS exclusions,
  CASE
      WHEN TRIM(extras) IN ('', 'null') THEN NULL
      ELSE extras
  END AS extras,
  order_time
FROM customer_orders;

In [0]:
%sql
-- How many pizzas were ordered?
-- COUNT(pizza_id) counts the rows whose pizza_id is not NULL.
SELECT
  COUNT(pizza_id) AS total_pizzas
FROM customer_orders_cleaned;

In [0]:
%sql
-- How many unique customer orders were made?
-- An order_id can appear on several rows, so distinct ids are counted.
SELECT COUNT(DISTINCT co.order_id) AS unique_orders
FROM customer_orders_cleaned AS co;

In [0]:
%sql
-- How many successful orders were delivered by each runner?
-- runner_orders_cleaned.cancellation already holds either one of the two
-- recognised cancellation reasons or NULL, so a NULL cancellation identifies
-- an order that was not cancelled; no further cleaning is needed here.
SELECT
  runner_id,
  COUNT(*) AS successful_orders
FROM runner_orders_cleaned
WHERE cancellation IS NULL
GROUP BY runner_id
ORDER BY runner_id;

In [0]:
%sql
-- How many successful orders were delivered by each runner?
-- Alternative formulation: an order counts as delivered when it has a
-- pickup_time.
SELECT
  ro.runner_id,
  COUNT(*) AS successfully_delivered_by_runner
FROM runner_orders_cleaned AS ro
WHERE ro.pickup_time IS NOT NULL
GROUP BY ro.runner_id;

In [0]:
%sql
-- Inspect the raw (uncleaned) runner_orders rows; columns listed in table order.
SELECT
  order_id,
  runner_id,
  pickup_time,
  distance,
  duration,
  cancellation
FROM runner_orders;

In [0]:
%sql
-- How many of each type of pizza was delivered?
-- Only order lines whose order has a pickup_time are counted.
SELECT
  pn.pizza_name,
  COUNT(*) AS total_order
FROM customer_orders_cleaned AS co
INNER JOIN runner_orders_cleaned AS ro
  ON ro.order_id = co.order_id
INNER JOIN pizza_names AS pn
  ON pn.pizza_id = co.pizza_id
WHERE ro.pickup_time IS NOT NULL
GROUP BY pn.pizza_name;

In [0]:
%sql
-- How many Vegetarian and Meatlovers were ordered by each customer?
-- Long form: one output row per (customer, pizza name) pair.
SELECT
  co.customer_id,
  pn.pizza_name,
  COUNT(*) AS total_orders
FROM customer_orders_cleaned AS co
INNER JOIN pizza_names AS pn
  ON pn.pizza_id = co.pizza_id
GROUP BY
  co.customer_id,
  pn.pizza_name
ORDER BY co.customer_id;

In [0]:
%sql
-- Inspect the raw (uncleaned) customer_orders rows; columns listed in table order.
SELECT
  order_id,
  customer_id,
  pizza_id,
  exclusions,
  extras,
  order_time
FROM customer_orders;

In [0]:
%sql
-- How many Vegetarian and Meatlovers were ordered by each customer?
-- Pivoted form: one row per customer, one column per pizza name.
-- COUNT over a CASE without ELSE counts only the matching rows and yields 0
-- when a customer has none.
SELECT
  co.customer_id,
  COUNT(CASE WHEN pn.pizza_name = 'Vegetarian' THEN 1 END) AS vegetarian,
  COUNT(CASE WHEN pn.pizza_name = 'Meatlovers' THEN 1 END) AS meatlovers
FROM customer_orders_cleaned AS co
INNER JOIN pizza_names AS pn
  ON pn.pizza_id = co.pizza_id
GROUP BY co.customer_id
ORDER BY co.customer_id;

In [0]:
%sql
-- What was the maximum number of pizzas delivered in a single order?
-- Step 1 counts the pizzas of every delivered order (one with a pickup_time);
-- step 2 keeps only the order(s) whose count equals the overall maximum, so
-- ties are all returned instead of an arbitrary single row.
WITH delivered_pizzas_per_order AS (
    SELECT
        co.order_id,
        COUNT(*) AS pizza_count
    FROM customer_orders_cleaned AS co
    INNER JOIN runner_orders_cleaned AS ro
        ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL
    GROUP BY co.order_id
)
SELECT
    order_id,
    pizza_count AS max_delivery_perOrder
FROM delivered_pizzas_per_order
WHERE pizza_count = (SELECT MAX(pizza_count) FROM delivered_pizzas_per_order)
ORDER BY order_id;

In [0]:
%sql
-- For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
-- Full-join formulation.
--   delivered_pizzas: order lines whose order has a pickup_time, i.e. the
--                     cancelled orders are excluded as the question requires.
--   unchanged / changed: per-customer counts of lines without / with any
--                     exclusion or extra.
-- The FULL JOIN keeps customers that appear on only one side; COALESCE
-- returns a single customer_id column and 0 instead of NULL for a missing side.
WITH delivered_pizzas AS (
    SELECT
        co.customer_id,
        co.exclusions,
        co.extras
    FROM customer_orders_cleaned AS co
    INNER JOIN runner_orders_cleaned AS ro
        ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL
),
unchanged AS (
    SELECT
        customer_id,
        COUNT(*) AS unchanged_order
    FROM delivered_pizzas
    WHERE exclusions IS NULL AND extras IS NULL
    GROUP BY customer_id
),
changed AS (
    SELECT
        customer_id,
        COUNT(*) AS changed_order
    FROM delivered_pizzas
    WHERE exclusions IS NOT NULL OR extras IS NOT NULL
    GROUP BY customer_id
)
SELECT
    COALESCE(u.customer_id, c.customer_id) AS customer_id,
    COALESCE(u.unchanged_order, 0) AS Unchanged_order,
    COALESCE(c.changed_order, 0) AS changed_order
FROM unchanged AS u
FULL JOIN changed AS c
    ON u.customer_id = c.customer_id
ORDER BY customer_id;

In [0]:
%sql
-- For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
-- Conditional-aggregation formulation. The join to runner_orders_cleaned and
-- the pickup_time filter restrict the count to delivered pizzas, as the
-- question requires; cancelled orders have no pickup_time.
SELECT
    co.customer_id,
    SUM(
        CASE
            WHEN co.exclusions IS NOT NULL OR co.extras IS NOT NULL THEN 1
            ELSE 0
        END
    ) AS CHANGES,
    SUM(
        CASE
            WHEN co.exclusions IS NULL AND co.extras IS NULL THEN 1
            ELSE 0
        END
    ) AS NO_CHANGES
FROM customer_orders_cleaned AS co
INNER JOIN runner_orders_cleaned AS ro
    ON co.order_id = ro.order_id
WHERE ro.pickup_time IS NOT NULL
GROUP BY co.customer_id
ORDER BY co.customer_id;


In [0]:
%sql
-- How many pizzas were delivered that had both exclusions and extras?
-- INNER JOIN is used because the pickup_time filter discards every order
-- line without a matching runner row anyway; an outer join would only
-- obscure that.
SELECT
    COUNT(*) AS total_delivered
FROM customer_orders_cleaned AS co
INNER JOIN runner_orders_cleaned AS ro
    ON co.order_id = ro.order_id
WHERE ro.pickup_time IS NOT NULL
  AND co.exclusions IS NOT NULL
  AND co.extras IS NOT NULL;


In [0]:
%sql
-- What was the total volume of pizzas ordered for each hour of the day?
-- Rows are bucketed by the hour component of order_date and returned in
-- hour order so the output is deterministic.
SELECT
    HOUR(order_date) AS ordered_hour,
    COUNT(*) AS total_orders
FROM customer_orders_cleaned
GROUP BY HOUR(order_date)
ORDER BY HOUR(order_date);

In [0]:
%sql
-- What was the volume of orders for each day of the week?
-- The weekday number and name are derived once per row in a CTE, then the
-- rows are grouped and sorted by that number.
WITH orders_by_weekday AS (
    SELECT
        DATE_PART('dow', order_date) AS Day,
        DATE_FORMAT(order_date, 'EEEE') AS Day_Name
    FROM customer_orders_cleaned
)
SELECT
    Day,
    Day_Name,
    COUNT(*) AS total_orders
FROM orders_by_weekday
GROUP BY
    Day,
    Day_Name
ORDER BY Day;