## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data):
    """Load a CSV file into the ``transport`` table of ``transport_business.db``.

    The table is replaced on every call, so re-running the cell (for example
    after Restart & Run All) resets the table to the CSV contents instead of
    failing because the table already exists.

    Parameters
    ----------
    input_data : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    None
        Errors are not raised; they are reported with ``print`` so the
        notebook keeps running.
    """
    try:
        # Read the CSV first so a bad path never creates an empty database file.
        data = pd.read_csv(input_data)
        engine = sqlite3.connect('transport_business.db')
        try:
            # if_exists='replace' keeps the load idempotent across re-runs.
            data.to_sql('transport', con=engine, index=False, if_exists='replace')
        finally:
            # Always release the connection, even when the write fails.
            engine.close()
    except Exception as e:
        print(f'Error: {e}')

# Load the shipments CSV into the SQLite database via add_data_to_database().
# NOTE(review): "/content/..." is an absolute path (looks like a Colab upload
# location - confirm); adjust it when the notebook runs elsewhere.
data = "/content/transport_business.csv"
add_data_to_database(data)

In [3]:
# Loading SQL extension
%load_ext sql

In [4]:
# Connecting to the database
%sql sqlite:///transport_business.db

In [5]:
# Use the deprecated default table style for query results
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [6]:
%%sql
SELECT * FROM transport LIMIT 5;

 * sqlite:///transport_business.db
Done.


shipment_id,customer_name,pickup_address,dropoff_address,shipment_date,shipment_time,shipment_cost
1,ABC Logistics,123 Main Street,456 Elm Street,2023-10-05,10:00 AM,120
2,XYZ Enterprises,789 Maple Avenue,101 Oak Street,2023-10-06,02:00 PM,150
3,DEF Shipping,567 Oak Street,987 Pine Avenue,2023-10-07,09:00 AM,180
4,LMN Transportation,345 Maple Avenue,678 Elm Street,2023-10-08,03:00 PM,135
5,GHI Delivery,987 Pine Avenue,234 Main Street,2023-10-09,11:00 AM,105


## Question 2: **Answer**

In [7]:
%%sql
-- Tally shipments per customer, then keep every customer whose tally equals
-- the overall maximum (so ties are all returned).
WITH per_customer AS (
    SELECT customer_name, COUNT(*) AS total_shipments
    FROM transport
    GROUP BY customer_name
)
SELECT customer_name, total_shipments
FROM per_customer
WHERE total_shipments = (
    -- Highest shipment count reached by any single customer
    SELECT MAX(total_shipments) FROM per_customer
);

 * sqlite:///transport_business.db
Done.


customer_name,total_shipments
VWX Courier,3


## Question 3: **Answer**

In [8]:
%%sql
-- Routes (pickup/drop-off pairs) used by more than one shipment, busiest first.
SELECT pickup_address, dropoff_address, COUNT(*) AS shipment_count
FROM transport
GROUP BY pickup_address, dropoff_address
HAVING COUNT(*) > 1
ORDER BY shipment_count DESC;

 * sqlite:///transport_business.db
Done.


pickup_address,dropoff_address,shipment_count
123 Main Street,456 Elm Street,2
345 Maple Avenue,678 Elm Street,2
567 Oak Street,987 Pine Avenue,2
678 Elm Street,123 Main Street,2
987 Pine Avenue,234 Main Street,2


## Question 4: **Answer**



In [9]:
%%sql
-- Average shipment cost per route; return the route(s) whose average equals
-- the highest average (so ties are all returned).
WITH route_avg_cost AS (
    SELECT pickup_address,
           dropoff_address,
           AVG(shipment_cost) AS average_shipment_cost
    FROM transport
    GROUP BY pickup_address, dropoff_address
)
SELECT pickup_address, dropoff_address, average_shipment_cost
FROM route_avg_cost
WHERE average_shipment_cost = (
    SELECT MAX(average_shipment_cost) FROM route_avg_cost
);

 * sqlite:///transport_business.db
Done.


pickup_address,dropoff_address,average_shipment_cost
987 Pine Avenue,987 Pine Avenue,215.0


## Question 5: **Answer**

In [10]:
%%sql
-- Percentage change in total shipment cost from October to November.
-- Months are matched on STRFTIME('%m') alone, i.e. without regard to the year.
WITH cost_analysis AS (
    SELECT
        -- Totals are cast to REAL so the division below is not integer division.
        (SELECT CAST(SUM(shipment_cost) AS REAL)
           FROM transport
          WHERE STRFTIME('%m', shipment_date) = '10') AS october_cost,
        (SELECT CAST(SUM(shipment_cost) AS REAL)
           FROM transport
          WHERE STRFTIME('%m', shipment_date) = '11') AS november_cost
)
SELECT
    october_cost,
    november_cost,
    ROUND((november_cost - october_cost) / october_cost * 100, 2)
        AS percentage_change
FROM cost_analysis;

 * sqlite:///transport_business.db
Done.


october_cost,november_cost,percentage_change
4285.0,780.0,-81.8


## Alternative method

In [11]:
%%sql
-- Same calculation with conditional sums: each month's total is computed in
-- one aggregate over the table (rows from other months contribute 0).
SELECT
    october_cost,
    november_cost,
    ROUND((november_cost - october_cost) / october_cost * 100, 2)
        AS percentage_change
FROM (
    SELECT
        CAST(SUM(CASE WHEN STRFTIME('%m', shipment_date) = '10'
                      THEN shipment_cost ELSE 0 END) AS REAL) AS october_cost,
        CAST(SUM(CASE WHEN STRFTIME('%m', shipment_date) = '11'
                      THEN shipment_cost ELSE 0 END) AS REAL) AS november_cost
    FROM transport
) AS cost_analysis;

 * sqlite:///transport_business.db
Done.


october_cost,november_cost,percentage_change
4285.0,780.0,-81.8


## Question 6: **Answer**

In [12]:
%%sql
-- Convert shipment_time from 12-hour text ('HH:MM AM' / 'HH:MM PM') to the
-- 24-hour 'HH:MM:SS' form produced by TIME(). The hour is read from the first
-- two characters, so a zero-padded hour (as in '09:00 AM') is expected.
UPDATE transport
SET shipment_time =
    CASE
        -- 12:xx AM is the first hour of the day, so it becomes 00:xx
        -- (without this branch it would be stored as 12:xx, i.e. noon).
        WHEN shipment_time LIKE '%AM' AND SUBSTR(shipment_time, 1, 2) = '12'
            THEN TIME('00' || SUBSTR(shipment_time, INSTR(shipment_time, ':'), 3))

        -- 12:xx PM is already correct in 24-hour form; just drop the suffix.
        WHEN shipment_time LIKE '%PM' AND SUBSTR(shipment_time, 1, 2) = '12'
            THEN TIME(REPLACE(shipment_time, 'PM', ''))

        -- Any other PM time: add 12 to the hour and keep the ':MM' part.
        WHEN shipment_time LIKE '%PM'
            THEN TIME((CAST(SUBSTR(shipment_time, 1, 2) AS INTEGER) + 12) ||
                      SUBSTR(shipment_time, INSTR(shipment_time, ':'), 3))

        -- Remaining AM times: drop the suffix (e.g. 10:30 AM becomes 10:30:00).
        ELSE TIME(REPLACE(shipment_time, 'AM', ''))
    END;

 * sqlite:///transport_business.db
32 rows affected.


[]

In [13]:
%%sql
-- Split total shipment cost at noon and compare the two parts of the day.
-- The '%H' hour string is compared against '12', so shipment_time must
-- already be in 24-hour form.
SELECT
    morning_cost,
    afternoon_cost,
    -- Afternoon total minus morning total
    afternoon_cost - morning_cost AS cost_difference
FROM (
    SELECT
        -- Shipments before 12:00
        SUM(CASE WHEN STRFTIME('%H', shipment_time) < '12'
                 THEN shipment_cost ELSE 0 END) AS morning_cost,
        -- Shipments from 12:00 onward
        SUM(CASE WHEN STRFTIME('%H', shipment_time) >= '12'
                 THEN shipment_cost ELSE 0 END) AS afternoon_cost
    FROM transport
) AS cost_by_period;

 * sqlite:///transport_business.db
Done.


morning_cost,afternoon_cost,cost_difference
2525,2540,15
