In [None]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os

In [None]:
db_path = "/kaggle/input/olist-database/olist.db"

# Open the dataset read-only through an SQLite URI. With a plain path,
# sqlite3.connect() can silently create a new, empty database when the file is
# missing, which only surfaces later as "no such table" errors. With mode=ro
# the failure happens right here instead, and the input data is protected
# from accidental writes.
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)

In [None]:
# Recent late orders.
# Returns order_id, customer_unique_id and delay_days for orders that:
#   - are not in status 'canceled' or 'unavailable';
#   - were purchased at most 90 days before the most recent
#     order_purchase_timestamp found in `orders` (the reference date comes
#     from the data, not from today's date);
#   - were delivered at least 3 days after the estimated delivery date.
# delay_days is the JULIANDAY difference (delivered - estimated), i.e. a real
# number of days. Orders with a NULL delivery date drop out because the
# comparison on JULIANDAY(NULL) is not true.
query_lateorders = """
WITH last_order_date AS (
    SELECT MAX(order_purchase_timestamp) AS max_order_date 
    FROM orders
),
recent_orders AS (
    SELECT 
        o.order_id,
        c.customer_unique_id,
        o.order_delivered_customer_date,
        o.order_estimated_delivery_date,
        (JULIANDAY(o.order_delivered_customer_date) - JULIANDAY(o.order_estimated_delivery_date)) AS delay_days
    FROM 
        orders o
    JOIN 
        customers c ON o.customer_id = c.customer_id
    CROSS JOIN 
        last_order_date
    WHERE 
        o.order_status NOT IN ('canceled', 'unavailable')
        AND (JULIANDAY(last_order_date.max_order_date) - JULIANDAY(o.order_purchase_timestamp)) <= 90
        AND JULIANDAY(o.order_delivered_customer_date) > JULIANDAY(o.order_estimated_delivery_date)
)
SELECT 
    order_id, 
    customer_unique_id, 
    delay_days
FROM 
    recent_orders
WHERE 
    delay_days >= 3;
"""

# Sellers generating more than 100,000 Real in revenue.
# Revenue is SUM(order_items.price) per seller, counted only on orders whose
# status is 'delivered'. Only the `price` column is summed; no other amount
# is added to the total.
# Returns seller_id and total_revenue for sellers strictly above 100000.
query_sellers = """
WITH seller_revenue AS (
    SELECT 
        s.seller_id,
        SUM(oi.price) AS total_revenue
    FROM 
        order_items oi
    JOIN 
        sellers s ON oi.seller_id = s.seller_id
    JOIN 
        orders o ON oi.order_id = o.order_id
    WHERE 
        o.order_status = 'delivered'
    GROUP BY 
        s.seller_id
)
SELECT 
    seller_id,
    total_revenue
FROM 
    seller_revenue
WHERE 
    total_revenue > 100000;
"""

# Highly engaged new sellers.
# A seller is "new" when their first sale (the earliest
# order_purchase_timestamp among the orders containing their items) is at
# most 90 days before the most recent order_purchase_timestamp in `orders`.
# Among those, keeps the sellers with more than 30 order_items rows; there is
# no order_status filter here, so items from every order status are counted.
# Returns seller_id, total_products_sold and first_sale_date.
query_newsellers = """
WITH last_order_date AS (
    SELECT MAX(order_purchase_timestamp) AS max_order_date 
    FROM orders
),
first_sale_date AS (
    SELECT 
        oi.seller_id,
        MIN(o.order_purchase_timestamp) AS first_sale_date
    FROM 
        order_items oi
    JOIN 
        orders o ON oi.order_id = o.order_id
    GROUP BY 
        oi.seller_id
),
recent_sellers AS (
    SELECT 
        fs.seller_id,
        fs.first_sale_date,
        (JULIANDAY(l.max_order_date) - JULIANDAY(fs.first_sale_date)) AS seller_age_days
    FROM 
        first_sale_date fs
    CROSS JOIN 
        last_order_date l
    WHERE 
        (JULIANDAY(l.max_order_date) - JULIANDAY(fs.first_sale_date)) <= 90
),
seller_activity AS (
    SELECT 
        rs.seller_id,
        rs.first_sale_date,
        COUNT(oi.order_item_id) AS total_products_sold
    FROM 
        recent_sellers rs
    JOIN 
        order_items oi ON rs.seller_id = oi.seller_id
    GROUP BY 
        rs.seller_id, rs.first_sale_date
)
SELECT 
    seller_id,
    total_products_sold,
    first_sale_date
FROM 
    seller_activity
WHERE 
    total_products_sold > 30;
"""

# The 5 zip-code prefixes with the worst average review score.
# Scope: reviews attached to orders purchased at most 365 days before the most
# recent order_purchase_timestamp in `orders`; only prefixes with more than 30
# reviews in that window are ranked.
# Ties on the average are broken by review_count (more reviews first) and then
# by prefix, so that LIMIT 5 returns the same rows in the same order on every
# run instead of an arbitrary subset of the tied prefixes.
query_worstreviewscore = """
WITH last_order_date AS (
    SELECT MAX(order_purchase_timestamp) AS max_order_date
    FROM orders
),
recent_reviews AS (
    SELECT
        c.customer_zip_code_prefix,
        r.review_score
    FROM
        orders o
    JOIN
        customers c ON o.customer_id = c.customer_id
    JOIN
        order_reviews r ON o.order_id = r.order_id
    CROSS JOIN
        last_order_date
    WHERE
        (JULIANDAY(last_order_date.max_order_date) - JULIANDAY(o.order_purchase_timestamp)) <= 365
),
zipcode_reviews AS (
    SELECT
        customer_zip_code_prefix,
        COUNT(review_score) AS review_count,
        AVG(review_score) AS avg_review_score
    FROM
        recent_reviews
    GROUP BY
        customer_zip_code_prefix
    HAVING
        review_count > 30
)
SELECT
    customer_zip_code_prefix,
    review_count,
    avg_review_score
FROM
    zipcode_reviews
ORDER BY
    avg_review_score ASC,
    review_count DESC,
    customer_zip_code_prefix ASC
LIMIT 5;
"""

In [None]:
# Run the late-orders query on the open connection and render the result.
result_lateorders = pd.read_sql_query(sql=query_lateorders, con=conn)
display(result_lateorders)

In [None]:
# Run the seller-revenue query on the open connection and render the result.
result_sellers = pd.read_sql_query(sql=query_sellers, con=conn)
display(result_sellers)

In [None]:
# Run the new-sellers query on the open connection and render the result.
result_newsellers = pd.read_sql_query(sql=query_newsellers, con=conn)
display(result_newsellers)

In [None]:
# Run the worst-review-score query on the open connection and render the result.
result_worstreviewscore = pd.read_sql_query(sql=query_worstreviewscore, con=conn)
display(result_worstreviewscore)