In [None]:
import sqlite3
from db_methods import *
import pandas as pd

db_path = "sales.db"  # Change this to the correct path if needed

# Open the SQLite database. The try/finally guarantees the connection is
# released even when one of the queries below raises.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Fetch the name of every table registered in the schema catalog
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tablas = cursor.fetchall()

    print("Tablas disponibles:")
    for tabla in tablas:
        print(f"- {tabla[0]}")

    # Show the first 3 rows of each table
    for tabla in tablas:
        print(f"\nPrimeras 3 filas de la tabla '{tabla[0]}':")
        # Table names cannot be bound as SQL parameters, so quote the
        # identifier (doubling any embedded quote) to cope with names that
        # contain spaces, punctuation or reserved words.
        nombre_citado = '"' + tabla[0].replace('"', '""') + '"'
        cursor.execute(f"SELECT * FROM {nombre_citado} LIMIT 3")
        filas = cursor.fetchall()
        for fila in filas:
            print(fila)
finally:
    conn.close()

In [None]:
import db_methods
# Rows priced above the average price of their own product line. The inner
# SELECT is a correlated subquery: it references the outer row's productline.
query = """
        SELECT * 
        FROM sales sale 
        WHERE sale.priceeach > (
            SELECT AVG(priceeach) 
            FROM sales 
            WHERE productline = sale.productline
        );
        """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly once rows are fetched.
connection = sqlite3.connect("sales.db")
try:
    cursor = connection.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
finally:
    connection.close()

db_methods.print_rows(result)


In [None]:
# Run it with a CTE: rows priced above their product line's average, with the
# per-line averages computed once in the `avg_price` CTE.
query = """
    WITH avg_price AS (
        SELECT productline, AVG(priceeach) AS avg_price
        FROM sales
        GROUP BY productline
    )
    SELECT *
    FROM sales sale
    WHERE sale.priceeach > (
        SELECT avg_price
        FROM avg_price
        WHERE productline = sale.productline
    );
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly once rows are fetched.
connection = sqlite3.connect("sales.db")
try:
    cursor = connection.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
finally:
    connection.close()

db_methods.print_rows(result)

* ¿Cuántos empleados reportan directa o indirectamente a David y a Gabriel?

In [None]:
# Recursive CTE: the anchor selects employees whose boss_id is 4 or 7, and the
# recursive member keeps adding employees whose boss is already in the result.
# NOTE(review): main_id is seeded with each direct report's own id, so rows are
# grouped per direct report rather than per boss 4/7 - confirm this is the
# intended grouping for the "how many report to each" question.
query = """
    WITH RECURSIVE employee_hierarchy AS (
        SELECT id, name, boss_id, id AS main_id
    FROM employees
    WHERE boss_id IN (4,7)

    UNION ALL 

    SELECT employee.id, employee.name, employee.boss_id, employee_hierarchy.main_id
    FROM employees employee
    INNER JOIN employee_hierarchy ON employee.boss_id = employee_hierarchy.id
    )

    SELECT *
    FROM employee_hierarchy
    ORDER BY main_id, id;
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly once rows are fetched.
connection = sqlite3.connect("sales.db")
try:
    cursor = connection.cursor()
    cursor.execute(query)
    result = cursor.fetchall()
finally:
    connection.close()

db_methods.print_rows(result)


```sql
    WITH RECURSIVE employee_hierarchy AS (
        -- consulta base: devuelve a los empleados que reportan directamente al 4 o al 7
        SELECT id, name, boss_id, id AS main_id
        FROM employees
        WHERE boss_id IN (4,7)

        UNION ALL 
        -- parte recursiva: busca a los empleados que reportan a cada empleado devuelto por el paso previo (inicialmente, los que reportan al 4 y al 7)
        SELECT employee.id, employee.name, employee.boss_id, employee_hierarchy.main_id
        FROM employees employee
        INNER JOIN employee_hierarchy ON employee.boss_id = employee_hierarchy.id
    )

    SELECT *
    FROM employee_hierarchy
    ORDER BY main_id, id;
 ```

# funciones ventana

Una función de ventana permite realizar cálculos sobre un conjunto de filas relacionadas con la fila actual, sin tener que agrupar y eliminar filas, a diferencia de lo que ocurre con GROUP BY. La clave es el uso de la cláusula OVER (...), que define la “ventana” de filas sobre las cuales se aplica la función.

```sql
<función_agregada>() OVER (
    [PARTITION BY columna]
    [ORDER BY columna]
    [ROWS BETWEEN ...]
)
```

* ¿Qué haríamos si Jorge necesita saber el promedio de ventas por mes y año sin perder el detalle por fila?

Ejemplo: primero se calcula el promedio por año y mes agrupando los datos con GROUP BY. En este caso el resultado muestra tres columnas: año, mes y el promedio de ventas de dicho mes.

In [None]:


# Average of `sales` per (YEAR_ID, MONTH_ID). GROUP BY collapses the detail
# rows into a single row per year/month pair.
query = """
    SELECT 
    YEAR_ID,
    MONTH_ID,
    avg(sales)
    FROM sales s
    GROUP BY 1,2
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly after loading the frame.
# pandas runs the query itself, so no explicit cursor is needed.
connection = sqlite3.connect("sales.db")
try:
    data_frame = pd.read_sql_query(query, connection)
finally:
    connection.close()

print(data_frame)


## Ahora, si se pide agregar a cada fila su promedio correspondiente

In [None]:
# Same monthly average, but computed with a window function: every detail row
# is kept and carries the average of its (year_id, month_id) partition.
query = """
    SELECT 
    year_id,
    month_id,
    sales,  
    AVG(sales) OVER 
        (PARTITION BY year_id, month_id) -- Ventana de agregación, agrupa logicamente por año y mes<
        AS avg_sales
    FROM sales
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly after loading the frame.
# pandas runs the query itself, so no explicit cursor is needed.
connection = sqlite3.connect("sales.db")
try:
    data_frame = pd.read_sql_query(query, connection)
finally:
    connection.close()

print(data_frame)


Ahora bien, si Jorge necesita saber cómo encontrar filas duplicadas en una tabla, sin agruparlas ni perder el detalle de cada una, ¿cómo lo resolvemos?

Para eso usamos funciones de ranking, como ROW_NUMBER(), que nos permiten numerar cada fila dentro de un grupo definido. Así, podemos detectar duplicados manteniendo el contexto completo de cada registro.

In [None]:
# Number the rows inside each (ordernumber, productcode, quantityordered)
# group; any row numbered above 1 repeats an earlier row of the same group.
query = """
    WITH  sales_with_row_number AS(
    SELECT *,
    ROW_NUMBER() OVER (
        PARTITION BY ordernumber,productcode,quantityordered
        ORDER BY ordernumber
        ) AS row_number
    FROM sales
    )

    SELECT * 
    FROM sales_with_row_number
    WHERE row_number > 1
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so release it explicitly after loading the frame.
# pandas runs the query itself, so no explicit cursor is needed.
connection = sqlite3.connect("sales.db")
try:
    data_frame = pd.read_sql_query(query, connection)
finally:
    connection.close()

print(data_frame)

# HOMEWORK 

1. ¿Puedes usar RANK() para obtener el ranking de ventas (SALES) por año (YEAR_ID), ordenando de mayor a menor?

RANK() OVER (PARTITION BY column1, column2, ... ORDER BY sort_column1, sort_column2, ...)



In [None]:
import db_methods
# Total sales per year, ranked from highest to lowest with RANK().
# The final SELECT now returns rank_sales as well: the rank was computed in the
# ranked_sales CTE but never shown, so the ranking itself was missing from the
# output.
query = """
    WITH total_sales_year AS (
    SELECT 
        YEAR_ID,
        SUM(QUANTITYORDERED * PRICEEACH) AS total_sales
    FROM 
        sales
    GROUP BY 
        YEAR_ID
    ),
    ranked_sales AS (
    SELECT 
        YEAR_ID,
        total_sales,
        RANK() OVER (ORDER BY total_sales DESC) AS rank_sales
    FROM 
        total_sales_year
    )
    SELECT 
    YEAR_ID, 
    total_sales,
    rank_sales
    FROM 
        ranked_sales;
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so the helper is called while the connection is
# open and the connection is closed explicitly afterwards.
db_connection = sqlite3.connect('sales.db')
try:
    # Presumably runs `query` on the connection and prints the rows - see db_methods.
    db_methods.print_query_results(db_connection, query)
finally:
    db_connection.close()

2. Haz lo mismo con DENSE_RANK()

2. DENSE_RANK() Function
The DENSE_RANK() function in SQL server serves the purpose of assigning ranks to rows in a dataset according to specific conditions. Just like the RANK() function, it orders the data based on certain criteria. However, what sets it apart is that it ensures there are no gaps between ranks in cases where multiple rows share the same values. This means that tied rows receive consecutive ranks without any interruptions.

DENSE_RANK() OVER (PARTITION BY column1, column2, ... ORDER BY sort_column1, sort_column2, ...)



In [None]:
import db_methods
# Total sales per year, ranked from highest to lowest with DENSE_RANK()
# (tied totals share a rank and the next rank is not skipped).
query = """
    WITH total_sales_year AS (
    SELECT 
        YEAR_ID,
        SUM(QUANTITYORDERED * PRICEEACH) AS total_sales
    FROM 
        sales
    GROUP BY 
        YEAR_ID
    ),
    ranked_sales AS (
    SELECT 
        YEAR_ID,
        total_sales,
        DENSE_RANK() OVER (ORDER BY total_sales DESC) AS rank_sales
    FROM 
        total_sales_year
    )
    SELECT 
    YEAR_ID, 
    total_sales,
    rank_sales
    FROM 
        ranked_sales;
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so the helper is called while the connection is
# open and the connection is closed explicitly afterwards.
db_connection = sqlite3.connect('sales.db')
try:
    # Presumably runs `query` on the connection and prints the rows - see db_methods.
    db_methods.print_query_results(db_connection, query)
finally:
    db_connection.close()

Calcular el crecimiento mensual en ventas comparando cada mes con el mes anterior, sin perder el detalle por fila, usando LAG().

In [None]:
import db_methods
# Month-over-month growth: monthly totals are computed first, LAG() then pulls
# the previous month's total within the same year, and the final SELECT turns
# the difference into a percentage.
# Fix: the CASE branch for a missing previous month had no result expression
# ("THEN" followed directly by "ELSE"), which is a SQL syntax error; it now
# yields NULL explicitly.
query = """
   -- CTE para calcular ventas mensuales 
    WITH monthly_sales AS (
        SELECT 
            YEAR_ID,
            MONTH_ID,
            SUM(QUANTITYORDERED * PRICEEACH) AS total_sales
        FROM 
            sales
        GROUP BY 
            YEAR_ID, MONTH_ID
    ),

    -- CTE para cada mes traer total de ventas del mes anterior con LAG
    previous_month_sales AS (
        SELECT -- seleccionamos los campos que queremos mostrar
            YEAR_ID,
            MONTH_ID,
            total_sales, -- total de ventas del mes actual
            LAG(total_sales) OVER ( -- función LAG para traer el total del mes anterior
            PARTITION BY YEAR_ID -- particionamos por año como que primero encapsule por año
            ORDER BY MONTH_ID -- ordenamos por mes
            ) AS previous_month_total
        FROM 
            monthly_sales
    )

    -- cte para calcular el crecimiento porcentual del mes actual respecto al mes anterior
    SELECT 
        YEAR_ID,
        MONTH_ID,
        total_sales,
        previous_month_total,
        CASE 
            WHEN previous_month_total IS NULL THEN NULL  -- si no hay mes anterior, no hay crecimiento
            ELSE (total_sales - previous_month_total) / previous_month_total * 100 -- calculamos el crecimiento porcentual
        END AS monthly_growth_percentage

    FROM previous_month_sales
    ORDER BY YEAR_ID, MONTH_ID;
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so the helper is called while the connection is
# open and the connection is closed explicitly afterwards.
db_connection = sqlite3.connect('sales.db')
try:
    # Presumably runs `query` on the connection and prints the rows - see db_methods.
    db_methods.print_query_results(db_connection, query)
finally:
    db_connection.close()

In [None]:
import db_methods
# Growth towards the following month: monthly totals are computed first,
# LEAD() then pulls the next month's total within the same year, and the final
# SELECT expresses the change relative to the current month as a percentage.
# Fix: the embedded SQL comment on the window function described LAG and the
# previous month, although the code uses LEAD for the next month.
query = """
   -- CTE para calcular ventas mensuales 
    WITH monthly_sales AS (
        SELECT 
            YEAR_ID,
            MONTH_ID,
            SUM(QUANTITYORDERED * PRICEEACH) AS total_sales
        FROM 
            sales
        GROUP BY 
            YEAR_ID, MONTH_ID
    ),

    -- CTE para cada mes traer total de ventas del mes proximo con LEAD
    sales_next_month AS (
        SELECT -- seleccionamos los campos que queremos mostrar
            YEAR_ID,
            MONTH_ID,
            total_sales, -- total de ventas del mes actual
            LEAD(total_sales) OVER ( -- función LEAD para traer el total del mes proximo
            PARTITION BY YEAR_ID -- particionamos por año como que primero encapsule por año
            ORDER BY MONTH_ID -- ordenamos por mes
            ) AS next_month_total
        FROM 
            monthly_sales
    )

    -- cte para calcular el crecimiento porcentual del mes actual respecto al mes proximo
    SELECT 
        YEAR_ID,
        MONTH_ID,
        total_sales,
        next_month_total,
        CASE 
            WHEN next_month_total IS NULL THEN NULL  -- si no hay mes proximo, no hay crecimiento
            ELSE (next_month_total - total_sales) / total_sales * 100 -- calculamos el crecimiento porcentual
        END AS monthly_growth_percentage

    FROM sales_next_month
    ORDER BY YEAR_ID, MONTH_ID;
    """

# sqlite3's `with connection:` only commits or rolls back the transaction; it
# never closes the connection, so the helper is called while the connection is
# open and the connection is closed explicitly afterwards.
db_connection = sqlite3.connect('sales.db')
try:
    # Presumably runs `query` on the connection and prints the rows - see db_methods.
    db_methods.print_query_results(db_connection, query)
finally:
    db_connection.close()