In [1]:
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(os.path.join('..')))

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()  # Carga las variables del archivo .env

config = {
    'host': os.getenv('HOST'),
    'user': os.getenv('USER'),
    'password': os.getenv('PASSWORD'),
    'database': os.getenv('DATABASE'),
    'port': os.getenv('PORT')
}

In [3]:
from sqlalchemy import create_engine, text
# Crear la URL de conexión para SQLAlchemy
db_url = f"mysql+pymysql://{config['user']}:{config['password']}@{config['host']}:{config['port']}/caso_6"

# Crear el motor de SQLAlchemy
engine = create_engine(db_url, echo=True)  # echo=True muestra las consultas SQL en la consola



In [4]:
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.types import DateTime
Base = declarative_base()


In [5]:
class EventIdentifier(Base):

    __tablename__ = 'event_identifier'
    __table_args__ = {'extend_existing': True} 

    id = Column(Integer, primary_key=True) 
    nombre = Column(String(50))  


class CampaignIdentifier(Base):

    __tablename__ = 'campaign_identifier'
    __table_args__= {'extend_existing': True}

    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    campaign_id = Column(Integer, primary_key=True)
    products = Column(String(3))
    campaign_name = Column(String(33))
    start_date = Column(DateTime)
    end_date = Column(DateTime)


class PageHierarchy(Base):
     
    __tablename__ = 'page_hierarchy'
    __table_args__ = {'extend_existing': True}

    # Columnas

    page_id = Column(Integer, primary_key=True)
    page_name = Column(String(14))
    product_category = Column(String(9))
    product_id = Column(Integer)



class Users(Base):

    __tablename__= 'users'
    __table_args__={'extend_existing': True}

    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    user_id = Column(Integer)
    cookie_id = Column(String(6))
    start_date = Column(DateTime)


class Events(Base):

    __tablename__= 'events'
    __table_args_= {'extend_existing': True}


    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    visit_id = Column(String(6))
    cookie_id = Column(String(6))
    page_id = Column(Integer)
    event_type =Column(Integer)
    sequence_number = Column(Integer)
    event_time = Column(DateTime)


# Crear la tabla en la base de datos
Base.metadata.create_all(engine)


2025-04-24 13:40:07,057 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2025-04-24 13:40:07,058 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,059 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2025-04-24 13:40:07,059 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,060 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2025-04-24 13:40:07,060 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,062 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,063 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`event_identifier`
2025-04-24 13:40:07,063 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,066 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`campaign_identifier`
2025-04-24 13:40:07,066 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,067 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`page_hierarchy`
2025-04-24 13:40:07,067 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:07,068

In [6]:
from utils import funciones_utiles

event_identifier_data = funciones_utiles.load_text_as_tuples('datos_tablas/event_identifier_data.txt')
campaign_identifier_data = funciones_utiles.load_text_as_tuples('datos_tablas/campaign_identifier_data.txt')
page_hierarchy_data =funciones_utiles.load_text_as_tuples('datos_tablas/page_hierarchy_data.txt')
users_data = funciones_utiles.load_text_as_tuples('datos_tablas/users_data.txt')
events_data = funciones_utiles.load_text_as_tuples('datos_tablas/events_data.txt')

In [7]:
event_identifier_data

[('1', 'Page View'),
 ('2', 'Add to Cart'),
 ('3', 'Purchase'),
 ('4', 'Ad Impression'),
 ('5', 'Ad Click')]

In [8]:
from sqlalchemy.orm import sessionmaker

# Crear la sesión
Session = sessionmaker(bind=engine)
session = Session()


In [9]:
from sqlalchemy.orm import sessionmaker



# Insertar los datos
for eid, name in event_identifier_data:
    
    eid = eid.strip('\'')
    name = name.strip('\'')

    print(eid)
    print(name)

    exists_query = session.query(
        session.query(EventIdentifier).filter_by(id=int(eid)).exists()
    ).scalar()

    if not exists_query:
        event = EventIdentifier(id=int(eid), nombre=name)
        session.add(event)


# Confirmar los cambios en la base de datos
session.commit()

# Cerrar sesión
session.close()

1
Page View
2025-04-24 13:40:07,751 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,754 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-24 13:40:07,756 INFO sqlalchemy.engine.Engine [generated in 0.00142s] {'id_1': 1}
2
Add to Cart
2025-04-24 13:40:07,762 INFO sqlalchemy.engine.Engine INSERT INTO event_identifier (id, nombre) VALUES (%(id)s, %(nombre)s)
2025-04-24 13:40:07,762 INFO sqlalchemy.engine.Engine [generated in 0.00070s] {'id': 1, 'nombre': 'Page View'}
2025-04-24 13:40:07,765 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-24 13:40:07,766 INFO sqlalchemy.engine.Engine [cached since 0.01161s ago] {'id_1': 2}
3
Purchase
2025-04-24 13:40:07,768 INFO sqlalchemy.engine.Engine INSERT INTO event_identifier (id, nombre) VALUES (%(id)s, %(nombre)s)
2025-04-24 13:40:07,768 INFO sqlalchemy.engine.En

In [10]:
   
for eid, name in event_identifier_data:
    
    eid = eid.strip('\'')
    name = name.strip('\'')

    print(eid)
    print(name)

    exists_query = session.query(
        session.query(EventIdentifier).filter_by(id=int(eid)).exists()
    ).scalar()

    if not exists_query:
        event = EventIdentifier(id=int(eid), nombre=name)
        session.add(event)


# Confirmar los cambios en la base de datos
session.commit()


1
Page View
2025-04-24 13:40:07,796 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,797 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-24 13:40:07,798 INFO sqlalchemy.engine.Engine [cached since 0.0435s ago] {'id_1': 1}
2
Add to Cart
2025-04-24 13:40:07,800 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-24 13:40:07,800 INFO sqlalchemy.engine.Engine [cached since 0.0458s ago] {'id_1': 2}
3
Purchase
2025-04-24 13:40:07,801 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-24 13:40:07,802 INFO sqlalchemy.engine.Engine [cached since 0.04755s ago] {'id_1': 3}
4
Ad Impression
2025-04-24 13:40:07,803 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS ano

In [11]:
campaign_identifier_data

[('1', '1-3', 'BOGOF - Fishing For Compliments', '2020-01-01', '2020-01-14'),
 ('2', '4-5', '25% Off - Living The Lux Life', '2020-01-15', '2020-01-28'),
 ('3', '6-8', 'Half Off - Treat Your Shellf(ish)', '2020-02-01', '2020-03-31')]

In [12]:
from datetime import datetime

objects = [
    CampaignIdentifier(
        campaign_id=int(cid),
        products=prod,
        campaign_name=name,
        start_date=datetime.strptime(start, '%Y-%m-%d'),
        end_date=datetime.strptime(end, '%Y-%m-%d')
    )
    for cid, prod, name, start, end in campaign_identifier_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-24 13:40:07,842 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,843 INFO sqlalchemy.engine.Engine INSERT INTO campaign_identifier (campaign_id, products, campaign_name, start_date, end_date) VALUES (%(campaign_id)s, %(products)s, %(campaign_name)s, %(start_date)s, %(end_date)s)
2025-04-24 13:40:07,843 INFO sqlalchemy.engine.Engine [generated in 0.00056s] [{'campaign_id': 1, 'products': '1-3', 'campaign_name': 'BOGOF - Fishing For Compliments', 'start_date': datetime.datetime(2020, 1, 1, 0, 0), 'end_date': datetime.datetime(2020, 1, 14, 0, 0)}, {'campaign_id': 2, 'products': '4-5', 'campaign_name': '25% Off - Living The Lux Life', 'start_date': datetime.datetime(2020, 1, 15, 0, 0), 'end_date': datetime.datetime(2020, 1, 28, 0, 0)}, {'campaign_id': 3, 'products': '6-8', 'campaign_name': 'Half Off - Treat Your Shellf(ish)', 'start_date': datetime.datetime(2020, 2, 1, 0, 0), 'end_date': datetime.datetime(2020, 3, 31, 0, 0)}]
2025-04-24 13:40:07,846 INFO sqlalchem

In [13]:
page_hierarchy_data

[('1', 'Home Page', None, None),
 ('2', 'All Products', None, None),
 ('3', 'Salmon', 'Fish', '1'),
 ('4', 'Kingfish', 'Fish', '2'),
 ('5', 'Tuna', 'Fish', '3'),
 ('6', 'Russian Caviar', 'Luxury', '4'),
 ('7', 'Black Truffle', 'Luxury', '5'),
 ('8', 'Abalone', 'Shellfish', '6'),
 ('9', 'Lobster', 'Shellfish', '7'),
 ('10', 'Crab', 'Shellfish', '8'),
 ('11', 'Oyster', 'Shellfish', '9'),
 ('12', 'Checkout', None, None),
 ('13', 'Confirmation', None, None)]

In [14]:
from datetime import datetime

objects = [
    PageHierarchy(
        page_id=int(page_id),
        page_name=page_name,
        product_category=product_category,
        product_id=int(product_id) if product_id is not None else None,
    )
    for page_id, page_name, product_category, product_id in page_hierarchy_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-24 13:40:07,889 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,891 INFO sqlalchemy.engine.Engine INSERT INTO page_hierarchy (page_id, page_name) VALUES (%(page_id)s, %(page_name)s)
2025-04-24 13:40:07,892 INFO sqlalchemy.engine.Engine [generated in 0.00083s] [{'page_id': 1, 'page_name': 'Home Page'}, {'page_id': 2, 'page_name': 'All Products'}]
2025-04-24 13:40:07,894 INFO sqlalchemy.engine.Engine INSERT INTO page_hierarchy (page_id, page_name, product_category, product_id) VALUES (%(page_id)s, %(page_name)s, %(product_category)s, %(product_id)s)
2025-04-24 13:40:07,896 INFO sqlalchemy.engine.Engine [generated in 0.00086s] [{'page_id': 3, 'page_name': 'Salmon', 'product_category': 'Fish', 'product_id': 1}, {'page_id': 4, 'page_name': 'Kingfish', 'product_category': 'Fish', 'product_id': 2}, {'page_id': 5, 'page_name': 'Tuna', 'product_category': 'Fish', 'product_id': 3}, {'page_id': 6, 'page_name': 'Russian Caviar', 'product_category': 'Luxury', 'product_id'

In [15]:
from datetime import datetime

objects = [
    Users(
        user_id=int(user_id),
        cookie_id=cookie_id,
        start_date=datetime.strptime(start_date, '%Y-%m-%d')
    )
    for user_id, cookie_id, start_date in users_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-24 13:40:07,952 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:07,963 INFO sqlalchemy.engine.Engine INSERT INTO users (user_id, cookie_id, start_date) VALUES (%(user_id)s, %(cookie_id)s, %(start_date)s)
2025-04-24 13:40:07,964 INFO sqlalchemy.engine.Engine [generated in 0.00513s] [{'user_id': 1, 'cookie_id': 'c4ca42', 'start_date': datetime.datetime(2020, 2, 4, 0, 0)}, {'user_id': 2, 'cookie_id': 'c81e72', 'start_date': datetime.datetime(2020, 1, 18, 0, 0)}, {'user_id': 3, 'cookie_id': 'eccbc8', 'start_date': datetime.datetime(2020, 2, 21, 0, 0)}, {'user_id': 4, 'cookie_id': 'a87ff6', 'start_date': datetime.datetime(2020, 2, 22, 0, 0)}, {'user_id': 5, 'cookie_id': 'e4da3b', 'start_date': datetime.datetime(2020, 2, 1, 0, 0)}, {'user_id': 6, 'cookie_id': '167909', 'start_date': datetime.datetime(2020, 1, 25, 0, 0)}, {'user_id': 7, 'cookie_id': '8f14e4', 'start_date': datetime.datetime(2020, 2, 9, 0, 0)}, {'user_id': 8, 'cookie_id': 'c9f0f8', 'start_date': datetim

In [16]:
from datetime import datetime

objects = [
    Events(
        visit_id=visit_id,
        cookie_id=cookie_id,
        page_id=int(page_id),
        event_type = event_type,
        sequence_number = sequence_number,
        event_time =  event_time   
    )
    for visit_id, cookie_id, page_id, event_type, sequence_number, event_time in events_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-24 13:40:08,660 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:08,915 INFO sqlalchemy.engine.Engine INSERT INTO events (visit_id, cookie_id, page_id, event_type, sequence_number, event_time) VALUES (%(visit_id)s, %(cookie_id)s, %(page_id)s, %(event_type)s, %(sequence_number)s, %(event_time)s)
2025-04-24 13:40:08,916 INFO sqlalchemy.engine.Engine [generated in 0.08325s] [{'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 1, 'event_type': '1', 'sequence_number': '1', 'event_time': '2020-02-04 19:16:09.182546'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 2, 'event_type': '1', 'sequence_number': '2', 'event_time': '2020-02-04 19:16:17.358191'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 6, 'event_type': '1', 'sequence_number': '3', 'event_time': '2020-02-04 19:16:58.454669'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 9, 'event_type': '1', 'sequence_number': '4', 'event_time': '2020-02-04 19:16:58.609142'}, {'visit_id

<center><h1>PREGUNTAS</h1></center>

# **1. Cuantos usuarios hay ?**

In [17]:
query = '''

    SELECT
        COUNT(DISTINCT user_id) AS total_usuarios
    FROM
        users

'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,590 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,591 INFO sqlalchemy.engine.Engine 

    SELECT
        COUNT(DISTINCT user_id) AS total_usuarios
    FROM
        users


2025-04-24 13:40:09,591 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,601 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,total_usuarios
0,500


- Existe un total de 500 usuarios unicos

# **2. Cuantos cookies en promedio existen por usuario ?**

In [18]:
query = '''
    WITH cuenta_por_usuario AS (
    SELECT
        user_id,
        COUNT(cookie_id) as total_cookies
    FROM
        users
    GROUP BY
        user_id )


    SELECT 
        AVG(total_cookies) AS Promedio_cookies_por_usuario
    FROM
        cuenta_por_usuario

'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,620 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,620 INFO sqlalchemy.engine.Engine 
    WITH cuenta_por_usuario AS (
    SELECT
        user_id,
        COUNT(cookie_id) as total_cookies
    FROM
        users
    GROUP BY
        user_id )


    SELECT 
        AVG(total_cookies) AS Promedio_cookies_por_usuario
    FROM
        cuenta_por_usuario


2025-04-24 13:40:09,621 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,624 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,Promedio_cookies_por_usuario
0,3.564


# **3. Cúal es el número de visitas unicas de todos los usuarios por mes?**



In [19]:
query = '''
    SELECT
        EXTRACT(MONTH FROM start_date) AS mes,
       COUNT(DISTINCT  visit_id) AS total_usuarios_por_mes
    FROM
        users u
    JOIN 
        events  e
     ON u.cookie_id = e.cookie_id
    GROUP BY 
        mes



'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,635 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,636 INFO sqlalchemy.engine.Engine 
    SELECT
        EXTRACT(MONTH FROM start_date) AS mes,
       COUNT(DISTINCT  visit_id) AS total_usuarios_por_mes
    FROM
        users u
    JOIN 
        events  e
     ON u.cookie_id = e.cookie_id
    GROUP BY 
        mes




2025-04-24 13:40:09,636 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,693 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,mes,total_usuarios_por_mes
0,1,876
1,2,1488
2,3,916
3,4,248
4,5,36



# **4. Cúal es el numero de eventos de cada tipo?**




In [20]:
query = '''
    SELECT
        i.nombre,
        COUNT(*) AS cantidad_de_eventos
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    GROUP BY
        i.nombre
  


'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,713 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,714 INFO sqlalchemy.engine.Engine 
    SELECT
        i.nombre,
        COUNT(*) AS cantidad_de_eventos
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    GROUP BY
        i.nombre
  



2025-04-24 13:40:09,714 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,768 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,nombre,cantidad_de_eventos
0,Page View,20928
1,Add to Cart,8451
2,Purchase,1777
3,Ad Impression,876
4,Ad Click,702


# **5. Cúal es el procentaje de visitas que tiene un evento de purchase ?**



In [21]:
query = '''
    
   
    SELECT
        100 * COUNT(DISTINCT e.visit_id)/(SELECT COUNT(DISTINCT visit_id) FROM events) AS porcentaje_compras
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    WHERE i.nombre = 'Purchase'

  
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,791 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,792 INFO sqlalchemy.engine.Engine 
    
   
    SELECT
        100 * COUNT(DISTINCT e.visit_id)/(SELECT COUNT(DISTINCT visit_id) FROM events) AS porcentaje_compras
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    WHERE i.nombre = 'Purchase'

  
  
  



2025-04-24 13:40:09,793 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,830 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,porcentaje_compras
0,49.8597


# **6. Cual es el procentaje de vistitantes que vieron la pagina de checkout pero no compraron (purchase event)**




In [22]:
query = '''
    
    WITH tabla_resumen AS (
    SELECT
        e.visit_id,
        SUM((CASE WHEN i.nombre = 'Page View' AND ph.page_name = 'Checkout' THEN 1 ELSE 0 END)) AS checkout,
        SUM((CASE WHEN i.nombre = 'Purchase' THEN 1 ELSE 0 END)) AS purchase


    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
       
    JOIN 
        page_hierarchy ph
    
     ON e.page_id = ph.page_id
    GROUP BY
        e.visit_id )

    
    SELECT
        ROUND(100 * (1-(SUM(purchase)/SUM(checkout))),2) AS percentage_checkout_view_with_no_purchase
    FROM
        tabla_resumen
  
  
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,854 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,855 INFO sqlalchemy.engine.Engine 
    
    WITH tabla_resumen AS (
    SELECT
        e.visit_id,
        SUM((CASE WHEN i.nombre = 'Page View' AND ph.page_name = 'Checkout' THEN 1 ELSE 0 END)) AS checkout,
        SUM((CASE WHEN i.nombre = 'Purchase' THEN 1 ELSE 0 END)) AS purchase


    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
       
    JOIN 
        page_hierarchy ph
    
     ON e.page_id = ph.page_id
    GROUP BY
        e.visit_id )

    
    SELECT
        ROUND(100 * (1-(SUM(purchase)/SUM(checkout))),2) AS percentage_checkout_view_with_no_purchase
    FROM
        tabla_resumen
  
  
  
  



2025-04-24 13:40:09,855 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:09,939 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,percentage_checkout_view_with_no_purchase
0,15.5


# **7. Cuales son los tops 3 paginas por numero de visitas?**






In [23]:
query = '''
    
   

    SELECT
        ph.page_name,
        COUNT(visit_id) AS total_visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    WHERE e.event_type = 1
    GROUP BY 
        ph.page_name
    ORDER BY
        total_visitas DESC
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:09,962 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:09,963 INFO sqlalchemy.engine.Engine 
    
   

    SELECT
        ph.page_name,
        COUNT(visit_id) AS total_visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    WHERE e.event_type = 1
    GROUP BY 
        ph.page_name
    ORDER BY
        total_visitas DESC
  
  



2025-04-24 13:40:09,963 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:10,008 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,page_name,total_visitas
0,All Products,3174
1,Checkout,2103
2,Home Page,1782
3,Oyster,1568
4,Crab,1564
5,Russian Caviar,1563
6,Kingfish,1559
7,Salmon,1559
8,Lobster,1547
9,Abalone,1525


# **8. ¿Cuál es el número de visualizaciones y agregaciones al carrito para cada categoría de producto?**



In [24]:
query = '''
    
   

    SELECT
        ph.product_category,
        SUM(CASE WHEN ei.nombre='Add to Cart' THEN 1 ELSE 0 END) AS carrito,
        SUM(CASE WHEN ei.nombre='Page View' THEN 1 ELSE 0 END) AS visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    JOIN 
        event_identifier ei
    ON e.event_type = ei.id
    WHERE ph.product_category IS NOT NULL
    GROUP BY
        ph.product_category
    ORDER BY
        visitas DESC
  


'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:10,026 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:10,026 INFO sqlalchemy.engine.Engine 
    
   

    SELECT
        ph.product_category,
        SUM(CASE WHEN ei.nombre='Add to Cart' THEN 1 ELSE 0 END) AS carrito,
        SUM(CASE WHEN ei.nombre='Page View' THEN 1 ELSE 0 END) AS visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    JOIN 
        event_identifier ei
    ON e.event_type = ei.id
    WHERE ph.product_category IS NOT NULL
    GROUP BY
        ph.product_category
    ORDER BY
        visitas DESC
  



2025-04-24 13:40:10,027 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:10,094 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,product_category,carrito,visitas
0,Shellfish,3792.0,6204.0
1,Fish,2789.0,4633.0
2,Luxury,1870.0,3032.0


# **9. ¿Cuáles son los 3 productos principales por compras?**



In [25]:
query = '''
    
    -- Obtener todas las ids que fueron compras

    WITH all_ids_purchase AS (
    SELECT
        DISTINCT visit_id
    FROM
        events
    WHERE
        event_type = 3 ),

    -- Filtrar la tabla evento por todas aquellas ids donde hubo compras

    purchase_table AS (

    SELECT
        *
    FROM
        events
    WHERE
        visit_id IN (SELECT * FROM all_ids_purchase) ) 

    
    -- Una vez filtrado agrupamos por producto y contamos

    SELECT
        ph.page_name,
        COUNT(*) AS total_compras
    FROM
        purchase_table pt
    JOIN
       page_hierarchy ph
    ON pt.page_id = ph.page_id
    WHERE
        event_type = 2
    GROUP BY
        ph.page_name
    ORDER BY
        total_compras
    
    

'''


pd.read_sql_query(query, con=engine)

2025-04-24 13:40:10,118 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 13:40:10,120 INFO sqlalchemy.engine.Engine 
    
    -- Obtener todas las ids que fueron compras

    WITH all_ids_purchase AS (
    SELECT
        DISTINCT visit_id
    FROM
        events
    WHERE
        event_type = 3 ),

    -- Filtrar la tabla evento por todas aquellas ids donde hubo compras

    purchase_table AS (

    SELECT
        *
    FROM
        events
    WHERE
        visit_id IN (SELECT * FROM all_ids_purchase) ) 

    
    -- Una vez filtrado agrupamos por producto y contamos

    SELECT
        ph.page_name,
        COUNT(*) AS total_compras
    FROM
        purchase_table pt
    JOIN
       page_hierarchy ph
    ON pt.page_id = ph.page_id
    WHERE
        event_type = 2
    GROUP BY
        ph.page_name
    ORDER BY
        total_compras
    
    


2025-04-24 13:40:10,120 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 13:40:10,158 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,page_name,total_compras
0,Tuna,697
1,Russian Caviar,697
2,Abalone,699
3,Kingfish,707
4,Black Truffle,707
5,Salmon,711
6,Crab,719
7,Oyster,726
8,Lobster,754


# **3. Análisis del Embudo de Producto**

Usando una sola consulta SQL, crea una nueva tabla de salida que contenga los siguientes detalles:

- ¿Cuántas veces se visualizó cada producto?
- ¿Cuántas veces se añadió cada producto al carrito?
- ¿Cuántas veces se añadió cada producto al carrito pero no se compró (abandonado)?
- ¿Cuántas veces se compró cada producto?

In [91]:
query = '''
    WITH temporal_resume AS (
    SELECT
        e.visit_id,
        e.cookie_id,
        e.page_id,
        e.event_type,
        e.sequence_number,
        e.event_time,
        ei.nombre,
        ph.page_name,
        ph.product_category,
        ph.product_id,
        ph.page_name as product_name
    FROM
        events e
    JOIN event_identifier ei
    ON  e.event_type = ei.id
    JOIN page_hierarchy ph
    ON e.page_id = ph.page_id)


    SELECT 
        product_name,
        product_category,
        
        SUM((CASE WHEN nombre = 'Page View' THEN 1 ELSE 0 END )) AS total_vistar_por_producto,
        SUM((CASE WHEN nombre = 'Add to Cart' THEN 1 ELSE 0 END )) AS total_agregados_carro,
        SUM((CASE WHEN nombre = 'Add to Cart'  AND visit_id NOT IN (SELECT DISTINCT visit_id FROM temporal_resume WHERE nombre='Purchase') THEN 1 ELSE 0 END)) AS agregado_pero_no_comprados,
        SUM((CASE WHEN nombre = 'Add to Cart'  AND visit_id  IN (SELECT DISTINCT visit_id FROM temporal_resume WHERE nombre='Purchase') THEN 1 ELSE 0 END)) AS Comprados

    FROM temporal_resume

    GROUP BY 
        product_name, product_category

 



'''


pd.read_sql_query(query, con=engine)

2025-04-24 14:20:35,209 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 14:20:35,210 INFO sqlalchemy.engine.Engine 
    WITH temporal_resume AS (
    SELECT
        e.visit_id,
        e.cookie_id,
        e.page_id,
        e.event_type,
        e.sequence_number,
        e.event_time,
        ei.nombre,
        ph.page_name,
        ph.product_category,
        ph.product_id,
        ph.page_name as product_name
    FROM
        events e
    JOIN event_identifier ei
    ON  e.event_type = ei.id
    JOIN page_hierarchy ph
    ON e.page_id = ph.page_id)


    SELECT 
        product_name,
        product_category,
        
        SUM((CASE WHEN nombre = 'Page View' THEN 1 ELSE 0 END )) AS total_vistar_por_producto,
        SUM((CASE WHEN nombre = 'Add to Cart' THEN 1 ELSE 0 END )) AS total_agregados_carro,
        SUM((CASE WHEN nombre = 'Add to Cart'  AND visit_id NOT IN (SELECT DISTINCT visit_id FROM temporal_resume WHERE nombre='Purchase') THEN 1 ELSE 0 END)) AS agregado_

Unnamed: 0,product_name,product_category,total_vistar_por_producto,total_agregados_carro,agregado_pero_no_comprados,Comprados
0,Home Page,,1782.0,0.0,0.0,0.0
1,All Products,,3174.0,0.0,0.0,0.0
2,Russian Caviar,Luxury,1563.0,946.0,249.0,697.0
3,Lobster,Shellfish,1547.0,968.0,214.0,754.0
4,Crab,Shellfish,1564.0,949.0,230.0,719.0
5,Oyster,Shellfish,1568.0,943.0,217.0,726.0
6,Checkout,,2103.0,0.0,0.0,0.0
7,Confirmation,,0.0,0.0,0.0,0.0
8,Kingfish,Fish,1559.0,920.0,213.0,707.0
9,Tuna,Fish,1515.0,931.0,234.0,697.0


In [89]:
query = '''
    SELECT
       *
    FROM
        events e
    JOIN event_identifier ei
    ON  e.event_type = ei.id
    JOIN page_hierarchy ph
    ON e.page_id = ph.page_id

 



'''


pd.read_sql_query(query, con=engine)

2025-04-24 14:17:55,237 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-24 14:17:55,238 INFO sqlalchemy.engine.Engine 
    SELECT
       *
    FROM
        events e
    JOIN event_identifier ei
    ON  e.event_type = ei.id
    JOIN page_hierarchy ph
    ON e.page_id = ph.page_id

 




2025-04-24 14:17:55,238 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-24 14:17:55,920 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,id,visit_id,cookie_id,page_id,event_type,sequence_number,event_time,id.1,nombre,page_id.1,page_name,product_category,product_id
0,1,ccf365,c4ca42,1,1,1,2020-02-04 19:16:09,1,Page View,1,Home Page,,
1,2,ccf365,c4ca42,2,1,2,2020-02-04 19:16:17,1,Page View,2,All Products,,
2,3,ccf365,c4ca42,6,1,3,2020-02-04 19:16:58,1,Page View,6,Russian Caviar,Luxury,4.0
3,4,ccf365,c4ca42,9,1,4,2020-02-04 19:16:59,1,Page View,9,Lobster,Shellfish,7.0
4,5,ccf365,c4ca42,9,2,5,2020-02-04 19:17:52,2,Add to Cart,9,Lobster,Shellfish,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32729,32730,355a6a,87a4ba,10,1,15,2020-03-18 22:44:17,1,Page View,10,Crab,Shellfish,8.0
32730,32731,355a6a,87a4ba,11,1,16,2020-03-18 22:44:19,1,Page View,11,Oyster,Shellfish,9.0
32731,32732,355a6a,87a4ba,11,2,17,2020-03-18 22:45:13,2,Add to Cart,11,Oyster,Shellfish,9.0
32732,32733,355a6a,87a4ba,12,1,18,2020-03-18 22:45:54,1,Page View,12,Checkout,,
