In [15]:
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(os.path.join('..')))

In [16]:
from dotenv import load_dotenv
import os

load_dotenv()  # Carga las variables del archivo .env

config = {
    'host': os.getenv('HOST'),
    'user': os.getenv('USER'),
    'password': os.getenv('PASSWORD'),
    'database': os.getenv('DATABASE'),
    'port': os.getenv('PORT')
}

In [17]:
from sqlalchemy import create_engine, text
# Crear la URL de conexión para SQLAlchemy
db_url = f"mysql+pymysql://{config['user']}:{config['password']}@{config['host']}:{config['port']}/caso_6"

# Crear el motor de SQLAlchemy
engine = create_engine(db_url, echo=True)  # echo=True muestra las consultas SQL en la consola



In [18]:
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.types import DateTime
Base = declarative_base()


In [19]:
class EventIdentifier(Base):

    __tablename__ = 'event_identifier'
    __table_args__ = {'extend_existing': True} 

    id = Column(Integer, primary_key=True) 
    nombre = Column(String(50))  


class CampaignIdentifier(Base):

    __tablename__ = 'campaign_identifier'
    __table_args__= {'extend_existing': True}

    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    campaign_id = Column(Integer, primary_key=True)
    products = Column(String(3))
    campaign_name = Column(String(33))
    start_date = Column(DateTime)
    end_date = Column(DateTime)


class PageHierarchy(Base):
     
    __tablename__ = 'page_hierarchy'
    __table_args__ = {'extend_existing': True}

    # Columnas

    page_id = Column(Integer, primary_key=True)
    page_name = Column(String(14))
    product_category = Column(String(9))
    product_id = Column(Integer)



class Users(Base):

    __tablename__= 'users'
    __table_args__={'extend_existing': True}

    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    user_id = Column(Integer)
    cookie_id = Column(String(6))
    start_date = Column(DateTime)


class Events(Base):

    __tablename__= 'events'
    __table_args_= {'extend_existing': True}


    # Columnas
    id = Column(Integer, primary_key=True, autoincrement=True)  # nueva PK
    visit_id = Column(String(6))
    cookie_id = Column(String(6))
    page_id = Column(Integer)
    event_type =Column(Integer)
    sequence_number = Column(Integer)
    event_time = Column(DateTime)


# Crear la tabla en la base de datos
Base.metadata.create_all(engine)


2025-04-23 13:30:08,773 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2025-04-23 13:30:08,774 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,775 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2025-04-23 13:30:08,776 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,777 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2025-04-23 13:30:08,777 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,779 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:08,779 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`event_identifier`
2025-04-23 13:30:08,780 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,783 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`campaign_identifier`
2025-04-23 13:30:08,783 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,785 INFO sqlalchemy.engine.Engine DESCRIBE `caso_6`.`page_hierarchy`
2025-04-23 13:30:08,786 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:08,787

In [20]:
from utils import funciones_utiles

event_identifier_data = funciones_utiles.load_text_as_tuples('datos_tablas/event_identifier_data.txt')
campaign_identifier_data = funciones_utiles.load_text_as_tuples('datos_tablas/campaign_identifier_data.txt')
page_hierarchy_data =funciones_utiles.load_text_as_tuples('datos_tablas/page_hierarchy_data.txt')
users_data = funciones_utiles.load_text_as_tuples('datos_tablas/users_data.txt')
events_data = funciones_utiles.load_text_as_tuples('datos_tablas/events_data.txt')

In [21]:
event_identifier_data

[('1', 'Page View'),
 ('2', 'Add to Cart'),
 ('3', 'Purchase'),
 ('4', 'Ad Impression'),
 ('5', 'Ad Click')]

In [22]:
from sqlalchemy.orm import sessionmaker

# Crear la sesión
Session = sessionmaker(bind=engine)
session = Session()


In [23]:
from sqlalchemy.orm import sessionmaker



# Insertar los datos
for eid, name in event_identifier_data:
    
    eid = eid.strip('\'')
    name = name.strip('\'')

    print(eid)
    print(name)

    exists_query = session.query(
        session.query(EventIdentifier).filter_by(id=int(eid)).exists()
    ).scalar()

    if not exists_query:
        event = EventIdentifier(id=int(eid), nombre=name)
        session.add(event)


# Confirmar los cambios en la base de datos
session.commit()

# Cerrar sesión
session.close()

1
Page View
2025-04-23 13:30:09,537 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:09,539 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-23 13:30:09,540 INFO sqlalchemy.engine.Engine [generated in 0.00067s] {'id_1': 1}
2
Add to Cart
2025-04-23 13:30:09,543 INFO sqlalchemy.engine.Engine INSERT INTO event_identifier (id, nombre) VALUES (%(id)s, %(nombre)s)
2025-04-23 13:30:09,544 INFO sqlalchemy.engine.Engine [generated in 0.00084s] {'id': 1, 'nombre': 'Page View'}
2025-04-23 13:30:09,546 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-23 13:30:09,546 INFO sqlalchemy.engine.Engine [cached since 0.007423s ago] {'id_1': 2}
3
Purchase
2025-04-23 13:30:09,548 INFO sqlalchemy.engine.Engine INSERT INTO event_identifier (id, nombre) VALUES (%(id)s, %(nombre)s)
2025-04-23 13:30:09,549 INFO sqlalchemy.engine.E

In [24]:
   
for eid, name in event_identifier_data:
    
    eid = eid.strip('\'')
    name = name.strip('\'')

    print(eid)
    print(name)

    exists_query = session.query(
        session.query(EventIdentifier).filter_by(id=int(eid)).exists()
    ).scalar()

    if not exists_query:
        event = EventIdentifier(id=int(eid), nombre=name)
        session.add(event)


# Confirmar los cambios en la base de datos
session.commit()


1
Page View
2025-04-23 13:30:09,583 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:09,584 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-23 13:30:09,585 INFO sqlalchemy.engine.Engine [cached since 0.04627s ago] {'id_1': 1}
2
Add to Cart
2025-04-23 13:30:09,588 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-23 13:30:09,589 INFO sqlalchemy.engine.Engine [cached since 0.04992s ago] {'id_1': 2}
3
Purchase
2025-04-23 13:30:09,591 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS anon_1
2025-04-23 13:30:09,591 INFO sqlalchemy.engine.Engine [cached since 0.0524s ago] {'id_1': 3}
4
Ad Impression
2025-04-23 13:30:09,593 INFO sqlalchemy.engine.Engine SELECT EXISTS (SELECT 1 
FROM event_identifier 
WHERE event_identifier.id = %(id_1)s) AS an

In [25]:
campaign_identifier_data

[('1', '1-3', 'BOGOF - Fishing For Compliments', '2020-01-01', '2020-01-14'),
 ('2', '4-5', '25% Off - Living The Lux Life', '2020-01-15', '2020-01-28'),
 ('3', '6-8', 'Half Off - Treat Your Shellf(ish)', '2020-02-01', '2020-03-31')]

In [26]:
from datetime import datetime

objects = [
    CampaignIdentifier(
        campaign_id=int(cid),
        products=prod,
        campaign_name=name,
        start_date=datetime.strptime(start, '%Y-%m-%d'),
        end_date=datetime.strptime(end, '%Y-%m-%d')
    )
    for cid, prod, name, start, end in campaign_identifier_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-23 13:30:09,629 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:09,631 INFO sqlalchemy.engine.Engine INSERT INTO campaign_identifier (campaign_id, products, campaign_name, start_date, end_date) VALUES (%(campaign_id)s, %(products)s, %(campaign_name)s, %(start_date)s, %(end_date)s)
2025-04-23 13:30:09,632 INFO sqlalchemy.engine.Engine [generated in 0.00095s] [{'campaign_id': 1, 'products': '1-3', 'campaign_name': 'BOGOF - Fishing For Compliments', 'start_date': datetime.datetime(2020, 1, 1, 0, 0), 'end_date': datetime.datetime(2020, 1, 14, 0, 0)}, {'campaign_id': 2, 'products': '4-5', 'campaign_name': '25% Off - Living The Lux Life', 'start_date': datetime.datetime(2020, 1, 15, 0, 0), 'end_date': datetime.datetime(2020, 1, 28, 0, 0)}, {'campaign_id': 3, 'products': '6-8', 'campaign_name': 'Half Off - Treat Your Shellf(ish)', 'start_date': datetime.datetime(2020, 2, 1, 0, 0), 'end_date': datetime.datetime(2020, 3, 31, 0, 0)}]
2025-04-23 13:30:09,636 INFO sqlalchem

In [27]:
page_hierarchy_data

[('1', 'Home Page', None, None),
 ('2', 'All Products', None, None),
 ('3', 'Salmon', 'Fish', '1'),
 ('4', 'Kingfish', 'Fish', '2'),
 ('5', 'Tuna', 'Fish', '3'),
 ('6', 'Russian Caviar', 'Luxury', '4'),
 ('7', 'Black Truffle', 'Luxury', '5'),
 ('8', 'Abalone', 'Shellfish', '6'),
 ('9', 'Lobster', 'Shellfish', '7'),
 ('10', 'Crab', 'Shellfish', '8'),
 ('11', 'Oyster', 'Shellfish', '9'),
 ('12', 'Checkout', None, None),
 ('13', 'Confirmation', None, None)]

In [28]:
from datetime import datetime

objects = [
    PageHierarchy(
        page_id=int(page_id),
        page_name=page_name,
        product_category=product_category,
        product_id=int(product_id) if product_id is not None else None,
    )
    for page_id, page_name, product_category, product_id in page_hierarchy_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-23 13:30:09,675 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:09,677 INFO sqlalchemy.engine.Engine INSERT INTO page_hierarchy (page_id, page_name) VALUES (%(page_id)s, %(page_name)s)
2025-04-23 13:30:09,679 INFO sqlalchemy.engine.Engine [generated in 0.00152s] [{'page_id': 1, 'page_name': 'Home Page'}, {'page_id': 2, 'page_name': 'All Products'}]
2025-04-23 13:30:09,681 INFO sqlalchemy.engine.Engine INSERT INTO page_hierarchy (page_id, page_name, product_category, product_id) VALUES (%(page_id)s, %(page_name)s, %(product_category)s, %(product_id)s)
2025-04-23 13:30:09,682 INFO sqlalchemy.engine.Engine [generated in 0.00069s] [{'page_id': 3, 'page_name': 'Salmon', 'product_category': 'Fish', 'product_id': 1}, {'page_id': 4, 'page_name': 'Kingfish', 'product_category': 'Fish', 'product_id': 2}, {'page_id': 5, 'page_name': 'Tuna', 'product_category': 'Fish', 'product_id': 3}, {'page_id': 6, 'page_name': 'Russian Caviar', 'product_category': 'Luxury', 'product_id'

In [29]:
from datetime import datetime

objects = [
    Users(
        user_id=int(user_id),
        cookie_id=cookie_id,
        start_date=datetime.strptime(start_date, '%Y-%m-%d')
    )
    for user_id, cookie_id, start_date in users_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-23 13:30:09,759 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:09,768 INFO sqlalchemy.engine.Engine INSERT INTO users (user_id, cookie_id, start_date) VALUES (%(user_id)s, %(cookie_id)s, %(start_date)s)
2025-04-23 13:30:09,769 INFO sqlalchemy.engine.Engine [generated in 0.00430s] [{'user_id': 1, 'cookie_id': 'c4ca42', 'start_date': datetime.datetime(2020, 2, 4, 0, 0)}, {'user_id': 2, 'cookie_id': 'c81e72', 'start_date': datetime.datetime(2020, 1, 18, 0, 0)}, {'user_id': 3, 'cookie_id': 'eccbc8', 'start_date': datetime.datetime(2020, 2, 21, 0, 0)}, {'user_id': 4, 'cookie_id': 'a87ff6', 'start_date': datetime.datetime(2020, 2, 22, 0, 0)}, {'user_id': 5, 'cookie_id': 'e4da3b', 'start_date': datetime.datetime(2020, 2, 1, 0, 0)}, {'user_id': 6, 'cookie_id': '167909', 'start_date': datetime.datetime(2020, 1, 25, 0, 0)}, {'user_id': 7, 'cookie_id': '8f14e4', 'start_date': datetime.datetime(2020, 2, 9, 0, 0)}, {'user_id': 8, 'cookie_id': 'c9f0f8', 'start_date': datetim

In [30]:
from datetime import datetime

objects = [
    Events(
        visit_id=visit_id,
        cookie_id=cookie_id,
        page_id=int(page_id),
        event_type = event_type,
        sequence_number = sequence_number,
        event_time =  event_time   
    )
    for visit_id, cookie_id, page_id, event_type, sequence_number, event_time in events_data
]

session.bulk_save_objects(objects)
session.commit()

2025-04-23 13:30:10,527 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:10,857 INFO sqlalchemy.engine.Engine INSERT INTO events (visit_id, cookie_id, page_id, event_type, sequence_number, event_time) VALUES (%(visit_id)s, %(cookie_id)s, %(page_id)s, %(event_type)s, %(sequence_number)s, %(event_time)s)
2025-04-23 13:30:10,858 INFO sqlalchemy.engine.Engine [generated in 0.08896s] [{'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 1, 'event_type': '1', 'sequence_number': '1', 'event_time': '2020-02-04 19:16:09.182546'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 2, 'event_type': '1', 'sequence_number': '2', 'event_time': '2020-02-04 19:16:17.358191'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 6, 'event_type': '1', 'sequence_number': '3', 'event_time': '2020-02-04 19:16:58.454669'}, {'visit_id': 'ccf365', 'cookie_id': 'c4ca42', 'page_id': 9, 'event_type': '1', 'sequence_number': '4', 'event_time': '2020-02-04 19:16:58.609142'}, {'visit_id

<center><h1>PREGUNTAS</h1></center>

# **1. Cuantos usuarios hay ?**

In [31]:
query = '''

    SELECT
        COUNT(DISTINCT user_id) AS total_usuarios
    FROM
        users

'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,652 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,653 INFO sqlalchemy.engine.Engine 

    SELECT
        COUNT(DISTINCT user_id) AS total_usuarios
    FROM
        users


2025-04-23 13:30:11,654 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:11,668 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,total_usuarios
0,500


- Existe un total de 500 usuarios unicos

# **2. Cuantos cookies en promedio existen por usuario ?**

In [32]:
query = '''
    WITH cuenta_por_usuario AS (
    SELECT
        user_id,
        COUNT(cookie_id) as total_cookies
    FROM
        users
    GROUP BY
        user_id )


    SELECT 
        AVG(total_cookies) AS Promedio_cookies_por_usuario
    FROM
        cuenta_por_usuario

'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,699 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,700 INFO sqlalchemy.engine.Engine 
    WITH cuenta_por_usuario AS (
    SELECT
        user_id,
        COUNT(cookie_id) as total_cookies
    FROM
        users
    GROUP BY
        user_id )


    SELECT 
        AVG(total_cookies) AS Promedio_cookies_por_usuario
    FROM
        cuenta_por_usuario


2025-04-23 13:30:11,701 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:11,707 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,Promedio_cookies_por_usuario
0,3.564


# **3. Cúal es el número de visitas unicas de todos los usuarios por mes?**



In [33]:
query = '''
    SELECT
        EXTRACT(MONTH FROM start_date) AS mes,
       COUNT(DISTINCT  visit_id) AS total_usuarios_por_mes
    FROM
        users u
    JOIN 
        events  e
     ON u.cookie_id = e.cookie_id
    GROUP BY 
        mes



'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,731 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,732 INFO sqlalchemy.engine.Engine 
    SELECT
        EXTRACT(MONTH FROM start_date) AS mes,
       COUNT(DISTINCT  visit_id) AS total_usuarios_por_mes
    FROM
        users u
    JOIN 
        events  e
     ON u.cookie_id = e.cookie_id
    GROUP BY 
        mes




2025-04-23 13:30:11,733 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:11,804 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,mes,total_usuarios_por_mes
0,1,876
1,2,1488
2,3,916
3,4,248
4,5,36



# **4. Cúal es el numero de eventos de cada tipo?**




In [34]:
query = '''
    SELECT
        i.nombre,
        COUNT(*) AS cantidad_de_eventos
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    GROUP BY
        i.nombre
  


'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,824 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,826 INFO sqlalchemy.engine.Engine 
    SELECT
        i.nombre,
        COUNT(*) AS cantidad_de_eventos
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    GROUP BY
        i.nombre
  



2025-04-23 13:30:11,826 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:11,885 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,nombre,cantidad_de_eventos
0,Page View,20928
1,Add to Cart,8451
2,Purchase,1777
3,Ad Impression,876
4,Ad Click,702


# **5. Cúal es el procentaje de visitas que tiene un evento de purchase ?**



In [35]:
query = '''
    
   
    SELECT
        100 * COUNT(DISTINCT e.visit_id)/(SELECT COUNT(DISTINCT visit_id) FROM events) AS porcentaje_compras
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    WHERE i.nombre = 'Purchase'

  
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,901 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,902 INFO sqlalchemy.engine.Engine 
    
   
    SELECT
        100 * COUNT(DISTINCT e.visit_id)/(SELECT COUNT(DISTINCT visit_id) FROM events) AS porcentaje_compras
    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
    WHERE i.nombre = 'Purchase'

  
  
  



2025-04-23 13:30:11,903 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:11,943 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,porcentaje_compras
0,49.8597


# **6. Cual es el procentaje de vistitantes que vieron la pagina de checkout pero no compraron (purchase event)**




In [36]:
query = '''
    
    WITH tabla_resumen AS (
    SELECT
        e.visit_id,
        SUM((CASE WHEN i.nombre = 'Page View' AND ph.page_name = 'Checkout' THEN 1 ELSE 0 END)) AS checkout,
        SUM((CASE WHEN i.nombre = 'Purchase' THEN 1 ELSE 0 END)) AS purchase


    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
       
    JOIN 
        page_hierarchy ph
    
     ON e.page_id = ph.page_id
    GROUP BY
        e.visit_id )

    
    SELECT
        ROUND(100 * (1-(SUM(purchase)/SUM(checkout))),2) AS percentage_checkout_view_with_no_purchase
    FROM
        tabla_resumen
  
  
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:30:11,964 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:30:11,965 INFO sqlalchemy.engine.Engine 
    
    WITH tabla_resumen AS (
    SELECT
        e.visit_id,
        SUM((CASE WHEN i.nombre = 'Page View' AND ph.page_name = 'Checkout' THEN 1 ELSE 0 END)) AS checkout,
        SUM((CASE WHEN i.nombre = 'Purchase' THEN 1 ELSE 0 END)) AS purchase


    FROM
        events  e
    JOIN 
        event_identifier i
    
     ON e.event_type = i.id
       
    JOIN 
        page_hierarchy ph
    
     ON e.page_id = ph.page_id
    GROUP BY
        e.visit_id )

    
    SELECT
        ROUND(100 * (1-(SUM(purchase)/SUM(checkout))),2) AS percentage_checkout_view_with_no_purchase
    FROM
        tabla_resumen
  
  
  
  



2025-04-23 13:30:11,966 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:30:12,063 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,percentage_checkout_view_with_no_purchase
0,15.5


# **7. Cuales son los tops 3 paginas por numero de visitas?**






In [47]:
query = '''
    
   

    SELECT
        ph.page_name,
        COUNT(visit_id) AS total_visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    WHERE e.event_type = 1
    GROUP BY 
        ph.page_name
    ORDER BY
        total_visitas DESC
  
  


'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:39:18,311 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:39:18,313 INFO sqlalchemy.engine.Engine 
    
   

    SELECT
        ph.page_name,
        COUNT(visit_id) AS total_visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    WHERE e.event_type = 1
    GROUP BY 
        ph.page_name
    ORDER BY
        total_visitas DESC
  
  



2025-04-23 13:39:18,313 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:39:18,367 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,page_name,total_visitas
0,All Products,3174
1,Checkout,2103
2,Home Page,1782
3,Oyster,1568
4,Crab,1564
5,Russian Caviar,1563
6,Kingfish,1559
7,Salmon,1559
8,Lobster,1547
9,Abalone,1525


# **8. ¿Cuál es el número de visualizaciones y agregaciones al carrito para cada categoría de producto?**



In [61]:
query = '''
    
   

    SELECT
        ph.product_category,
        SUM(CASE WHEN ei.nombre='Add to Cart' THEN 1 ELSE 0 END) AS carrito,
        SUM(CASE WHEN ei.nombre='Page View' THEN 1 ELSE 0 END) AS visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    JOIN 
        event_identifier ei
    ON e.event_type = ei.id
    WHERE ph.product_category IS NOT NULL
    GROUP BY
        ph.product_category
    ORDER BY
        visitas DESC
  


'''


pd.read_sql_query(query, con=engine)

2025-04-23 13:49:59,149 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 13:49:59,150 INFO sqlalchemy.engine.Engine 
    
   

    SELECT
        ph.product_category,
        SUM(CASE WHEN ei.nombre='Add to Cart' THEN 1 ELSE 0 END) AS carrito,
        SUM(CASE WHEN ei.nombre='Page View' THEN 1 ELSE 0 END) AS visitas
    FROM
        events e
    JOIN 
        page_hierarchy ph
    ON e.page_id = ph.page_id
    JOIN 
        event_identifier ei
    ON e.event_type = ei.id
    WHERE ph.product_category IS NOT NULL
    GROUP BY
        ph.product_category
    ORDER BY
        visitas DESC
  



2025-04-23 13:49:59,151 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 13:49:59,225 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,product_category,carrito,visitas
0,Shellfish,3792.0,6204.0
1,Fish,2789.0,4633.0
2,Luxury,1870.0,3032.0


# **9. ¿Cuáles son los 3 productos principales por compras?**



In [104]:
query = '''
    
    -- Obtener todas las ids que fueron compras

    WITH all_ids_purchase AS (
    SELECT
        DISTINCT visit_id
    FROM
        events
    WHERE
        event_type = 3 ),

    -- Filtrar la tabla evento por todas aquellas ids donde hubo compras

    purchase_table AS (

    SELECT
        *
    FROM
        events
    WHERE
        visit_id IN (SELECT * FROM all_ids_purchase) ) 

    
    -- Una vez filtrado agrupamos por producto y contamos

    SELECT
        ph.page_name,
        COUNT(*) AS total_compras
    FROM
        purchase_table pt
    JOIN
       page_hierarchy ph
    ON pt.page_id = ph.page_id
    WHERE
        event_type = 2
    GROUP BY
        ph.page_name
    ORDER BY
        total_compras
    
    

'''


pd.read_sql_query(query, con=engine)

2025-04-23 14:28:16,998 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-23 14:28:16,999 INFO sqlalchemy.engine.Engine 
    
    -- Obtener todas las ids que fueron compras

    WITH all_ids_purchase AS (
    SELECT
        DISTINCT visit_id
    FROM
        events
    WHERE
        event_type = 3 ),

    -- Filtrar la tabla evento por todas aquellas ids donde hubo compras

    purchase_table AS (

    SELECT
        *
    FROM
        events
    WHERE
        visit_id IN (SELECT * FROM all_ids_purchase) ) 

    
    -- Una vez filtrado agrupamos por producto y contamos

    SELECT
        ph.page_name,
        COUNT(*) AS total_compras
    FROM
        purchase_table pt
    JOIN
       page_hierarchy ph
    ON pt.page_id = ph.page_id
    WHERE
        event_type = 2
    GROUP BY
        ph.page_name
    ORDER BY
        total_compras
    
    


2025-04-23 14:28:16,999 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-23 14:28:17,044 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,page_name,total_compras
0,Tuna,697
1,Russian Caviar,697
2,Abalone,699
3,Kingfish,707
4,Black Truffle,707
5,Salmon,711
6,Crab,719
7,Oyster,726
8,Lobster,754
