In [1]:
import sqlite3
import pandas as pd

# Connect to the SQLite database file "market_place.db" (path is relative to
# the current working directory).
conn = sqlite3.connect("market_place.db")

# Cursor used by the cells below to execute SQL statements on this connection.
cursor = conn.cursor()

In [None]:
# Create the schema. Every statement uses IF NOT EXISTS, so re-running this
# cell against an already-initialised database is a no-op.
#
# NOTE(review): SQLite only enforces FOREIGN KEY clauses when
# `PRAGMA foreign_keys = ON` has been issued on the connection; by default
# the clauses below are declarative only. Confirm whether enforcement is
# wanted before enabling it.

# Lookup table: one row per aisle.
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS aisles (
        aisle_id INT PRIMARY KEY,
        aisle_name TEXT NOT NULL
    )
    """
)

# Lookup table: one row per department.
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS departments (
        department_id INT PRIMARY KEY,
        department_name TEXT NOT NULL
    )
    """
)

# Products reference both lookup tables.
# Fix: a comma was missing between the last column definition and the first
# table constraint, which made the statement a syntax error.
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS products (
        product_id INT PRIMARY KEY,
        product_name TEXT NOT NULL,
        aisle_id INT NOT NULL,
        department_id INT NOT NULL,
        FOREIGN KEY (aisle_id) REFERENCES aisles(aisle_id),
        FOREIGN KEY (department_id) REFERENCES departments(department_id)
    )
    """
)

# One row per (order, user, product) combination.
# Fix: commas were missing after the last column definition and after the
# PRIMARY KEY constraint, which made the statement a syntax error.
# NOTE(review): 'days_since_prio_order' looks like a typo for
# 'days_since_prior_order' (the column name in merged_orders.csv). It is kept
# as-is here because other cells may reference this name; confirm and rename.
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS orders (
        order_id INT NOT NULL,
        user_id INT NOT NULL,
        product_id INT NOT NULL,
        add_to_cart_order INT NOT NULL,
        reordered INT NOT NULL,
        order_number INT NOT NULL,
        order_dow INT NOT NULL,
        order_hour_of_day INT NOT NULL,
        days_since_prio_order INT NOT NULL,
        PRIMARY KEY (order_id,user_id,product_id),
        FOREIGN KEY (product_id) REFERENCES products(product_id)
    )
    """
)

# Persist the schema changes.
conn.commit()

In [2]:
# Load the source CSV files from the local "data" directory.
# Forward slashes are used so the relative paths resolve on Windows as well
# as on Linux/macOS (a backslash is only a path separator on Windows).
aisles = pd.read_csv("data/aisles.csv", sep=",")
departments = pd.read_csv("data/departments.csv", sep=",")
products = pd.read_csv("data/products.csv", sep=",")
orders = pd.read_csv("data/merged_orders.csv", sep=",")

# Report the number of missing values per column for each table.
print(aisles.isna().sum(), "\n")
print(departments.isna().sum(), "\n")
print(products.isna().sum(), "\n")
print(orders.isna().sum(), "\n")

aisle_id    0
aisle       0
dtype: int64 

department_id    0
department       0
dtype: int64 

product_id       0
product_name     0
aisle_id         0
department_id    0
dtype: int64 

order_id                        0
user_id                         0
eval_set                        0
order_number                    0
order_dow                       0
order_hour_of_day               0
days_since_prior_order    2078068
product_id                      0
add_to_cart_order               0
reordered                       0
dtype: int64 



In [3]:
# Compute each user's mean 'days_since_prior_order', broadcast back to one
# value per row so it aligns with the original column.
mean_days_per_user = orders.groupby('user_id')['days_since_prior_order'].transform('mean')

# Replace NaN values with the owning user's mean.
# A user whose values are all NaN has a NaN mean, so the first fillna leaves
# those rows empty; fall back to the overall mean (or 0 when the whole column
# is empty) so the integer cast below cannot fail on leftover NaNs.
overall_mean = orders['days_since_prior_order'].mean()
days_filled = (
    orders['days_since_prior_order']
    .fillna(mean_days_per_user)
    .fillna(0 if pd.isna(overall_mean) else overall_mean)
)

# astype(int) drops the fractional part of any filled-in mean.
orders['days_since_prior_order'] = days_filled.astype(int)

# Forward slash instead of "data\o...": "\o" is an invalid escape sequence in
# a non-raw string literal, and the backslash form is Windows-only.
orders.to_csv("data/orders_not_nulls.csv", index=False)