# Notebooks para preparar los datos para entrenar al modelo.

In [1]:
import pandas as pd
import sqlite3

In [2]:
conn = sqlite3.connect('../database.db')

In [3]:
def query(sql):
    try:
        return pd.read_sql_query(sql, conn)
    except Exception as e:
        raise ValueError("❌ No se encuentra la tabla en la base de datos.") from e

In [4]:
tablas = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';", conn)
print(tablas)


# Obtener columnas de la tabla (otra opción)
info = pd.read_sql_query("PRAGMA table_info(mi_tabla);", conn)
print(info)  # tiene name, type, notnull, pk, etc.


            name
0          clima
1     calendario
2     provincias
3    localidades
4  departamentos
5     estaciones
6      contagios
Empty DataFrame
Columns: [cid, name, type, notnull, dflt_value, pk]
Index: []


In [9]:
tablas

Unnamed: 0,name
0,clima
1,calendario
2,provincias
3,localidades
4,departamentos
5,estaciones
6,contagios


In [17]:
for tabla in tablas.name:
    print('Tabla:', tabla)
    print(pd.read_sql_query(f'SELECT * FROM {tabla} LIMIT 10', con=conn))
    print('')

Tabla: clima
                        fecha id_estacion  precipitacion_pluviometrica  \
0  2018-01-01 00:00:00.000000     A872801                     0.000000   
1  2018-01-02 00:00:00.000000     A872801                     0.000000   
2  2018-01-03 00:00:00.000000     A872801                     0.000000   
3  2018-01-04 00:00:00.000000     A872801                     0.000000   
4  2018-01-05 00:00:00.000000     A872801                     0.589565   
5  2018-01-06 00:00:00.000000     A872801                     0.000000   
6  2018-01-07 00:00:00.000000     A872801                     0.000000   
7  2018-01-08 00:00:00.000000     A872801                     0.000000   
8  2018-01-09 00:00:00.000000     A872801                     0.000000   
9  2018-01-10 00:00:00.000000     A872801                     0.000000   

   temperatura_minima  temperatura_maxima  temperatura_media  humedad_media  \
0                15.9                29.4           22.30069           52.0   
1             

In [23]:
with open('../query.sql', 'r', encoding='utf-8') as f:
    sql = f.read()

In [24]:
sql

'WITH\n-- 0) Calendario semanal: lunes como inicio de semana\ncal_sem AS (\n  SELECT\n    fecha,\n    anio,\n    semana,\n    diaNumeroSemana,\n    DATE(fecha) AS semana_inicio\n  FROM calendario\n  WHERE diaNumeroSemana = 0          -- Lunes\n),\n\n-- 1) Clima diario normalizado\nclima_d AS (\n  SELECT\n    DATE(c.fecha)                       AS fecha,\n    c.id_estacion                       AS id_interno,   -- A872xxx\n    c.precipitacion_pluviometrica       AS precip,\n    c.temperatura_media                 AS t_media,\n    c.humedad_media                     AS h_media,\n    c.temperatura_minima                AS t_min,\n    c.temperatura_maxima                AS t_max\n  FROM clima c\n),\n\n-- 2) Mapeo estación -> departamento\nest_dep AS (\n  SELECT\n    e.id_interno,                       -- A872xxx\n    e.id_estacion,                      -- id numérico si lo necesitás\n    e.id_departamento\n  FROM estaciones e\n),\n\n-- 3) Clima semanal por departamento\nclima_sem AS (\n  S

In [25]:
pd.read_sql_query(sql, conn)

Unnamed: 0,semana_inicio,id_departamento,lag1_t_media,lag1_h_media,lag1_t_min,lag1_t_max,lag1_precip,roll4_t_media,roll4_precip,lag1_y,roll4_y,y
0,2020-04-27,14063,6.961538,82.346154,1.230769,12.692308,0.268413,6.961538,0.268413,1.0,1.0,1.0
1,2023-03-20,14063,28.150000,56.000000,19.800000,36.500000,0.000000,17.555769,0.268413,1.0,3.0,1.0
2,2023-03-27,14063,24.400000,78.000000,19.800000,29.000000,0.000000,26.275000,0.000000,1.0,3.0,1.0
3,2023-04-03,14063,20.700000,71.000000,12.800000,28.600000,0.000000,24.416667,0.000000,1.0,3.0,1.0
4,2023-04-10,14063,21.750000,74.000000,17.300000,26.200000,0.000000,22.283333,0.000000,1.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1180,2024-03-25,86168,28.279860,73.000000,23.400000,35.600000,0.037024,27.615740,0.037024,1.0,5.0,2.0
1181,2024-04-01,86168,26.412500,60.000000,18.900000,36.500000,0.000000,28.536343,0.037024,2.0,6.0,6.0
1182,2024-04-08,86168,19.375700,86.000000,17.200000,23.400000,0.119363,24.689353,0.156388,6.0,9.0,3.0
1183,2024-04-15,86168,17.430550,84.000000,11.400000,25.200000,0.000000,21.072917,0.119363,3.0,11.0,1.0
