## Creación de una base de datos unificada.

![Esquema de la base de datos](../img/database_schema.png)

In [1]:
import sqlite3
from sqlalchemy import create_engine
import pandas as pd

Creo el motor de base de datos.

In [2]:
# SQLAlchemy engine for the SQLite file ../database.db. echo=True makes the
# engine log every SQL statement it emits, which is where the long INFO logs
# under the table-creation cells come from.
engine = create_engine('sqlite:///../database.db', echo=True)

In [3]:
def crear_tabla(nombre, data: pd.DataFrame):
    """Write ``data`` to the database as table ``nombre``.

    An existing table with the same name is replaced, and the DataFrame index
    is not stored as a column.

    Args:
        nombre: Name of the destination table.
        data: DataFrame whose columns and rows become the table contents.

    Raises:
        RuntimeError: If the write fails. The underlying exception is chained
            as ``__cause__`` so the real reason stays visible in the traceback.
    """
    try:
        data.to_sql(name=nombre, con=engine, index=False, if_exists='replace')
    except Exception as exc:
        # Raising a bare string is itself a TypeError in Python 3 and masked
        # the real failure; raise a proper exception and keep the cause.
        raise RuntimeError(f'Error al crear la tabla "{nombre}".') from exc
    print(f'Tabla "{nombre}" creada con exito.')

# Tablas de hechos.

### Tabla `clima`.

In [4]:
df_clima = pd.read_parquet('../data/datos_climaticos.parquet')

In [5]:
# Keep only the rows whose fecha compares greater than "2017-12-31" and
# renumber the remaining rows from 0.
es_posterior_a_2017 = df_clima['fecha'] > "2017-12-31"
df_clima = df_clima[es_posterior_a_2017].reset_index(drop=True)

In [6]:
# Derive year and week-number columns from the observation date.
df_clima['anio'] = df_clima.fecha.dt.year
# NOTE(review): `anio` is the calendar year while `semana` is the ISO week, so
# early-January dates can end up as e.g. anio=2021 with semana=53 — confirm
# this is the intended convention before joining on (anio, semana).
df_clima['semana'] = df_clima.fecha.dt.isocalendar().week

# Show 10 random rows as a spot check.
df_clima.sample(10)

Unnamed: 0,fecha,id_estacion,precipitacion_pluviometrica,temperatura_minima,temperatura_maxima,temperatura_media,humedad_media,rocio_medio,tesion_vapor_media,radiacion_global,heliofania_efectiva,heliofania_relativa,anio,semana
67069,2021-11-15,NH0067,0.509441,19.0,30.5,24.75,70.0,21.729944,26.062143,27.526443,11.5,85.19,2021,46
96433,2018-01-11,NH0444,0.0,23.3,33.8,28.2,74.0,23.3,28.5,15.546337,6.3,43.0,2018,2
56883,2020-10-16,A872962,0.0,49.6,59.4,54.5,25.071429,47.971759,38.552996,18.13374,6.850163,53.0,2020,42
106019,2025-01-05,NH0492,0.0,17.3,30.0,23.65,53.0,13.587247,15.608695,25.464422,10.1,70.88,2025,1
57288,2022-03-30,A872962,0.053614,10.8,27.0,15.84306,59.0,4.95565,8.865591,13.06386,5.065873,43.0,2022,13
63078,2018-09-04,A872999,0.035856,21.458824,21.929412,21.672078,64.503546,16.175342,16.721858,17.4336,5.416667,90.1,2018,36
99135,2020-01-08,NH0446,0.290173,18.9,34.114306,26.650002,78.0,19.118736,22.624826,22.464157,8.5,58.82,2020,2
3629,2021-07-30,A872810,0.0,2.0,18.1,8.746528,61.0,2.399536,7.290768,8.146801,4.593981,45.0,2021,30
44990,2023-01-16,A872954,0.0,22.264693,41.970407,32.558272,36.0,24.602372,17.670433,21.598926,7.824514,57.421053,2023,3
111693,2023-05-29,NH0550,0.0,12.1,15.5,13.8,88.0,11.914408,13.946519,4.112551,0.0,0.0,2023,22


In [7]:
crear_tabla('clima', df_clima)

2025-10-24 19:31:42,673 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:42,682 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("clima")
2025-10-24 19:31:42,683 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:42,686 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("clima")
2025-10-24 19:31:42,687 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:42,689 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:42,691 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:42,693 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:42,693 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:42,695 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("clima")
2025-10-24 19:31:42,695 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:4

### Tabla `contagios`

In [8]:
df_contagios = pd.read_csv('../data/dengue-final.csv')
df_contagios.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68126 entries, 0 to 68125
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id_uta                   68126 non-null  int64  
 1   departamento_nombre      68126 non-null  object 
 2   provincia_nombre         68126 non-null  object 
 3   ano                      68126 non-null  int64  
 4   semanas_epidemiologicas  68126 non-null  float64
 5   grupo_edad_id            68126 non-null  int64  
 6   grupo_edad_desc          68126 non-null  object 
 7   cantidad_casos           68126 non-null  float64
 8   poblacion                68126 non-null  int64  
dtypes: float64(2), int64(4), object(3)
memory usage: 4.7+ MB


In [9]:
df_contagios.head()

Unnamed: 0,id_uta,departamento_nombre,provincia_nombre,ano,semanas_epidemiologicas,grupo_edad_id,grupo_edad_desc,cantidad_casos,poblacion
0,2003,comuna 3,ciudad de buenos aires,2018,15.0,6,de 15 a 19 anos,1.0,192945
1,2004,comuna 4,ciudad de buenos aires,2018,6.0,6,de 15 a 19 anos,1.0,239279
2,6091,berazategui,buenos aires,2018,11.0,7,de 20 a 24 anos,1.0,358262
3,6091,berazategui,buenos aires,2018,14.0,9,de 35 a 44 anos,2.0,358262
4,6091,berazategui,buenos aires,2018,19.0,11,mayores de 65 anos,1.0,358262


In [10]:
crear_tabla('contagios', df_contagios)

2025-10-24 19:31:46,107 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:46,115 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("contagios")
2025-10-24 19:31:46,116 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:46,117 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("contagios")
2025-10-24 19:31:46,118 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:46,119 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:46,120 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:46,122 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:46,124 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:46,126 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("contagios")
2025-10-24 19:31:46,126 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-1

# Tablas de dimensiones.

### Tabla `calendario`.

In [11]:
df_calendario = pd.read_csv('../data/calendario.csv')
df_calendario.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24004 entries, 0 to 24003
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   fecha            24004 non-null  object
 1   anio             24004 non-null  int64 
 2   mes              24004 non-null  int64 
 3   dia              24004 non-null  int64 
 4   trimestre        24004 non-null  int64 
 5   semestre         24004 non-null  int64 
 6   quincena         24004 non-null  int64 
 7   semanaMes        24004 non-null  int64 
 8   semana           24004 non-null  int64 
 9   diaSemana        24004 non-null  object
 10  diaNumeroSemana  24004 non-null  int64 
 11  bisiesto         24004 non-null  bool  
dtypes: bool(1), int64(9), object(2)
memory usage: 2.0+ MB


In [12]:
df_calendario

Unnamed: 0,fecha,anio,mes,dia,trimestre,semestre,quincena,semanaMes,semana,diaSemana,diaNumeroSemana,bisiesto
0,1960-01-01,1960,1,1,1,1,1,1,53,Viernes,4,True
1,1960-01-02,1960,1,2,1,1,1,1,53,Sábado,5,True
2,1960-01-03,1960,1,3,1,1,1,1,53,Domingo,6,True
3,1960-01-04,1960,1,4,1,1,1,1,1,Lunes,0,True
4,1960-01-05,1960,1,5,1,1,1,1,1,Martes,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...
23999,2025-09-15,2025,9,15,3,2,1,3,38,Lunes,0,False
24000,2025-09-16,2025,9,16,3,2,2,3,38,Martes,1,False
24001,2025-09-17,2025,9,17,3,2,2,3,38,Miércoles,2,False
24002,2025-09-18,2025,9,18,3,2,2,3,38,Jueves,3,False


In [13]:
crear_tabla('calendario', df_calendario)

2025-10-24 19:31:47,630 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:47,644 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calendario")
2025-10-24 19:31:47,647 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:47,650 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("calendario")
2025-10-24 19:31:47,651 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:47,654 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:47,655 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:47,658 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:47,660 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:47,663 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("calendario")
2025-10-24 19:31:47,665 INFO sqlalchemy.engine.Engine [raw sql] ()
202

### Tabla `provincias` (ubicación)

In [14]:
df_estaciones = pd.read_csv('../data/estaciones-meteorologicas-inta.csv')
df_departamentos = pd.read_csv('../data/departamentos_con_estacion.csv')

In [15]:
def _capitalizar_palabras(texto):
    """Capitalize every space-separated word of ``texto``."""
    return ' '.join(palabra.capitalize() for palabra in texto.split(' '))


# Distinct province names found in the cases table, with each word capitalized.
lista_provincias = df_contagios['provincia_nombre'].apply(_capitalizar_palabras).unique()
print(lista_provincias)

['Ciudad De Buenos Aires' 'Buenos Aires' 'Mendoza' 'Santa Fe' 'Rio Negro'
 'Cordoba' 'Corrientes' 'Formosa' 'Chaco' 'La Rioja' 'San Luis'
 'Entre Rios' 'Misiones' 'Salta' 'Santiago Del Estero' 'Tucuman' 'Jujuy'
 'Catamarca' 'Neuquen' 'Chubut' 'La Pampa' 'San Juan' 'Santa Cruz'
 'Tierra Del Fuego Antartida E Islas Del Atlantico Sur']


In [16]:
df_provincias = pd.DataFrame({'provincia': lista_provincias})
df_provincias

Unnamed: 0,provincia
0,Ciudad De Buenos Aires
1,Buenos Aires
2,Mendoza
3,Santa Fe
4,Rio Negro
5,Cordoba
6,Corrientes
7,Formosa
8,Chaco
9,La Rioja


In [17]:
# Replacement spellings for province names: restored accents and shortened
# forms for the two long names.
mapeo_renombrar = {
    'Cordoba': 'Córdoba',
    'Tierra Del Fuego Antartida E Islas Del Atlantico Sur': 'Tierra del Fuego',
    'Ciudad De Buenos Aires': 'CABA',
    'Tucuman': 'Tucumán',
    'Neuquen': 'Neuquén',
    'Entre Rios': 'Entre Ríos',
    'Rio Negro': 'Río Negro',
}

# Apply the renames, order the provinces by name and renumber the rows.
df_provincias = (
    df_provincias
    .replace(mapeo_renombrar)
    .sort_values('provincia')
    .reset_index(drop=True)
)
df_provincias

Unnamed: 0,provincia
0,Buenos Aires
1,CABA
2,Catamarca
3,Chaco
4,Chubut
5,Corrientes
6,Córdoba
7,Entre Ríos
8,Formosa
9,Jujuy


In [18]:
df_provincias['id_provincia'] = range(len(df_provincias))
df_provincias

Unnamed: 0,provincia,id_provincia
0,Buenos Aires,0
1,CABA,1
2,Catamarca,2
3,Chaco,3
4,Chubut,4
5,Corrientes,5
6,Córdoba,6
7,Entre Ríos,7
8,Formosa,8
9,Jujuy,9


In [19]:
df_provincias.provincia = df_provincias.provincia.str.upper()
df_provincias.head()

Unnamed: 0,provincia,id_provincia
0,BUENOS AIRES,0
1,CABA,1
2,CATAMARCA,2
3,CHACO,3
4,CHUBUT,4


In [20]:
crear_tabla('provincias', df_provincias)

2025-10-24 19:31:48,623 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:48,626 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("provincias")
2025-10-24 19:31:48,628 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:48,630 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("provincias")
2025-10-24 19:31:48,632 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:48,633 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:48,636 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:48,638 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:48,639 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:48,640 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("provincias")
2025-10-24 19:31:48,642 INFO sqlalchemy.engine.Engine [raw sql] ()
202

### Tabla `localidades`

In [21]:
df_estaciones.head()

Unnamed: 0,Id,Nombre,Tipo,Localidad,Provincia,Latitud,Longitud,Altura,Id Interno,Ubicacion,Desde,Hasta
0,413,25 de Mayo - EEA Pergamino,Nimbus THP,25 de Mayo,Buenos Aires,-35.48,-60.13,85,A872872,CC 18 CP6660,04/09/2012,13/09/2025
1,682,5000,Nimbus THP,Hurlingham,Buenos Aires,-85.0,-55.0,0,A875000,-,20/11/2019,13/09/2025
2,683,5001,Nimbus THP,Hurlingham,Buenos Aires,-85.0,-60.0,0,A875001,-,20/11/2019,13/09/2025
3,704,993 - LAB,Nimbus THP,loc_prueba,Sin asignar,-84.0,-70.0,0,A872993,LABORATORIO,08/03/2022,26/08/2024
4,705,994 - LAB,Nimbus THP,loc_prueba,Sin asignar,-84.0,-65.0,0,A872994,LABORATORIO,01/01/2012,28/08/2025


In [22]:
df_departamentos.head()

Unnamed: 0,departamento_id,departamento_nombre,lat_dep,lon_dep,estacion_id,estacion_id_interno,estacion_nombre,lat_est,lon_est,distancia_km
0,82105,san jeronimo,-32.239935,-61.231266,297,A872868,Las Rosas - EEA Oliveros,-32.49,-61.57,42.253384
1,38112,yavi,-22.329812,-65.825982,516,A872923,Abra Pampa - EEA Abra Pampa,-22.8,-65.83,52.069337
2,62035,el cuy,-39.866718,-68.703438,519,A872947,Plottier - IPAF Patagonia,-38.95,-68.33,106.737771
3,6623,pergamino,-33.589305,-60.772765,236,A872814,Alfonso - EEA Pergamino,-33.91,-60.84,36.112224
4,6868,villa gesell,-37.336673,-57.032501,368,A872833,Las Armas - EEA Cuenca Salado,-37.09,-57.87,79.218609


In [23]:
# Capitalize every space-separated word of the department names.
df_departamentos['departamento_nombre'] = df_departamentos['departamento_nombre'].apply(
    lambda nombre: ' '.join(palabra.capitalize() for palabra in nombre.split(' '))
)

In [24]:
# Inner-join every department with the station referenced by its internal id.
merged = df_departamentos.merge(
    df_estaciones,
    left_on='estacion_id_interno',
    right_on='Id Interno',
)

In [25]:
merged.isna().sum()

departamento_id         0
departamento_nombre     0
lat_dep                 0
lon_dep                 0
estacion_id             0
estacion_id_interno     0
estacion_nombre         0
lat_est                 0
lon_est                 0
distancia_km            0
Id                      0
Nombre                  0
Tipo                    0
Localidad               0
Provincia               0
Latitud                 0
Longitud                0
Altura                  0
Id Interno              0
Ubicacion              27
Desde                   0
Hasta                   0
dtype: int64

In [26]:
merged_mini = merged[[
    'departamento_id',
    'departamento_nombre',
    'Localidad'
]]

In [27]:
len(merged_mini.departamento_nombre.unique())

458

In [28]:
len(merged_mini.Localidad.unique())


142

In [29]:
df_localidades = df_estaciones[['Localidad', 'Provincia', 'Latitud', 'Longitud']]
df_localidades

Unnamed: 0,Localidad,Provincia,Latitud,Longitud
0,25 de Mayo,Buenos Aires,-35.48,-60.13
1,Hurlingham,Buenos Aires,-85.00,-55.00
2,Hurlingham,Buenos Aires,-85.00,-60.00
3,loc_prueba,Sin asignar,-84.00,-70.00
4,loc_prueba,Sin asignar,-84.00,-65.00
...,...,...,...,...
164,Manfredi,Córdoba,-31.94,-65.22
165,Villa Mercedes,San Luis,-33.66,-65.41
166,Villa Paranacito,Entre Rios,-33.71,-58.65
167,Villa Ramallo,Buenos Aires,-33.52,-60.11


In [30]:
# Normalize the column names: lowercase and trimmed of surrounding whitespace.
df_localidades.columns = [nombre.lower().strip() for nombre in df_localidades.columns]

In [31]:
# Remove duplicates: keep a single row per (localidad, provincia) pair.
# NOTE(review): stations that share a locality (e.g. the two Hurlingham rows)
# collapse into one row, so only one station's coordinates survive —
# presumably the first, per drop_duplicates' default; confirm that is intended.
df_localidades = df_localidades.drop_duplicates(subset=['localidad', 'provincia'])

In [32]:
# Discard the rows whose province is the placeholder value 'Sin asignar'.
sin_provincia = df_localidades['provincia'] == 'Sin asignar'
df_localidades = df_localidades.drop(index=df_localidades.index[sin_provincia])

In [33]:
# Sort by province and then locality, and renumber the rows from 0 so that
# row position follows that order.
df_localidades = df_localidades.sort_values(by=['provincia', 'localidad']).reset_index(drop=True)

In [34]:
# Store locality and province names in upper case.
for columna in ('localidad', 'provincia'):
    df_localidades[columna] = df_localidades[columna].str.upper()

In [35]:
# Assign consecutive integer ids (0..n-1) following the current row order.
df_localidades['id_localidad'] = range(len(df_localidades))
df_localidades

Unnamed: 0,localidad,provincia,latitud,longitud,id_localidad
0,25 DE MAYO,BUENOS AIRES,-35.48,-60.13,0
1,ARRECIFES,BUENOS AIRES,-34.05,-60.14,1
2,BALCARCE,BUENOS AIRES,-37.76,-58.30,2
3,BORDENAVE,BUENOS AIRES,-37.75,-63.08,3
4,CHASCOMUS,BUENOS AIRES,-35.74,-58.05,4
...,...,...,...,...,...
140,SACHAYOJ,SANTIAGO DEL ESTERO,-26.46,-61.81,140
141,SILIPICA,SANTIAGO DEL ESTERO,-28.02,-64.23,141
142,VACA HUAÑUNA,SANTIAGO DEL ESTERO,-27.47,-63.47,142
143,SAN SEBASTIAN,TIERRA DEL FUEGO,-52.89,-68.45,143


In [36]:
# Maps the upper-cased province names coming from the stations file to the
# spellings used in df_provincias. Most entries are identities; only
# ENTRE RIOS, NEUQUEN and TUCUMAN gain their accents.
PROV_MAP = {
    "BUENOS AIRES": "BUENOS AIRES",
    "CATAMARCA": "CATAMARCA",
    "CHACO": "CHACO",
    "CHUBUT": "CHUBUT",
    "CORRIENTES": "CORRIENTES",
    "CÓRDOBA": "CÓRDOBA",
    "ENTRE RIOS": "ENTRE RÍOS",
    "FORMOSA": "FORMOSA",
    "JUJUY": "JUJUY",
    "LA PAMPA": "LA PAMPA",
    "LA RIOJA": "LA RIOJA",
    "MENDOZA": "MENDOZA",
    "MISIONES": "MISIONES",
    "NEUQUEN": "NEUQUÉN",
    "RÍO NEGRO": "RÍO NEGRO",
    "SALTA": "SALTA",
    "SAN JUAN": "SAN JUAN",
    "SAN LUIS": "SAN LUIS",
    "SANTA CRUZ": "SANTA CRUZ",
    "SANTA FE": "SANTA FE",
    "SANTIAGO DEL ESTERO": "SANTIAGO DEL ESTERO",
    "TIERRA DEL FUEGO": "TIERRA DEL FUEGO",
    "TUCUMAN": "TUCUMÁN"
}


# NOTE(review): Series.map with a dict presumably leaves NaN for any province
# that is not a key above — verify that no station province is missing here.
df_localidades.provincia = df_localidades.provincia.map(PROV_MAP)


In [37]:
df_localidades.head()

Unnamed: 0,localidad,provincia,latitud,longitud,id_localidad
0,25 DE MAYO,BUENOS AIRES,-35.48,-60.13,0
1,ARRECIFES,BUENOS AIRES,-34.05,-60.14,1
2,BALCARCE,BUENOS AIRES,-37.76,-58.3,2
3,BORDENAVE,BUENOS AIRES,-37.75,-63.08,3
4,CHASCOMUS,BUENOS AIRES,-35.74,-58.05,4


In [38]:
# Attach each locality's province id by joining on the province name.
df_localidades = pd.merge(df_localidades, df_provincias, on='provincia')
df_localidades

Unnamed: 0,localidad,provincia,latitud,longitud,id_localidad,id_provincia
0,25 DE MAYO,BUENOS AIRES,-35.48,-60.13,0,0
1,ARRECIFES,BUENOS AIRES,-34.05,-60.14,1,0
2,BALCARCE,BUENOS AIRES,-37.76,-58.30,2,0
3,BORDENAVE,BUENOS AIRES,-37.75,-63.08,3,0
4,CHASCOMUS,BUENOS AIRES,-35.74,-58.05,4,0
...,...,...,...,...,...,...
140,SACHAYOJ,SANTIAGO DEL ESTERO,-26.46,-61.81,140,21
141,SILIPICA,SANTIAGO DEL ESTERO,-28.02,-64.23,141,21
142,VACA HUAÑUNA,SANTIAGO DEL ESTERO,-27.47,-63.47,142,21
143,SAN SEBASTIAN,TIERRA DEL FUEGO,-52.89,-68.45,143,22


In [39]:
crear_tabla('localidades', df_localidades)

2025-10-24 19:31:49,219 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:49,226 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("localidades")
2025-10-24 19:31:49,228 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,231 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("localidades")
2025-10-24 19:31:49,233 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,236 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:49,237 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,242 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:49,245 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,247 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("localidades")
2025-10-24 19:31:49,247 INFO sqlalchemy.engine.Engine [raw sql] ()


### Tabla `departamentos`

In [40]:
df_estaciones.head()

Unnamed: 0,Id,Nombre,Tipo,Localidad,Provincia,Latitud,Longitud,Altura,Id Interno,Ubicacion,Desde,Hasta
0,413,25 de Mayo - EEA Pergamino,Nimbus THP,25 de Mayo,Buenos Aires,-35.48,-60.13,85,A872872,CC 18 CP6660,04/09/2012,13/09/2025
1,682,5000,Nimbus THP,Hurlingham,Buenos Aires,-85.0,-55.0,0,A875000,-,20/11/2019,13/09/2025
2,683,5001,Nimbus THP,Hurlingham,Buenos Aires,-85.0,-60.0,0,A875001,-,20/11/2019,13/09/2025
3,704,993 - LAB,Nimbus THP,loc_prueba,Sin asignar,-84.0,-70.0,0,A872993,LABORATORIO,08/03/2022,26/08/2024
4,705,994 - LAB,Nimbus THP,loc_prueba,Sin asignar,-84.0,-65.0,0,A872994,LABORATORIO,01/01/2012,28/08/2025


In [41]:
# Bring in the locality and province of the station linked to each department.
columnas_estacion = ['Id Interno', 'Localidad', 'Provincia']
departamentos = pd.merge(
    df_departamentos,
    df_estaciones[columnas_estacion],
    left_on='estacion_id_interno',
    right_on='Id Interno',
)

In [42]:
# Keep only the department fields plus the station's locality and province,
# and lowercase the two station column names.
columnas_departamento = [
    'departamento_id',
    'departamento_nombre',
    'lat_dep',
    'lon_dep',
    'Localidad',
    'Provincia',
]

departamentos = (
    departamentos[columnas_departamento]
    .rename(columns={'Provincia': 'provincia', 'Localidad': 'localidad'})
)


In [43]:
# Upper-case and trim the three text columns.
for columna in ('departamento_nombre', 'localidad', 'provincia'):
    departamentos[columna] = departamentos[columna].str.upper().str.strip()

departamentos.head()

Unnamed: 0,departamento_id,departamento_nombre,lat_dep,lon_dep,localidad,provincia
0,82105,SAN JERONIMO,-32.239935,-61.231266,LAS ROSAS,SANTA FE
1,38112,YAVI,-22.329812,-65.825982,ABRA PAMPA,JUJUY
2,62035,EL CUY,-39.866718,-68.703438,PLOTTIER,NEUQUEN
3,6623,PERGAMINO,-33.589305,-60.772765,MARIANO ALFONSO,BUENOS AIRES
4,6868,VILLA GESELL,-37.336673,-57.032501,LAS ARMAS,BUENOS AIRES


In [44]:
departamentos.provincia = departamentos.provincia.map(PROV_MAP)

In [45]:
# Attach the province id first, then resolve the locality id using BOTH the
# locality name and its province id. Joining on the name alone would duplicate
# a department (with a wrong id_localidad) whenever the same locality name
# exists in more than one province.
departamentos = (
    departamentos
    .merge(df_provincias, on='provincia')
    .merge(
        df_localidades[['id_localidad', 'localidad', 'id_provincia']],
        on=['localidad', 'id_provincia'],
    )
)
departamentos

Unnamed: 0,departamento_id,departamento_nombre,lat_dep,lon_dep,localidad,provincia,id_provincia,id_localidad
0,82105,SAN JERONIMO,-32.239935,-61.231266,LAS ROSAS,SANTA FE,20,128
1,38112,YAVI,-22.329812,-65.825982,ABRA PAMPA,JUJUY,9,78
2,62035,EL CUY,-39.866718,-68.703438,PLOTTIER,NEUQUÉN,14,102
3,6623,PERGAMINO,-33.589305,-60.772765,MARIANO ALFONSO,BUENOS AIRES,0,14
4,6868,VILLA GESELL,-37.336673,-57.032501,LAS ARMAS,BUENOS AIRES,0,11
...,...,...,...,...,...,...,...,...
506,62007,ADOLFO ALSINA,-40.633583,-63.509506,VIEDMA,RÍO NEGRO,15,109
507,62028,CONESA,-40.220566,-64.070602,GUARDIA MITRE,RÍO NEGRO,15,106
508,6553,MONTE HERMOSO,-38.995358,-61.215202,CORONEL DORREGO,BUENOS AIRES,0,5
509,22084,LIBERTADOR GENERAL SAN MARTIN,-26.514810,-59.225561,PIRANE,FORMOSA,8,77


In [46]:
# Drop the name columns that are now represented by ids, and switch the
# remaining columns to the names used in the final table.
nombres_finales = {
    'lat_dep': 'latitud',
    'lon_dep': 'longitud',
    'departamento_id': 'id_departamento',
    'departamento_nombre': 'departamento',
}
departamentos = (
    departamentos
    .drop(columns=['localidad', 'provincia'])
    .rename(columns=nombres_finales)
)
departamentos.sample(2)

Unnamed: 0,id_departamento,departamento,latitud,longitud,id_provincia,id_localidad
374,6028,ALMIRANTE BROWN,-34.801558,-58.391468,0,13
262,30015,CONCORDIA,-31.352304,-58.053551,7,68


In [47]:
# Finally, sort ascending by department name. sort_values returns a new
# DataFrame, so the result is assigned back; otherwise the table written
# afterwards would keep the unsorted order.

departamentos = departamentos.sort_values(by='departamento')
departamentos

Unnamed: 0,id_departamento,departamento,latitud,longitud,id_provincia,id_localidad
278,22126,1 DE MAYO,-27.228550,-58.904051,3,34
125,22036,12 DE OCTUBRE,-27.345879,-61.434143,3,32
139,22039,2 DE ABRIL,-27.597486,-61.131971,3,37
35,6854,25 DE MAYO,-35.281385,-60.261818,0,0
162,70126,25 DE MAYO,-31.730632,-68.345013,17,112
...,...,...,...,...,...,...
1,38112,YAVI,-22.329812,-65.825982,9,78
436,90119,YERBA BUENA,-26.778964,-65.285321,23,144
393,58112,ZAPALA,-38.822468,-70.008003,14,102
17,6882,ZARATE,-33.980419,-59.284686,0,12


In [48]:
crear_tabla('departamentos', departamentos)

2025-10-24 19:31:49,640 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-10-24 19:31:49,646 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("departamentos")
2025-10-24 19:31:49,649 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,653 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("departamentos")
2025-10-24 19:31:49,655 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,658 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:49,660 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,663 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-10-24 19:31:49,665 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-10-24 19:31:49,667 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("departamentos")
2025-10-24 19:31:49,669 INFO sqlalchemy.engine.Engine [raw sq

## Chequeo de los datos en *`database.db`*

In [51]:
# Open a direct sqlite3 connection to the generated database file and create
# the cursor used by the verification queries.
conn = sqlite3.connect('../database.db')
cursor = conn.cursor()

In [52]:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

[('cotagios',),
 ('clima',),
 ('contagios',),
 ('calendario',),
 ('provincias',),
 ('localidades',),
 ('departamentos',)]

In [57]:
def query(sql_query, cursor=cursor):
    """Run ``sql_query`` on ``cursor`` and return all resulting rows.

    ``cursor`` defaults to the notebook-level cursor that exists at the
    moment this function is defined.
    """
    resultado = cursor.execute(sql_query)
    return resultado.fetchall()

In [58]:
# Sanity check on the clima table: returns one row per distinct anio.
# NOTE(review): SELECT * combined with GROUP BY has no aggregate functions, so
# which row represents each year is presumably arbitrary in SQLite — confirm
# that is acceptable for this check.
sql = '''
SELECT * FROM clima
GROUP BY anio

'''

query(sql)

[('2018-01-01 00:00:00.000000',
  'A872801',
  0.0,
  15.9,
  29.4,
  22.30069,
  52.0,
  12.40972,
  14.57236,
  27.55428,
  11.31803,
  79.0,
  2018,
  1),
 ('2019-01-01 00:00:00.000000',
  'A872801',
  0.6320452964742331,
  21.5,
  33.4,
  26.55208,
  80.0,
  23.54969,
  29.27195,
  21.09096,
  7.557858,
  53.0,
  2019,
  1),
 ('2020-01-01 00:00:00.000000',
  'A872801',
  0.0,
  19.6,
  26.8,
  22.93612,
  69.0,
  18.29063,
  21.04712,
  16.59516,
  4.94701,
  34.0,
  2020,
  1),
 ('2021-01-01 00:00:00.000000',
  'A872801',
  0.0,
  14.5,
  35.6,
  25.57222,
  37.0,
  11.50937,
  13.79877,
  27.05868,
  11.03144,
  77.0,
  2021,
  53),
 ('2022-01-01 00:00:00.000000',
  'A872801',
  0.24622984422583336,
  20.6,
  28.1,
  24.25555,
  12.0,
  -2.625698,
  7.589362,
  5.31996,
  10.667755,
  74.5,
  2022,
  52),
 ('2023-01-01 00:00:00.000000',
  'A872801',
  0.6255311792538264,
  19.5,
  25.9,
  22.42987,
  83.0,
  17.35078,
  20.01406,
  4.535221,
  10.993500000000001,
  76.5,
  2023,
