In [11]:
import numpy as np
from shapely.geometry import box

CELL_KM = 1  # Grid cell size in km; adjust here (1 for 1x1 km cells, 2 for 2x2 km cells)

def build_grid(gdf, cell_km=1):
    """Build a regular square grid covering the extent of ``gdf``.

    The grid is laid out in the UTM CRS estimated from ``gdf`` (so the cell
    side is expressed in metres) and is returned reprojected to ``gdf.crs``.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Geometries whose total bounds define the area to cover.
    cell_km : float, default 1
        Side length of each square cell, in kilometres. Must be positive.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cell with columns ``grid_id`` (``"G_<i>_<j>"``, where
        ``i`` indexes the x steps and ``j`` the y steps) and ``geometry``.

    Raises
    ------
    ValueError
        If ``cell_km`` is not a positive number.
    """
    if not cell_km > 0:
        raise ValueError(f"cell_km must be positive, got {cell_km!r}")

    crs_m = gdf.estimate_utm_crs()       # switch to a CRS in metres
    gdf_m = gdf.to_crs(crs_m)
    minx, miny, maxx, maxy = gdf_m.total_bounds

    step = cell_km * 1000

    # Pad the extent by a negligible margin (1 mm for a 1 km cell). Without it
    # the geometries that define the bounds sit exactly on a cell edge, where
    # strict point-in-polygon predicates such as 'within' do not match, and a
    # zero-width or zero-height extent would produce no cells at all.
    pad = step * 1e-6
    xs = np.arange(minx - pad, maxx + pad, step)
    ys = np.arange(miny - pad, maxy + pad, step)

    cells = [
        {"grid_id": f"G_{i}_{j}",
         "geometry": box(x0, y0, x0 + step, y0 + step)}
        for i, x0 in enumerate(xs)
        for j, y0 in enumerate(ys)
    ]

    grid = gpd.GeoDataFrame(cells, geometry="geometry", crs=crs_m)
    return grid.to_crs(gdf.crs)



import geopandas as gpd

# Read the cases GeoJSON. Forward slashes keep this relative path valid on
# Windows as well as on Linux/macOS, where a backslash is not a separator.
gdf_casos_utm = gpd.read_file("outputs/casos_con_distancias.geojson")

# Build the grid covering all cases
grid = build_grid(gdf_casos_utm, CELL_KM)

# Assign a grid_id to each case (left join: unmatched cases keep a NaN grid_id)
casos_con_grid = gpd.sjoin(
    gdf_casos_utm.to_crs(grid.crs),
    grid[['grid_id', 'geometry']],
    how='left',
    predicate='within',
).drop(columns='index_right')

# ISO calendar components of the estimated bite date
iso = casos_con_grid['fecha_picadura_estimada'].dt.isocalendar()
casos_con_grid['iso_year'] = iso['year']
casos_con_grid['iso_week'] = iso['week']

# Combine into a single "YYYY-Www" label (zero-padded so it sorts chronologically)
casos_con_grid['year_week'] = (
    casos_con_grid['iso_year'].astype(str)
    + '-W'
    + casos_con_grid['iso_week'].astype(str).str.zfill(2)
)

# Encode sex as 1 (M) / 0 (F) with an explicit mapping instead of
# Series.replace, whose implicit object -> numeric downcasting is deprecated.
# The numeric keys keep the cell idempotent when it is re-run on values that
# are already encoded; any other value becomes NaN.
sexo_codes = {'M': 1, 'F': 0, 1: 1, 0: 0}
casos_con_grid['sexo'] = casos_con_grid['sexo'].map(sexo_codes)
casos_con_grid


import pandas as pd

# Keep only the columns needed for the weekly aggregation
casos_con_grid2 = casos_con_grid[
    ["sexo", "edad_diagnostico", "distancia_rio", "distancia_verde_m",
     "distancia_grande_m", "year_week", "grid_id"]
].copy()

casos_con_grid2["distancia_rio"] = casos_con_grid2["distancia_rio"].astype(float)

# One row per (grid_id, year_week): covariate means plus the number of cases.
# 'casos' uses the group size so that a case with a missing 'sexo' is still
# counted, and a group whose covariates are all NaN is not dropped.
casos_weekly = (
    casos_con_grid2
    .groupby(["grid_id", "year_week"])
    .agg(
        distancia_grande_m=("distancia_grande_m", "mean"),
        distancia_verde_m=("distancia_verde_m", "mean"),
        edad_diagnostico=("edad_diagnostico", "mean"),
        sexo=("sexo", "mean"),
        casos=("sexo", "size"),
    )
    .reset_index()
)
casos_weekly.head()

  casos_con_grid['sexo'] = casos_con_grid['sexo'].replace({'M': 1, 'F': 0})


Unnamed: 0,grid_id,year_week,distancia_grande_m,distancia_verde_m,edad_diagnostico,sexo,casos
0,G_0_112,2024-W11,157265.519585,151446.609841,15.0,1.0,1
1,G_0_112,2024-W13,157265.519585,151446.609841,30.0,1.0,1
2,G_0_112,2024-W17,157265.519585,151446.609841,39.0,1.0,1
3,G_10_80,2024-W11,129637.239529,123932.828562,23.0,0.0,1
4,G_10_80,2024-W14,129204.491662,123496.962154,40.0,0.333333,3


In [15]:
# =========================
# 1) Completar semanas faltantes por grid_id
# =========================
import pandas as pd
from datetime import date

# Work on a copy of the weekly table (grid_id, year_week, ..., casos)
df = casos_weekly.copy()

# --- A: full range of weeks between the first and last observed week.
# Each "YYYY-Www" label is parsed as the Monday (ISO weekday 1) of that week.
mondays = pd.to_datetime(
    df['year_week'] + '-1',
    format='%G-W%V-%u',
)
periods_orig = mondays.dt.to_period('W-MON')
min_p = periods_orig.min()
max_p = periods_orig.max()
all_periods = pd.period_range(start=min_p, end=max_p, freq='W-MON')

def extend_to_today(all_year_weeks):
    """Extend a collection of ISO week labels up to the current ISO week.

    Parameters
    ----------
    all_year_weeks : iterable of str
        Week labels formatted as ``"YYYY-Www"`` (ISO year and ISO week).

    Returns
    -------
    list of str
        The input labels, followed by one label per week after the latest
        input week up to and including the ISO week that contains today.
        The input is returned unchanged (as a list) when it is empty or
        already reaches the current week.

    Raises
    ------
    ValueError
        If a label cannot be parsed with the ``%G-W%V-%u`` format.
    """
    weeks_list = list(all_year_weeks)
    if not weeks_list:
        return weeks_list

    # Monday (ISO weekday 1) of every input week
    mondays = pd.to_datetime(pd.Series(weeks_list) + '-1',
                             format='%G-W%V-%u', errors='raise')
    last_monday = mondays.max()

    # Monday of the ISO week containing today. Computed explicitly because a
    # 'W-MON' period ends on Monday, so the period containing today maps to
    # next Monday's week on every day except Monday itself.
    today = pd.Timestamp(date.today())
    current_monday = today - pd.Timedelta(days=today.weekday())

    if current_monday <= last_monday:
        return weeks_list

    new_mondays = pd.date_range(start=last_monday + pd.Timedelta(weeks=1),
                                end=current_monday,
                                freq='7D')
    # Build the labels from isocalendar() rather than strftime('%G-W%V') so
    # the result does not depend on the platform's strftime directives.
    new_weeks = ['{}-W{:02d}'.format(*monday.isocalendar()[:2])
                 for monday in new_mondays]
    return weeks_list + new_weeks

# Week labels for the observed range, then passed through extend_to_today
all_year_weeks = extend_to_today(all_periods.strftime('%G-W%V'))

# --- B: cartesian index of grid_id x every week
grids = df['grid_id'].unique()
idx = pd.MultiIndex.from_product([grids, all_year_weeks],
                                 names=['grid_id', 'year_week'])

# Reindexing onto the full product inserts NaN rows for the
# (grid_id, year_week) pairs that are absent from df
df_indexed = df.set_index(['grid_id', 'year_week'])
df_full = df_indexed.reindex(idx).reset_index()

# =========================
# 2) Relleno de NaN por media de su propia celda (grid_id)
# =========================
cols_fill_mean = [
    'distancia_grande_m',
    'distancia_verde_m',
    'edad_diagnostico',
    'sexo'
]
# To bring 'distancia_rio' back, add it to cols_fill_mean once it is in df

# Make sure those columns exist. A missing column is created as float NaN
# (not pd.NA, which yields an object column that the grouped mean below
# cannot aggregate).
for c in cols_fill_mean:
    if c not in df_full.columns:
        df_full[c] = float('nan')

# Fill NaN with the mean of the same grid cell
group_means = df_full.groupby('grid_id')[cols_fill_mean].transform('mean')
df_full[cols_fill_mean] = df_full[cols_fill_mean].fillna(group_means)

# Weeks without any recorded case get 0 cases
if 'casos' not in df_full.columns:
    df_full['casos'] = 0
df_full['casos'] = df_full['casos'].fillna(0)

# =========================
# 3) Agregar centroides de cada celda (lon/lat) para merges posteriores (clima, etc.)
# =========================
grid_ll = grid.to_crs(epsg=4326).copy()

# Compute the centroids in a projected CRS and only then convert the points
# to lon/lat: taking .centroid directly on EPSG:4326 geometries triggers
# GeoPandas' "geographic CRS" warning because planar centroids of degree
# coordinates are not geometrically meaningful.
centroids_ll = (
    grid.to_crs(grid.estimate_utm_crs())
        .geometry.centroid
        .to_crs(epsg=4326)
)
grid_ll['longitude'] = centroids_ll.x
grid_ll['latitude']  = centroids_ll.y

df_full = df_full.merge(
    grid_ll[['grid_id','longitude','latitude']],
    on='grid_id', how='left'
)

# =========================
# 4) (Opcional) crear lags, medias móviles, acumulados por celda
# =========================
# Order rows by cell and week; the "YYYY-Www" labels are zero-padded strings
df_full = df_full.sort_values(['grid_id', 'year_week'])

# Lagged case counts (1 to 4 rows back within each cell)
for lag in (1, 2, 3, 4):
    lag_col = f'casos_lag_{lag}'
    df_full[lag_col] = df_full.groupby('grid_id')['casos'].shift(lag)

# 4-week moving average of the previous weeks (current week excluded)
df_full['casos_ma_4'] = df_full.groupby('grid_id')['casos'].transform(
    lambda serie: serie.shift(1).rolling(4, min_periods=1).mean()
)

# Rolling sums over the previous 2, 3 and 4 weeks (current week excluded);
# NaN until a full window of previous weeks is available
for n in (2, 3, 4):
    sum_col = f'casos_sum_{n}w'
    df_full[sum_col] = df_full.groupby('grid_id')['casos'].transform(
        lambda serie: serie.shift(1).rolling(n, min_periods=n).sum()
    )

# Month of the Monday that starts each ISO week
week_start = pd.to_datetime(df_full['year_week'] + '-1', format='%G-W%V-%u')
df_full['mes'] = week_start.dt.month

# =========================
# 5) Final table used for modelling
# =========================
df_full2 = df_full.copy()

df_full2



  grid_ll['longitude'] = grid_ll.geometry.centroid.x

  grid_ll['latitude']  = grid_ll.geometry.centroid.y


Unnamed: 0,grid_id,year_week,distancia_grande_m,distancia_verde_m,edad_diagnostico,sexo,casos,longitude,latitude,casos_lag_1,casos_lag_2,casos_lag_3,casos_lag_4,casos_ma_4,casos_sum_2w,casos_sum_3w,casos_sum_4w,mes
0,G_0_112,2023-W49,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,,,,,,,,,12
1,G_0_112,2023-W50,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,,,,0.0,,,,12
2,G_0_112,2023-W51,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,0.0,,,0.0,0.0,,,12
3,G_0_112,2023-W52,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,0.0,0.0,,0.0,0.0,0.0,,12
4,G_0_112,2024-W01,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4171,G_8_78,2025-W27,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6
4172,G_8_78,2025-W28,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
4173,G_8_78,2025-W29,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
4174,G_8_78,2025-W30,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7


In [16]:
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree


import sqlite3
import pandas as pd

import os

# 1) Locate the database. The CLIMA_DB_PATH environment variable overrides the
#    default so the notebook can run on machines without this local path.
DB_PATH = os.environ.get(
    "CLIMA_DB_PATH",
    r"C:\Users\Nainh\Proton Drive\nainho1306\My files\Licitacion\Plataforma local\backend\data\mi_base_de_datos5.db",
)

conn = sqlite3.connect(DB_PATH)

# 2) Read every row of climate_test, ordered by ascending date.
#    try/finally guarantees the connection is closed even if the query fails.
try:
    df_clima = pd.read_sql_query("""
    SELECT *
      FROM climate_test
     ORDER BY date
""", conn)
finally:
    # 3) Close the connection
    conn.close()

df_clima


# 1) Label every climate record with its ISO week ("YYYY-Www")
df_clima['date'] = pd.to_datetime(df_clima['date'])
iso = df_clima['date'].dt.isocalendar()
iso_year_txt = iso['year'].astype(str)
iso_week_txt = iso['week'].astype(str).str.zfill(2)
df_clima['year_week'] = iso_year_txt + '-W' + iso_week_txt

# 2) Weekly mean of each climate variable per (latitude, longitude) point;
#    add further variables to the column list if needed
df_weekly_clima = (
    df_clima
    .groupby(['latitude', 'longitude', 'year_week'], as_index=False)
    [['t2m', 'd2m', 'tp']]
    .mean()
)
df_clima

Unnamed: 0,date,latitude,longitude,t2m,d2m,tp,t2m_min,t2m_max,wind_speed,year_week
0,2023-01-01,-33.0,-63.00,296.222351,291.317322,0.000448,,,,2022-W52
1,2023-01-01,-33.0,-62.75,296.385437,290.895935,0.000464,,,,2022-W52
2,2023-01-01,-33.0,-62.50,296.388855,290.684509,0.000487,,,,2022-W52
3,2023-01-01,-33.0,-62.25,296.367371,290.656189,0.000407,,,,2022-W52
4,2023-01-01,-33.0,-62.00,296.280457,290.852478,0.000307,,,,2022-W52
...,...,...,...,...,...,...,...,...,...,...
40360,2025-07-18,-32.0,-62.00,281.916290,272.043365,0.000000,,,,2025-W29
40361,2025-07-18,-32.0,-61.75,281.845001,272.113190,0.000000,,,,2025-W29
40362,2025-07-18,-32.0,-61.50,281.649689,272.678131,0.000000,,,,2025-W29
40363,2025-07-18,-32.0,-61.25,281.551544,273.282623,0.000000,,,,2025-W29


In [17]:
climate_vars = ['t2m', 'd2m', 'tp']

for var in climate_vars:
    df_full2[var] = np.nan

# Weekly climate points keyed by "YYYY-Www"
clima_por_fecha = {
    fecha: subdf.reset_index(drop=True)
    for fecha, subdf in df_weekly_clima.groupby('year_week')
}

# Row labels of df_full2 for each week, computed once before the frame is
# modified inside the loop
rows_by_week = df_full2.groupby('year_week').groups

for fecha, row_labels in rows_by_week.items():
    puntos_dia = clima_por_fecha.get(fecha)

    if puntos_dia is None or puntos_dia.empty:
        # no climate data for this week
        continue

    # Build the KD-tree once per week (not once per row) and query every
    # cell centroid of that week in a single call
    tree = cKDTree(puntos_dia[['longitude', 'latitude']].to_numpy())
    centroides = df_full2.loc[row_labels, ['longitude', 'latitude']].to_numpy()
    _, vecinos = tree.query(centroides)

    # copy t2m, d2m, tp from the nearest climate point
    df_full2.loc[row_labels, climate_vars] = (
        puntos_dia[climate_vars].to_numpy()[vecinos]
    )

# Forward-fill weeks without climate data within each grid cell, so values
# are never carried over from the previous cell's rows
df_full2[climate_vars] = df_full2.groupby('grid_id')[climate_vars].ffill()

df_full2

Unnamed: 0,grid_id,year_week,distancia_grande_m,distancia_verde_m,edad_diagnostico,sexo,casos,longitude,latitude,casos_lag_1,...,casos_lag_3,casos_lag_4,casos_ma_4,casos_sum_2w,casos_sum_3w,casos_sum_4w,mes,t2m,d2m,tp
0,G_0_112,2023-W49,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,,...,,,,,,,12,294.173632,286.491115,1.934596e-05
1,G_0_112,2023-W50,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,,,0.0,,,,12,299.722395,292.769052,3.324236e-04
2,G_0_112,2023-W51,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,,,0.0,0.0,,,12,295.071084,291.219914,2.580030e-04
3,G_0_112,2023-W52,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,0.0,,0.0,0.0,0.0,,12,298.198447,288.496150,6.982258e-07
4,G_0_112,2024-W01,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,297.725355,292.184671,6.403753e-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4171,G_8_78,2025-W27,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,6,279.709656,273.177992,6.811959e-08
4172,G_8_78,2025-W28,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,286.509417,283.157985,1.288823e-04
4173,G_8_78,2025-W29,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,285.755109,279.814648,1.220703e-05
4174,G_8_78,2025-W30,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,285.755109,279.814648,1.220703e-05


## Esto hay que cambiarlo cuando tenga datos más actualizados.
## De momento, lo que hace es completar hasta la fecha a partir del último dato de casos (W23).

In [19]:
import pandas as pd

# 1) Last week for which observed case data is used
umbral_yrwk = "2025-W23"

# 2) Split rows into past (<= threshold) and future (> threshold);
#    zero-padded "YYYY-Www" labels compare correctly as strings
mask_pasado = df_full2['year_week'] <= umbral_yrwk
mask_futuro = df_full2['year_week'] >  umbral_yrwk

# 3) Mean number of cases per grid cell, from past rows only
medias = df_full2.loc[mask_pasado].groupby('grid_id')['casos'].mean()

# 4) Work on a copy so the input frame is not altered
df_fill = df_full2.copy()

# 5) Replace future case counts with the historical mean of their cell
casos_futuros = df_fill.loc[mask_futuro, 'grid_id'].map(medias)
df_fill.loc[mask_futuro, 'casos'] = casos_futuros

# 6) Any remaining NaN (e.g. a cell without past rows) gets the mean of the
#    per-cell means; counts are then rounded to whole cases
media_global = medias.mean()
df_fill['casos'] = df_fill['casos'].fillna(media_global).round(0)

# df_fill now has 'casos' filled for every week after the threshold
print(df_fill[df_fill['year_week'] > umbral_yrwk].head())
df_full2 = df_fill.copy()
df_full2

    grid_id year_week  distancia_grande_m  distancia_verde_m  \
79  G_0_112  2025-W24       157265.519585      151446.609841   
80  G_0_112  2025-W25       157265.519585      151446.609841   
81  G_0_112  2025-W26       157265.519585      151446.609841   
82  G_0_112  2025-W27       157265.519585      151446.609841   
83  G_0_112  2025-W28       157265.519585      151446.609841   

    edad_diagnostico  sexo  casos  longitude   latitude  casos_lag_1  ...  \
79              28.0   1.0    0.0 -64.499088 -31.416738          0.0  ...   
80              28.0   1.0    0.0 -64.499088 -31.416738          0.0  ...   
81              28.0   1.0    0.0 -64.499088 -31.416738          0.0  ...   
82              28.0   1.0    0.0 -64.499088 -31.416738          0.0  ...   
83              28.0   1.0    0.0 -64.499088 -31.416738          0.0  ...   

    casos_lag_3  casos_lag_4  casos_ma_4  casos_sum_2w  casos_sum_3w  \
79          0.0          0.0         0.0           0.0           0.0   
80      

Unnamed: 0,grid_id,year_week,distancia_grande_m,distancia_verde_m,edad_diagnostico,sexo,casos,longitude,latitude,casos_lag_1,...,casos_lag_3,casos_lag_4,casos_ma_4,casos_sum_2w,casos_sum_3w,casos_sum_4w,mes,t2m,d2m,tp
0,G_0_112,2023-W49,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,,...,,,,,,,12,294.173632,286.491115,1.934596e-05
1,G_0_112,2023-W50,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,,,0.0,,,,12,299.722395,292.769052,3.324236e-04
2,G_0_112,2023-W51,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,,,0.0,0.0,,,12,295.071084,291.219914,2.580030e-04
3,G_0_112,2023-W52,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,0.0,,0.0,0.0,0.0,,12,298.198447,288.496150,6.982258e-07
4,G_0_112,2024-W01,157265.519585,151446.609841,28.0,1.00,0.0,-64.499088,-31.416738,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,297.725355,292.184671,6.403753e-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4171,G_8_78,2025-W27,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,6,279.709656,273.177992,6.811959e-08
4172,G_8_78,2025-W28,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,286.509417,283.157985,1.288823e-04
4173,G_8_78,2025-W29,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,285.755109,279.814648,1.220703e-05
4174,G_8_78,2025-W30,129484.524257,123803.197739,57.0,0.75,0.0,-64.419587,-31.724363,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,7,285.755109,279.814648,1.220703e-05


## Tengo que traspasar las variables de Mundo Sano a la grilla, haciendo algún tipo de interpolación o estimación.