In [32]:
import numpy as np
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go

In [13]:
# project directory
current_directory = os.getcwd()
folder = current_directory.split("\\")[-1]
if folder != 'galizia_weather':
    os.chdir('..')
    print(os.getcwd()) 
else:
    print(os.getcwd())


c:\projects\galizia_weather


In [20]:
path = "data/raw/Santiago-EOAS_diaria_01-01-2025_to_01-04-2025.csv"
df = pd.read_csv(path, delimiter=";", skiprows=2)
print(df.shape)
df.head()

(273, 5)


Unnamed: 0,Fecha,Variable,Valor,Código validación,Unidad
0,01/01/2025,Lluvia,0.0,1,L/m2
1,01/01/2025,Humedad relativa media a 1.5m,73.0,1,%
2,01/01/2025,Temperatura media atura a 1.5m,6.43,1,ºC
3,02/01/2025,Lluvia,0.0,1,L/m2
4,02/01/2025,Humedad relativa media a 1.5m,75.0,1,%


In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273 entries, 0 to 272
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Fecha              273 non-null    object 
 1   Variable           273 non-null    object 
 2   Valor              273 non-null    float64
 3   Código validación  273 non-null    int64  
 4   Unidad             273 non-null    object 
dtypes: float64(1), int64(1), object(3)
memory usage: 10.8+ KB


In [23]:
df["Fecha"] = pd.to_datetime(df["Fecha"], dayfirst=True)
df["Valor"] = pd.to_numeric(df["Valor"], errors="coerce")

In [36]:
df_pivot = df.pivot_table(
    index="Fecha",
    columns="Variable",
    values="Valor",
    aggfunc="first"
)

In [37]:
df_pivot

Variable,Humedad relativa media a 1.5m,Lluvia,Temperatura media atura a 1.5m
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-01-01,73.0,0.0,6.43
2025-01-02,75.0,0.0,11.06
2025-01-03,95.0,27.3,12.00
2025-01-04,85.0,12.8,12.71
2025-01-05,92.0,46.8,12.69
...,...,...,...
2025-03-28,78.0,0.0,10.72
2025-03-29,73.0,0.0,11.74
2025-03-30,58.0,0.0,14.04
2025-03-31,53.0,0.0,17.21


In [38]:
df_pivot.columns = [col.strip().split(" ")[0].lower() for col in df_pivot.columns]
df_pivot = df_pivot.rename(columns={
    "temperatura": "temperatura_media",
    "humedad": "humedad_media",
    "lluvia": "lluvia"
})
df_pivot = df_pivot.reset_index()

In [41]:
print(df.info())
df_pivot

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273 entries, 0 to 272
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Fecha              273 non-null    datetime64[ns]
 1   Variable           273 non-null    object        
 2   Valor              273 non-null    float64       
 3   Código validación  273 non-null    int64         
 4   Unidad             273 non-null    object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 10.8+ KB
None


Unnamed: 0,Fecha,humedad_media,lluvia,temperatura_media
0,2025-01-01,73.0,0.0,6.43
1,2025-01-02,75.0,0.0,11.06
2,2025-01-03,95.0,27.3,12.00
3,2025-01-04,85.0,12.8,12.71
4,2025-01-05,92.0,46.8,12.69
...,...,...,...,...
86,2025-03-28,78.0,0.0,10.72
87,2025-03-29,73.0,0.0,11.74
88,2025-03-30,58.0,0.0,14.04
89,2025-03-31,53.0,0.0,17.21


## GRÁFICAS EN PLOTLY

In [46]:
# 2. Visualizaciones

# Temperatura diaria
fig_temp = px.line(df_pivot, x="Fecha", y="temperatura_media", title="Temperatura media diaria en Santiago")
fig_temp.show()

# Lluvia diaria
fig_rain = px.bar(df_pivot, x="Fecha", y="lluvia", title="Lluvia diaria en Santiago")
fig_rain.show()

# Temperatura + lluvia juntas
fig_combo = go.Figure()
fig_combo.add_trace(go.Bar(x=df_pivot["Fecha"], y=df_pivot["lluvia"], name="Lluvia (mm)", yaxis="y", marker_color="rgba(0,0,255,0.4)"))
fig_combo.add_trace(go.Scatter(x=df_pivot["Fecha"], y=df_pivot["temperatura_media"], name="Temperatura (°C)", yaxis="y2", mode='lines', line=dict(color="red")))

fig_combo.update_layout(
    title="Lluvia y Temperatura diarias en Santiago",
    yaxis=dict(title="Lluvia (mm)"),
    yaxis2=dict(title="Temperatura (°C)", overlaying="y", side="right"),
    legend=dict(x=0.01, y=0.99),
    height=500
)
fig_combo.show()

# Lluvia mensual
df_pivot["Mes"] = df_pivot["Fecha"].dt.to_period("M").astype(str)
rain_monthly = df_pivot.groupby("Mes")["lluvia"].sum().reset_index()
fig_rain_month = px.bar(rain_monthly, x="Mes", y="lluvia", title="Lluvia acumulada por mes")
fig_rain_month.update_xaxes(dtick="M1", tickformat="%b %Y")
fig_rain_month.show()

# Temperatura mensual
temp_monthly = df_pivot.groupby("Mes")["temperatura_media"].mean().reset_index()
fig_temp_month = px.line(temp_monthly, x="Mes", y="temperatura_media", title="Temperatura media mensual")
fig_temp_month.update_xaxes(dtick="M1", tickformat="%b %Y")
fig_temp_month.show()

# Histograma
fig_hist = px.histogram(df_pivot, x="temperatura_media", nbins=30, title="Distribución de temperaturas")
fig_hist.update_xaxes(dtick="M1", tickformat="%b %Y")
fig_hist.show()

# Boxplot por mes
fig_box = px.box(df_pivot, x="Mes", y="temperatura_media", title="Boxplot mensual de temperatura")
fig_box.update_xaxes(dtick="M1", tickformat="%b %Y")
fig_box.show()

In [53]:
df_pivot.to_csv('data/processed/weather_santiago.csv')

In [43]:
df_pivot

Unnamed: 0,Fecha,humedad_media,lluvia,temperatura_media,Mes
0,2025-01-01,73.0,0.0,6.43,2025-01
1,2025-01-02,75.0,0.0,11.06,2025-01
2,2025-01-03,95.0,27.3,12.00,2025-01
3,2025-01-04,85.0,12.8,12.71,2025-01
4,2025-01-05,92.0,46.8,12.69,2025-01
...,...,...,...,...,...
86,2025-03-28,78.0,0.0,10.72,2025-03
87,2025-03-29,73.0,0.0,11.74,2025-03
88,2025-03-30,58.0,0.0,14.04,2025-03
89,2025-03-31,53.0,0.0,17.21,2025-03


In [47]:
# Definir si llovió o no
df_pivot["llovio"] = df_pivot["lluvia"] > 0

# Contar días con y sin lluvia
dias_lluvia = df_pivot["llovio"].value_counts().rename({True: "Día con lluvia", False: "Día sin lluvia"}).reset_index()
dias_lluvia.columns = ["Tipo de día", "Cantidad"]

# Pie chart
fig_pie = px.pie(
    dias_lluvia,
    names="Tipo de día",
    values="Cantidad",
    title="Porcentaje de días con lluvia vs sin lluvia",
    hole=0.4  # para hacerlo tipo donut
)
fig_pie.update_traces(textinfo="percent+label")
fig_pie.show()


In [48]:
dias_con_lluvia = (df_pivot["lluvia"] > 0).sum()
total_dias = len(df_pivot)

print(f"Días con lluvia: {dias_con_lluvia} de {total_dias} ({dias_con_lluvia / total_dias:.1%})")


Días con lluvia: 48 de 91 (52.7%)


In [49]:
# Contar días con lluvia por mes
dias_lluviosos_por_mes = df_pivot[df_pivot["lluvia"] > 0].groupby("Mes").size().reset_index(name="Días con lluvia")

# Ordenar para ver el mes con más días de lluvia
dias_lluviosos_por_mes = dias_lluviosos_por_mes.sort_values(by="Días con lluvia", ascending=False)

# Mostrar tabla
print(dias_lluviosos_por_mes)

# O visualizar con barras
import plotly.express as px

fig_dias_lluvia = px.bar(
    dias_lluviosos_por_mes,
    x="Mes",
    y="Días con lluvia",
    title="Cantidad de días con lluvia por mes"
)
fig_dias_lluvia.update_xaxes(type="category")
fig_dias_lluvia.show()

       Mes  Días con lluvia
0  2025-01               20
1  2025-02               16
2  2025-03               12


In [50]:
# Suma de lluvia por mes
lluvia_total_por_mes = df_pivot.groupby("Mes")["lluvia"].sum().reset_index(name="Lluvia total (mm)")

# Ordenar
lluvia_total_por_mes = lluvia_total_por_mes.sort_values(by="Lluvia total (mm)", ascending=False)

# Mostrar tabla
print(lluvia_total_por_mes)

# Visualizar con barras
fig_lluvia_total = px.bar(
    lluvia_total_por_mes,
    x="Mes",
    y="Lluvia total (mm)",
    title="Cantidad total de lluvia por mes"
)
fig_lluvia_total.update_xaxes(type="category")
fig_lluvia_total.show()


       Mes  Lluvia total (mm)
0  2025-01              514.0
1  2025-02              139.8
2  2025-03              113.0
3  2025-04                0.0
