In [58]:
import pandas as pd
import numpy as np
import os

from datetime import datetime
import torch
from chronos import Chronos2Pipeline

In [59]:
pipeline = Chronos2Pipeline.from_pretrained("amazon/chronos-2",  device_map="cuda")

In [82]:
def make_ws_hourly(df_raw):
    df = df_raw.copy()
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    df = df.sort_values("Timestamp").set_index("Timestamp")
    df = df[~df.index.duplicated(keep="first")]

    hourly = df.resample("1h").agg(
        precipitation_mm=("precipitationQuantityDiff_mm", "sum"),
        intensity_mm_h=("precipitationIntensity_mm_h", "mean"),
        samples=("precipitationType", "count"),
        precip_type=("precipitationType",
                     lambda x: x.mode().iloc[0] if not x.mode().empty else np.nan),)


    gap_mask = hourly["samples"] == 0
    hourly.loc[gap_mask, ["precipitation_mm", "intensity_mm_h",]] = np.nan
    hourly = hourly.drop(columns="samples").reset_index()
    hourly = hourly.interpolate(method="linear", limit=3, limit_direction="both", limit_area="inside")
    return hourly

In [84]:
df_Kotaniementie = pd.read_csv('cleaned_datasets/wes100/df_Kaakkovuorentie.csv')

In [85]:
df_Kotaniementie.head()

Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2024-04-02 08:15:00,0.0,0.0,36.78,0.0,0
1,2024-04-02 08:25:00,0.0,0.0,36.78,0.0,0
2,2024-04-02 08:35:00,0.0,0.0,36.78,0.0,0
3,2024-04-02 08:45:00,0.0,0.0,36.78,0.0,0
4,2024-04-02 08:55:00,0.0,0.0,36.78,0.0,0


In [86]:
df_Kotaniementie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73502 entries, 0 to 73501
Data columns (total 6 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Timestamp                      73502 non-null  object 
 1   precipitationIntensity_mm_h    73502 non-null  float64
 2   precipitationIntensity_mm_min  73502 non-null  float64
 3   precipitationQuantityAbs_mm    73502 non-null  float64
 4   precipitationQuantityDiff_mm   73502 non-null  float64
 5   precipitationType              73502 non-null  int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 3.4+ MB


In [87]:
df_Kotaniementie["Timestamp"] = pd.to_datetime(df_Kotaniementie["Timestamp"],format="%Y-%m-%d %H:%M:%S", errors="coerce")
print("NaT after parse:", df_Kotaniementie["Timestamp"].isna().sum())
# Sort the ds by timestamp
df_Kotaniementie = df_Kotaniementie.sort_values(by="Timestamp").reset_index(drop=True)
df_Kotaniementie.head()

NaT after parse: 0


Unnamed: 0,Timestamp,precipitationIntensity_mm_h,precipitationIntensity_mm_min,precipitationQuantityAbs_mm,precipitationQuantityDiff_mm,precipitationType
0,2024-04-02 08:15:00,0.0,0.0,36.78,0.0,0
1,2024-04-02 08:25:00,0.0,0.0,36.78,0.0,0
2,2024-04-02 08:35:00,0.0,0.0,36.78,0.0,0
3,2024-04-02 08:45:00,0.0,0.0,36.78,0.0,0
4,2024-04-02 08:55:00,0.0,0.0,36.78,0.0,0


In [88]:
Kotaniementie_hourly = make_ws_hourly(df_Kotaniementie)
Kotaniementie_hourly.head() 

Unnamed: 0,Timestamp,precipitation_mm,intensity_mm_h,precip_type
0,2024-04-02 08:00:00,0.0,0.0,0.0
1,2024-04-02 09:00:00,0.0,0.0,0.0
2,2024-04-02 10:00:00,0.0,0.0,0.0
3,2024-04-02 11:00:00,0.0,0.0,0.0
4,2024-04-02 12:00:00,0.0,0.0,0.0


In [89]:
Kotaniementie_hourly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12818 entries, 0 to 12817
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Timestamp         12818 non-null  datetime64[ns]
 1   precipitation_mm  12299 non-null  float64       
 2   intensity_mm_h    12299 non-null  float64       
 3   precip_type       12299 non-null  float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 400.7 KB


In [90]:
Kotaniementie_hourly['precip_type'].value_counts()

precip_type
0.000000     11145
60.000000      576
67.000000      358
70.000000      138
69.000000       74
1.224490         1
2.448980         1
3.673469         1
56.326531        1
57.551020        1
58.775510        1
20.000000        1
40.000000        1
Name: count, dtype: int64

In [91]:
Kotaniementie_hourly["id"] = "Kotaniementie"

context_data = Kotaniementie_hourly[[
    "id",
    "Timestamp",
    "precipitation_mm",
    "intensity_mm_h",
    "precip_type",
]]

In [92]:
context_data.head()

Unnamed: 0,id,Timestamp,precipitation_mm,intensity_mm_h,precip_type
0,Kotaniementie,2024-04-02 08:00:00,0.0,0.0,0.0
1,Kotaniementie,2024-04-02 09:00:00,0.0,0.0,0.0
2,Kotaniementie,2024-04-02 10:00:00,0.0,0.0,0.0
3,Kotaniementie,2024-04-02 11:00:00,0.0,0.0,0.0
4,Kotaniementie,2024-04-02 12:00:00,0.0,0.0,0.0


In [76]:

last_time = Kotaniementie_hourly["Timestamp"].max()
target_end = pd.Timestamp("2025-11-27 00:00:00")
prediction_length = int((target_end - last_time).total_seconds() // 3600)
if prediction_length <= 0:
    prediction_length = 168  
target = ["precipitation_mm", "intensity_mm_h", "precip_type"]


In [77]:
prediction_WS100 = pipeline.predict_df(
    context_data,              
    future_df=None,
    prediction_length=prediction_length,
    quantile_levels=[0.1, 0.5, 0.9],
    id_column="id",
    timestamp_column="Timestamp",
    target=target,
)



In [78]:
print(prediction_WS100.head())
print(prediction_WS100.columns)


              id           Timestamp       target_name  predictions       0.1  \
0  Kotaniementie 2025-09-18 10:00:00  precipitation_mm     0.002401 -0.001584   
1  Kotaniementie 2025-09-18 11:00:00  precipitation_mm     0.002683 -0.001201   
2  Kotaniementie 2025-09-18 12:00:00  precipitation_mm     0.002595 -0.001078   
3  Kotaniementie 2025-09-18 13:00:00  precipitation_mm     0.003159 -0.001600   
4  Kotaniementie 2025-09-18 14:00:00  precipitation_mm     0.003239 -0.001109   

        0.5       0.9  
0  0.002401  0.102687  
1  0.002683  0.122169  
2  0.002595  0.166594  
3  0.003159  0.238572  
4  0.003239  0.334279  
Index(['id', 'Timestamp', 'target_name', 'predictions', '0.1', '0.5', '0.9'], dtype='object')


In [79]:
prediction_WS100.tail()

Unnamed: 0,id,Timestamp,target_name,predictions,0.1,0.5,0.9
5008,Kotaniementie,2025-11-26 20:00:00,precip_type,0.335264,0.159056,0.335264,34.711323
5009,Kotaniementie,2025-11-26 21:00:00,precip_type,0.331495,0.142845,0.331495,35.100498
5010,Kotaniementie,2025-11-26 22:00:00,precip_type,0.337735,0.151234,0.337735,35.972206
5011,Kotaniementie,2025-11-26 23:00:00,precip_type,0.326199,0.153582,0.326199,32.945942
5012,Kotaniementie,2025-11-27 00:00:00,precip_type,0.326461,0.135704,0.326461,29.175549


In [93]:
prediction_WS100["target_name"].unique()

array(['precipitation_mm', 'intensity_mm_h', 'precip_type'], dtype=object)

In [95]:
wide_pred = (
    prediction_WS100
    .pivot(index=["Timestamp", "id"], columns="target_name", values="predictions")
    .reset_index()
)

wide_pred.tail()


target_name,Timestamp,id,intensity_mm_h,precip_type,precipitation_mm
1666,2025-11-26 20:00:00,Kotaniementie,-0.000854,0.335264,-0.000653
1667,2025-11-26 21:00:00,Kotaniementie,-0.000684,0.331495,-0.000508
1668,2025-11-26 22:00:00,Kotaniementie,-0.000716,0.337735,-0.000544
1669,2025-11-26 23:00:00,Kotaniementie,-0.00122,0.326199,-0.001081
1670,2025-11-27 00:00:00,Kotaniementie,-0.000755,0.326461,-0.000615
