In [1]:
# Install (if missing) and import the necessary libraries
try:
    import polars as pl
except ImportError:
    !pip install polars
    import polars as pl
try:
    import geopandas as gpd
except ImportError:
    !pip install geopandas
    import geopandas as gpd
try:
    import netCDF4
except ImportError:
    !pip install netCDF4
    import netCDF4

import datetime
import pickle
import re

import numpy as np
import pandas as pd

In [2]:
# Load the two-year training data set
df_all = pl.read_parquet("./Data/Train/Train_2years.parquet")

# Long-distance train types, see
# https://www.oebb.at/de/reiseplanung-services/im-zug/unsere-zuege
# ("D", "EN" and "WB" are currently not included.)
trains = ["RJ", "RJX", "NJ", "IC", "ICE", "EC"]

# Keep only the rows whose train type is one of the long-distance types
is_long_distance = pl.col("Train Type").is_in(trains)
df_fern = df_all.filter(is_long_distance)

### Include Areacode and State

The next cell was provided by Hannah Schuster.

In [3]:
# Read the municipality shapefile and give its three columns short names
Regions = gpd.read_file("./Data/Weather/STATISTIK_AUSTRIA_GEM_20230101.shp", encoding="utf-8")
Regions.columns = ["id", "name", "geometry"]

# Each id range of width 10000 is labelled with one federal state
# (pd.cut uses right-closed intervals by default, e.g. (10000, 20000]).
state_bins = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000]
state_labels = ["Burgenland", "Kärnten", "Niederösterreich", "Oberösterreich", "Salzburg",
                "Steiermark", "Tirol", "Vorarlberg", "Wien"]

municipality_ids = Regions["id"].astype(int)

# The region code is the integer id rendered as text with an "AT" prefix
Regions["id"] = "AT" + municipality_ids.astype(str)
Regions["bundesländer"] = pd.cut(municipality_ids, bins=state_bins, labels=state_labels).astype(str)

# Final lookup table: REGCODE, name, BL (geometry is not needed any more)
Regions = (
    Regions
    .rename(columns={"id": "REGCODE", "bundesländer": "BL"})
    .drop(columns=["geometry"])
)

In [4]:
# Build one regular expression that matches any region name as a whole word.
#
# * Region names are data, not regex source, so regex metacharacters in them
#   (for example the "." in "St. ...") are escaped to match literally. Only true
#   metacharacters are escaped; spaces and hyphens are left untouched so the
#   pattern stays valid for the regex engine used by Polars.
# * Alternation is tried in order, so names are listed longest first. Otherwise a
#   short name that happens to come first (say "Zell") would win over a longer
#   name starting at the same position (say "Zell am See") if both are present.
_REGEX_META = str.maketrans({ch: "\\" + ch for ch in "\\.^$*+?()[]{}|"})
region_names = sorted(set(Regions["name"]), key=lambda name: (-len(name), name))
pattern = r"\b(" + "|".join(name.translate(_REGEX_META) for name in region_names) + r")\b"

# Polars copy of the lookup table for the join further below
regions_pl = pl.from_pandas(Regions)

In [5]:
# Station-name fragments (as they appear in the train data) mapped to the name
# that should be looked up in the Regions table instead.
replace_map = {
    "St.Veit/Glan": "St. Veit an der Glan",
    "Wien Meidling": "Wien-Meidling",
    "Wien Hbf": "Wien-Favoriten",
    "Wien Praterstern": "Wien-Leopoldstadt",
    "Wien Westbahnhof": "Wien-Rudolfsheim-Fünfhaus",
    "Wien Aspern Nord": "Wien-Donaustadt",
    "Wien Heiligenstadt": "Wien-Döbling",
    "Wien Atzgersdorf": "Wien-Liesing",
    "Wien Simmering": "Wien-Simmering",
    "Wien Traisengasse": "Wien-Brigittenau",
    "Wien Spittelau": "Wien-Alsergrund",
    "Wien Mitte-Landstraße": "Wien-Landstraße",
    "Wien Hütteldorf": "Wien-Penzing",
    "Wien Matzleinsdorfer Platz": "Wien-Margareten",
    "Wien Matzleinsdorfer Platz (ÖBB)": "Wien-Margareten",
    "Wien Mitte": "Wien-Landstraße",
    "Wien Hadersdorf": "Wien-Penzing",
    "Wien Quartier Belvedere": "Wien-Landstraße",
    "Wien Stadlau": "Wien-Donaustadt",
    "Wien Hetzendorf": "Wien-Meidling",
    "Wien Penzing": "Wien-Penzing",
    "Wien Weidlingau": "Wien-Penzing",
    "Wien Süßenbrunn": "Wien-Donaustadt",
    "St.Pölten": "St. Pölten",
    "Flughafen Wien": "Schwechat",
    "Klagenfurt Hbf": "Klagenfurt am Wörthersee",
    "Bruck/Mur": "Bruck an der Mur",
    "Neuhofen/Krems": "Neuhofen an der Krems",
    "Stainach-Irdning": "Stainach-Pürgg",
    "Tullnerfeld": "Judenau-Baumgarten",
    "Ötztal": "Haiming",
    "St. Johann im Pongau": "Sankt Johann im Pongau",
    "Spittal-Millstättersee": "Spittal an der Drau",
    "Unzmarkt": "Unzmarkt-Frauenburg",
    "Kirchdorf/Krems": "Kirchdorf an der Krems",
    "Langen am Arlberg": "Klösterle",
    "Krumpendorf/Wörthersee": "Krumpendorf am Wörthersee",
    "St.Michael in Obersteiermark": "Sankt Michael in Obersteiermark",
    "St. Michael": "Sankt Michael in Obersteiermark",
    "Spielfeld-Straß": "Straß in Steiermark",
    "Summerau": "Rainbach im Mühlkreis",
    "Studenzen-Fladnitz": "Kirchberg an der Raab",
    "Fürnitz": "Finkenstein am Faaker See",
    "Ebensee": "Ebensee am Traunsee",
    "Saalfelden": "Saalfelden am Steinernen Meer",
    "Bad Goisern": "Bad Goisern am Hallstättersee",
    "Bruck/Glocknerstraße-Fusch": "Bruck an der Großglocknerstraße",
    "Pruggern": "Michaelerberg-Pruggern",
    "Deutsch Wagram": "Deutsch-Wagram",
    "Helmahof": "Deutsch-Wagram",
    "Raaba": "Raaba-Grambach",
    "Pottschach": "Buchbach",
    "Schlöglmühl": "Enzenreith",
    "Silberwald": "Strasshof an der Nordbahn",
    "Wartberg/Krems": "Wartberg an der Krems",
    "Pusarnitz": "Lurnfeld",
    "Kolbnitz": "Reißeck",
    "Götzendorf/Leitha": "Götzendorf an der Leitha",
    "Kastenreith": "Weyer",
    "Platt": "Zellerndorf",
    "Bruck/Leitha": "Bruck an der Leitha",
    "Mixnitz Bärenschützklamm": "Pernegg an der Mur",
    "Nöstlbach-St.Marien": "St. Marien",
    "Breitenschützing": "Schlatt",
    "Stübing": "Peggau",
    "Furth-Göttweig": "Furth bei Göttweig",
    "Furth-Palt": "Furth bei Göttweig",
    "Redl-Zipf": "Pfaffing",
    "Judendorf-Straßengel": "Gratwein-Straßengel",
    "Rohrendorf b.Krems": "Rohrendorf bei Krems",
    "Mariahof-St.Lambrecht": "Sankt Lambrecht",
    "Nettingsdorf": "Ansfelden",
    "Mauer-Öhling": "Oed-Oehling",
    "Steyrling": "Klaus an der Pyhrnbahn",
    "Rottenegg": "St. Gotthard im Mühlkreis",
    "Untertullnerbach": "Tullnerbach",
    "Zellerndorf": "Pernersdorf",
    "Wallersee": "Seekirchen am Wallersee",
    "LinzHbf": "Linz Hbf",
    "Kainisch": "Bad Mitterndorf",
    "Tauplitz": "Bad Mitterndorf",
    "Tauplitz Schiflugschanze Kulm": "Bad Mitterndorf",
    "Bad Mitterndorf Heilbrunn": "Bad Mitterndorf",
    "Niederöblarn": "Öblarn",
    "Tenneck": "Werfen",
    "Bruck-Fusch": "Bruck an der Großglocknerstraße",
    "Böckstein": "Bad Gastein",
    "Kleinreifling": "Weyer",
    "Mitterberghütten": "Bischofshofen",
    "Ebensee/Traunsee": "Ebensee am Traunsee",
    "Faak am See": "Finkenstein am Faaker See"
}

# A single alternation over all keys. Python tries alternatives in order, so the
# keys are sorted longest first: where several keys start at the same position
# ("Wien Mitte" / "Wien Mitte-Landstraße"), the most specific one is used.
_replace_regex = re.compile(
    "|".join(re.escape(key) for key in sorted(replace_map, key=len, reverse=True))
)


def replace_text(text):
    """Rewrite station-name fragments in ``text`` according to ``replace_map``.

    The text is scanned once from left to right. Every occurrence of a key is
    replaced by its mapped value, and text produced by a replacement is never
    scanned again. This differs from applying ``str.replace`` rule by rule, where
    the result depends on the order of the dictionary:

    * rules no longer chain ("Platt" becomes "Zellerndorf" and is not rewritten
      a second time by the "Zellerndorf" rule);
    * long keys are no longer shadowed by shorter ones, so "Ebensee/Traunsee"
      becomes "Ebensee am Traunsee" rather than "Ebensee am Traunsee/Traunsee".

    Parameters
    ----------
    text : str
        Station name to normalise.

    Returns
    -------
    str
        ``text`` with every mapped fragment replaced; text without any mapped
        fragment is returned unchanged.
    """
    return _replace_regex.sub(lambda match: replace_map[match.group(0)], text)

In [6]:
# Normalise each arrival station name, then pull out the region name it contains
# (capture group 1 of `pattern`); rows without a match get a null.
matched_region = (
    pl.col("Arrival Station")
    .map_elements(replace_text, return_dtype=pl.String)
    .str.extract(pattern)
    .alias("Matched Region")
)
df_fern = df_fern.with_columns(matched_region)

In [7]:
# Attach REGCODE and BL to every row by looking up the matched region name.
# A left join keeps rows without a match (their REGCODE and BL stay null).
region_lookup = regions_pl.select("REGCODE", "BL", "name")
df_fern = df_fern.join(
    region_lookup,
    how="left",
    left_on="Matched Region",
    right_on="name",
)
df_fern

Searched Station,Date,Train Nr.,Train Type,Departing Station,Scheduled Departure Time,Arrival Station,Scheduled Arrival Time,Actual Arrival Time,Delay,Cancellation,On Time,Departure Hour,Arrival Hour,Day of Week,Month,Year,Station Latitude,Station Longitude,Matched Region,REGCODE,BL
str,date,str,str,str,datetime[μs],str,datetime[μs],datetime[μs],i64,i32,i32,i8,i8,i8,i8,i32,f64,f64,str,str,str
"""Landeck_Zams""",2024-09-30,"""RJX 869""","""RJX""","""Bregenz""",2024-09-30 12:40:00,"""Landeck-Zams""",2024-09-30 14:25:00,2024-09-30 14:27:00,2,0,1,12,14,1,9,2024,47.148287,10.578534,"""Landeck""","""AT70614""","""Tirol"""
"""Wien_Hbf""",2025-01-08,"""RJ 130""","""RJ""","""Venezia Santa Lucia""",2025-01-08 15:52:00,"""Wien Hbf""",2025-01-08 23:36:00,2025-01-09 00:10:00,34,0,0,15,23,3,1,2025,48.185332,16.37865,"""Wien-Favoriten""","""AT91001""","""Wien"""
"""Tullnerfeld""",2024-01-20,"""NJ 446""","""NJ""","""Wien Hbf (Autoreisezuganlage)""",2024-01-20 22:44:00,"""Tullnerfeld""",2024-01-20 23:19:00,2024-01-20 23:20:00,1,0,1,22,23,6,1,2024,48.295004,15.996219,"""Judenau-Baumgarten""","""AT32112""","""Niederösterreich"""
"""Schärding""",2024-02-28,"""IC 296""","""IC""","""Fürth Hbf""",2024-02-28 00:33:00,"""Schärding""",2024-02-28 21:25:00,2024-02-28 21:25:00,0,0,1,0,21,3,2,2024,48.46422,13.442111,"""Schärding""","""AT41422""","""Oberösterreich"""
"""Innsbruck_Hbf""",2024-09-13,"""RJX 169""","""RJX""","""Zürich HB""",2024-09-13 14:40:00,"""Innsbruck Hbf""",2024-09-13 18:11:00,2024-09-13 18:12:00,1,0,1,14,18,5,9,2024,47.263198,11.400825,"""Innsbruck""","""AT70101""","""Tirol"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Kuchl""",2025-01-02,"""IC 794""","""IC""","""Salzburg Hbf""",2025-01-02 21:12:00,"""Kuchl""",2025-01-02 21:33:00,2025-01-02 21:38:00,5,0,1,21,21,4,1,2025,47.624937,13.142155,"""Kuchl""","""AT50207""","""Salzburg"""
"""Wien_Meidling""",2023-06-26,"""NJ 490""","""NJ""","""Wien Hbf""",2023-06-26 19:35:00,"""Wien Meidling""",2023-06-26 19:40:00,2023-06-26 19:41:00,1,0,1,19,19,1,6,2023,48.174652,16.333905,"""Wien-Meidling""","""AT91201""","""Wien"""
"""Wels_Hbf""",2023-05-25,"""RJX 42""","""RJX""","""Mosonmagyarovar""",2023-05-25 19:20:00,"""Wels Hbf""",2023-05-25 22:00:00,2023-05-25 22:00:00,0,0,1,19,22,4,5,2023,48.165701,14.026241,"""Wels""","""AT40301""","""Oberösterreich"""
"""Unzmarkt""",2024-02-27,"""RJ 633""","""RJ""","""Wien Hbf""",2024-02-27 18:24:00,"""Unzmarkt""",2024-02-27 21:12:00,2024-02-27 21:33:00,21,0,0,18,21,2,2,2024,47.201071,14.443005,"""Unzmarkt-Frauenburg""","""AT62036""","""Steiermark"""


In [8]:
# Check that every station has a REGCODE: the rows listed here are the ones without
has_no_regcode = pl.col("REGCODE").is_null()
df_fern.filter(has_no_regcode)

Searched Station,Date,Train Nr.,Train Type,Departing Station,Scheduled Departure Time,Arrival Station,Scheduled Arrival Time,Actual Arrival Time,Delay,Cancellation,On Time,Departure Hour,Arrival Hour,Day of Week,Month,Year,Station Latitude,Station Longitude,Matched Region,REGCODE,BL
str,date,str,str,str,datetime[μs],str,datetime[μs],datetime[μs],i64,i32,i32,i8,i8,i8,i8,i32,f64,f64,str,str,str


### Include Precipitation count

The dictionary was provided by Hannah Schuster.

In [9]:
# Load the pickled precipitation dictionary.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open("./Data/daily_count_dict.pkl", mode="rb") as pickle_file:
    daily_count_dict = pickle.load(pickle_file)

In [10]:
# Flatten the nested dictionary {REGCODE: {date: value}} into one record per
# (REGCODE, date) pair.
data_rain = []
for regcode, rain_by_day in daily_count_dict.items():
    for day, raw_value in rain_by_day.items():
        amount = float(raw_value)
        data_rain.append({
            "REGCODE": regcode,
            "Date": day.strftime("%Y-%m-%d"),
            # Anything that is not strictly positive is stored as 0.0. This
            # includes NaN, because the comparison is False for NaN.
            "Rain": amount if amount > 0.0 else 0.0,
        })

# Build the frame and turn the ISO date strings into a proper Date column
df_rain = pl.DataFrame(data_rain).with_columns(
    pl.col("Date").str.strptime(pl.Date, "%Y-%m-%d")
)

  if float(rain) > 0.0:


In [11]:
# Create the final DataFrame: rows from 2023 and 2024, each with the rain value
# of its region on that date (null where no rain record exists).
in_study_years = pl.col("Year").is_in([2023, 2024])
df = (
    df_fern
    .filter(in_study_years)
    .join(df_rain, on=["REGCODE", "Date"], how="left")
)
df

Searched Station,Date,Train Nr.,Train Type,Departing Station,Scheduled Departure Time,Arrival Station,Scheduled Arrival Time,Actual Arrival Time,Delay,Cancellation,On Time,Departure Hour,Arrival Hour,Day of Week,Month,Year,Station Latitude,Station Longitude,Matched Region,REGCODE,BL,Rain
str,date,str,str,str,datetime[μs],str,datetime[μs],datetime[μs],i64,i32,i32,i8,i8,i8,i8,i32,f64,f64,str,str,str,f64
"""Landeck_Zams""",2024-09-30,"""RJX 869""","""RJX""","""Bregenz""",2024-09-30 12:40:00,"""Landeck-Zams""",2024-09-30 14:25:00,2024-09-30 14:27:00,2,0,1,12,14,1,9,2024,47.148287,10.578534,"""Landeck""","""AT70614""","""Tirol""",0.2
"""Tullnerfeld""",2024-01-20,"""NJ 446""","""NJ""","""Wien Hbf (Autoreisezuganlage)""",2024-01-20 22:44:00,"""Tullnerfeld""",2024-01-20 23:19:00,2024-01-20 23:20:00,1,0,1,22,23,6,1,2024,48.295004,15.996219,"""Judenau-Baumgarten""","""AT32112""","""Niederösterreich""",0.0
"""Schärding""",2024-02-28,"""IC 296""","""IC""","""Fürth Hbf""",2024-02-28 00:33:00,"""Schärding""",2024-02-28 21:25:00,2024-02-28 21:25:00,0,0,1,0,21,3,2,2024,48.46422,13.442111,"""Schärding""","""AT41422""","""Oberösterreich""",0.0
"""Innsbruck_Hbf""",2024-09-13,"""RJX 169""","""RJX""","""Zürich HB""",2024-09-13 14:40:00,"""Innsbruck Hbf""",2024-09-13 18:11:00,2024-09-13 18:12:00,1,0,1,14,18,5,9,2024,47.263198,11.400825,"""Innsbruck""","""AT70101""","""Tirol""",67.9
"""Mürzzuschlag""",2023-09-08,"""RJ 75""","""RJ""","""Praha hl.n.""",2023-09-08 08:45:00,"""Mürzzuschlag""",2023-09-08 14:28:00,2023-09-08 14:33:00,5,0,1,8,14,5,9,2023,47.607782,15.677731,"""Mürzzuschlag""","""AT62143""","""Steiermark""",0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Kirchberg_in_Tirol""",2023-11-01,"""EC 163""","""EC""","""Zürich HB""",2023-11-01 08:33:00,"""Kirchberg in Tirol""",2023-11-01 16:10:00,,,1,0,8,16,3,11,2023,47.448953,12.308896,"""Kirchberg in Tirol""","""AT70409""","""Tirol""",0.0
"""Wien_Meidling""",2023-06-26,"""NJ 490""","""NJ""","""Wien Hbf""",2023-06-26 19:35:00,"""Wien Meidling""",2023-06-26 19:40:00,2023-06-26 19:41:00,1,0,1,19,19,1,6,2023,48.174652,16.333905,"""Wien-Meidling""","""AT91201""","""Wien""",0.4
"""Wels_Hbf""",2023-05-25,"""RJX 42""","""RJX""","""Mosonmagyarovar""",2023-05-25 19:20:00,"""Wels Hbf""",2023-05-25 22:00:00,2023-05-25 22:00:00,0,0,1,19,22,4,5,2023,48.165701,14.026241,"""Wels""","""AT40301""","""Oberösterreich""",0.0
"""Unzmarkt""",2024-02-27,"""RJ 633""","""RJ""","""Wien Hbf""",2024-02-27 18:24:00,"""Unzmarkt""",2024-02-27 21:12:00,2024-02-27 21:33:00,21,0,0,18,21,2,2,2024,47.201071,14.443005,"""Unzmarkt-Frauenburg""","""AT62036""","""Steiermark""",0.8


In [12]:
# Persist the combined train/rain data set
output_path = "./Data/Train_Rain_2years.parquet"
df.write_parquet(output_path)