Import statements

In [224]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read in the visitation and climate dataframes

In [227]:
#Load the raw visitation data and keep only the first 165 rows.
#NOTE(review): presumably the rows after 165 are empty padding in the CSV - confirm against the file.
#The slice is taken from the freshly read frame (the previous code sliced an undefined name `visit`,
#which only worked with leftover kernel state and fails on Restart & Run All).
visitation = pd.read_csv("2025 Allianz Datathon Dataset(Visitation Data).csv").iloc[:165, :]
#Load the daily climate observations for all weather stations
climate = pd.read_csv("2025 Allianz Datathon Dataset(Climate Data).csv")

Check for NaN values in both datasets

In [230]:
#Print the number of missing values per column, first for visitation and then for climate
for frame in (visitation, climate):
    print(frame.isnull().sum())

Year              0
Week              0
Mt. Baw Baw       0
Mt. Stirling      0
Mt. Hotham        0
Falls Creek       0
Mt. Buller        0
Selwyn            0
Thredbo           0
Perisher          0
Charlotte Pass    0
dtype: int64
Bureau of Meteorology station number       0
Year                                       0
Month                                      0
Day                                        0
Maximum temperature (Degree C)          1538
Minimum temperature (Degree C)          1533
Rainfall amount (millimetres)           1956
dtype: int64


Check for NaN values in maximum temperature by year

In [233]:
#For every calendar year from 2010 to 2025 inclusive, print the year followed by
#the number of missing maximum-temperature readings across all stations
max_temp_col = "Maximum temperature (Degree C)"
for year in range(2010, 2026):
    climate_specific_year = climate.loc[climate["Year"].eq(year)]
    n_missing = climate_specific_year[max_temp_col].isna().sum()
    print(year, n_missing)


2010 189
2011 61
2012 44
2013 5
2014 9
2015 90
2016 24
2017 30
2018 50
2019 168
2020 192
2021 126
2022 147
2023 207
2024 159
2025 37


Impute missing values using interpolation

In [236]:
#The climate frame stacks several weather stations on top of each other, so interpolation
#is done separately within each station. Interpolating the whole column at once would fill
#a gap at the start/end of one station's block using readings from a different station.
station_id = climate["Bureau of Meteorology station number"]

#Use cubic interpolation to impute temperature.
#Cubic interpolation does not extrapolate, so gaps at the very start or end of a
#station's record are left as NaN.
for temp_col in ("Maximum temperature (Degree C)", "Minimum temperature (Degree C)"):
    climate[temp_col] = climate[temp_col].groupby(station_id).transform(
        lambda series: series.interpolate(method='cubic')
    )

#Use linear interpolation to impute rainfall
climate["Rainfall amount (millimetres)"] = (
    climate["Rainfall amount (millimetres)"]
    .groupby(station_id)
    .transform(lambda series: series.interpolate(method='linear'))
)

#Optional: check missing values for each year
for year in range(2010, 2026):
    climate_specific_year = climate[climate["Year"] == year]
    print(year, climate_specific_year["Maximum temperature (Degree C)"].isnull().sum())


2010 144
2011 0
2012 0
2013 0
2014 0
2015 0
2016 0
2017 0
2018 0
2019 0
2020 0
2021 0
2022 0
2023 0
2024 0
2025 0


Create dataframes for each ski resort

In [239]:
def assign_ski_week(row):
    """Map a row's date to its week number within that year's ski season.

    The season is taken to run from 9 June (first day of week 1) to
    21 September (last day of week 15) of the row's own calendar year.

    Parameters
    ----------
    row : object exposing a ``date`` attribute (e.g. a DataFrame row with a
        "date" column holding a timestamp).

    Returns
    -------
    int or float
        The 1-based week number (1-15) when the date is inside the season,
        otherwise ``np.nan``.
    """
    current = row.date
    season_open = pd.Timestamp(current.year, 6, 9)
    season_close = pd.Timestamp(current.year, 9, 21)

    #Anything before opening day or after closing day is out of season
    if current < season_open or current > season_close:
        return np.nan

    #Whole days elapsed since opening day, bucketed into 7-day weeks starting at 1
    elapsed_days = (current - season_open).days
    return elapsed_days // 7 + 1

def createDataframe(resort_name, station_number):
    """Build the weekly visitation + climate table for a single ski resort.

    Reads the module-level ``visitation`` and ``climate`` frames.

    Parameters
    ----------
    resort_name : str
        Name of the resort's column in ``visitation``.
    station_number : int
        Value of "Bureau of Meteorology station number" whose daily climate
        records should be attached to the resort.

    Returns
    -------
    pandas.DataFrame
        One row per visitation (Year, Week) with columns ``visitation``,
        ``maxtemp`` (weekly mean of daily maximum temperature), ``mintemp``
        (weekly mean of daily minimum temperature) and ``snow``. Note that
        ``snow`` is the weekly sum of "Rainfall amount (millimetres)".
        Visitation rows with no matching climate week keep NaN in the
        climate columns (left join).
    """
    #Visitation figures for this resort only, under a resort-independent column name
    resort_visits = visitation[["Year", "Week", resort_name]].rename(
        columns={resort_name: "visitation"}
    )

    #Daily climate records for the requested station (copied so the shared frame is not modified)
    station_mask = climate["Bureau of Meteorology station number"] == station_number
    station_daily = climate.loc[station_mask].copy()

    #Attach a proper timestamp, the ski-season week it falls in, and the year taken from that timestamp
    station_daily["date"] = pd.to_datetime(station_daily[["Year", "Month", "Day"]])
    station_daily["Week"] = station_daily.apply(assign_ski_week, axis=1)
    station_daily["Year"] = station_daily["date"].dt.year

    #Days outside the ski season have no week number and are discarded
    in_season = station_daily.dropna(subset=["Week"])

    #Collapse daily records to one row per (Year, Week): mean temperatures and total rainfall
    weekly_climate = (
        in_season
        .groupby(["Year", "Week"])
        .agg(
            maxtemp=("Maximum temperature (Degree C)", "mean"),
            mintemp=("Minimum temperature (Degree C)", "mean"),
            snow=("Rainfall amount (millimetres)", "sum"),
        )
        .reset_index()
    )

    #Keep every visitation row and attach the weekly climate where available
    return resort_visits.merge(weekly_climate, on=["Year", "Week"], how="left")

#Each entry pairs a resort's visitation column with the weather station used for its climate data
resort_stations = [
    ("Mt. Baw Baw", 85291),
    ("Mt. Stirling", 83024),    #Mt Stirling uses Mt. Buller's station number
    ("Mt. Hotham", 83085),
    ("Falls Creek", 83084),
    ("Mt. Buller", 83024),
    ("Selwyn", 72161),
    ("Thredbo", 71032),
    ("Perisher", 71075),
    ("Charlotte Pass", 71032),  #Charlotte Pass could use either 71032 or 71075, but 71032 has slightly fewer NaN values in its climate data
]

#Build one merged dataframe per resort, in the same order as the list above
(
    bawbaw,
    stirling,
    hotham,
    falls,
    buller,
    selwyn,
    thredbo,
    perisher,
    charlotte,
) = [createDataframe(resort, station) for resort, station in resort_stations]

Check the NaN values for the two weather stations closest to the Charlotte Pass ski resort (71032 and 71075).

In [241]:
#Missing values per column for station 71032 (the station used for Thredbo and Charlotte Pass)
climate.loc[climate["Bureau of Meteorology station number"].eq(71032)].isna().sum()


Bureau of Meteorology station number    0
Year                                    0
Month                                   0
Day                                     0
Maximum temperature (Degree C)          0
Minimum temperature (Degree C)          0
Rainfall amount (millimetres)           0
dtype: int64

In [242]:
#Missing values per column for station 71075 (the station used for Perisher)
climate.loc[climate["Bureau of Meteorology station number"].eq(71075)].isna().sum()

Bureau of Meteorology station number      0
Year                                      0
Month                                     0
Day                                       0
Maximum temperature (Degree C)          144
Minimum temperature (Degree C)          145
Rainfall amount (millimetres)           145
dtype: int64

View the dataframes

In [244]:
#Display the merged weekly visitation/climate frame for Mt. Baw Baw (station 85291)
bawbaw

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,555.0,4.957143,1.100000,24.4
1,2014.0,2.0,804.0,5.485714,0.814286,21.0
2,2014.0,3.0,993.0,1.042857,-1.814286,83.3
3,2014.0,4.0,2976.0,2.528571,-0.957143,82.0
4,2014.0,5.0,11112.0,0.685714,-2.257143,82.6
...,...,...,...,...,...,...
160,2024.0,11.0,5977.0,6.157143,1.296722,26.2
161,2024.0,12.0,3597.0,5.428571,0.928571,23.6
162,2024.0,13.0,1500.0,7.142857,-0.100000,50.6
163,2024.0,14.0,0.0,5.628571,0.261872,45.8


In [245]:
#Display the merged frame for Mt. Stirling; it shares station 83024 with Mt. Buller, so the climate columns match
stirling

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,60.0,4.271429,-0.014286,19.0
1,2014.0,2.0,42.0,5.257143,-0.342857,47.6
2,2014.0,3.0,30.0,0.385714,-2.514286,133.6
3,2014.0,4.0,165.0,0.957143,-1.857143,80.0
4,2014.0,5.0,645.0,-0.285714,-3.400000,71.4
...,...,...,...,...,...,...
160,2024.0,11.0,1416.0,4.885714,0.828571,44.2
161,2024.0,12.0,701.0,4.257143,0.157143,43.6
162,2024.0,13.0,213.0,6.100000,-0.671429,44.4
163,2024.0,14.0,0.0,5.728571,-0.571429,26.4


In [246]:
#Display the merged weekly visitation/climate frame for Mt. Hotham (station 83085)
hotham

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,3483.0,3.457143,-1.142857,42.0
1,2014.0,2.0,1253.0,3.000000,-1.257143,13.0
2,2014.0,3.0,2992.0,-0.900000,-3.285714,23.0
3,2014.0,4.0,9680.0,-0.985714,-3.471429,0.6
4,2014.0,5.0,29628.0,-1.842857,-4.000000,3.2
...,...,...,...,...,...,...
160,2024.0,11.0,24260.0,3.471429,-0.057143,51.0
161,2024.0,12.0,19289.0,2.871429,-0.614286,103.0
162,2024.0,13.0,14487.0,4.714286,-1.357143,36.2
163,2024.0,14.0,2700.0,4.628571,-1.000000,29.6


In [250]:
#Display the merged weekly visitation/climate frame for Falls Creek (station 83084)
falls

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,2790.0,4.600000,-0.300000,23.8
1,2014.0,2.0,1425.0,4.414286,-0.285714,7.6
2,2014.0,3.0,2101.0,0.100000,-2.828571,69.6
3,2014.0,4.0,9544.0,1.385714,-2.685714,24.2
4,2014.0,5.0,26211.0,-0.728571,-3.757143,13.6
...,...,...,...,...,...,...
160,2024.0,11.0,38541.0,3.874503,0.433813,49.2
161,2024.0,12.0,42101.0,3.214286,-0.157143,75.0
162,2024.0,13.0,25252.0,5.771429,-0.514286,39.6
163,2024.0,14.0,17137.0,5.900000,-0.585714,22.2


In [254]:
#Display the merged weekly visitation/climate frame for Mt. Buller (station 83024)
buller

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,8296.0,4.271429,-0.014286,19.0
1,2014.0,2.0,1987.0,5.257143,-0.342857,47.6
2,2014.0,3.0,2413.0,0.385714,-2.514286,133.6
3,2014.0,4.0,18831.0,0.957143,-1.857143,80.0
4,2014.0,5.0,49217.0,-0.285714,-3.400000,71.4
...,...,...,...,...,...,...
160,2024.0,11.0,43387.0,4.885714,0.828571,44.2
161,2024.0,12.0,36668.0,4.257143,0.157143,43.6
162,2024.0,13.0,15415.0,6.100000,-0.671429,44.4
163,2024.0,14.0,0.0,5.728571,-0.571429,26.4


In [257]:
#Display the merged weekly visitation/climate frame for Selwyn (station 72161)
selwyn

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,1041.0,6.828571,1.571429,29.2
1,2014.0,2.0,383.0,6.714286,1.528571,14.4
2,2014.0,3.0,597.0,1.671429,-0.857143,109.0
3,2014.0,4.0,2877.0,3.142857,-0.757143,60.2
4,2014.0,5.0,8588.0,1.271429,-2.257143,44.2
...,...,...,...,...,...,...
160,2024.0,11.0,6859.0,8.600000,2.628571,22.4
161,2024.0,12.0,5969.0,7.242857,1.928571,27.2
162,2024.0,13.0,3351.0,9.301800,2.085714,20.1
163,2024.0,14.0,1177.0,8.728571,2.142857,24.2


In [259]:
#Display the merged weekly visitation/climate frame for Thredbo (station 71032)
thredbo

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,5535.0,4.714286,-2.000000,40.6
1,2014.0,2.0,2090.0,2.657143,-1.685714,8.0
2,2014.0,3.0,3216.0,-1.028571,-4.114286,4.2
3,2014.0,4.0,15497.0,-1.314286,-4.357143,0.5
4,2014.0,5.0,46546.0,-1.685714,-5.185714,3.0
...,...,...,...,...,...,...
160,2024.0,11.0,37608.0,3.500000,-0.700000,29.8
161,2024.0,12.0,33208.0,2.500000,-1.171429,69.4
162,2024.0,13.0,18120.0,4.614286,-0.700000,23.6
163,2024.0,14.0,6270.0,5.857143,-1.242857,11.4


In [261]:
#Display the merged weekly visitation/climate frame for Perisher (station 71075)
perisher

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,7370.0,6.328571,-1.585714,62.0
1,2014.0,2.0,2751.0,5.257143,-1.857143,13.8
2,2014.0,3.0,4255.0,0.885714,-2.457143,220.6
3,2014.0,4.0,20265.0,2.942857,-2.457143,53.8
4,2014.0,5.0,61339.0,1.185714,-3.757143,39.0
...,...,...,...,...,...,...
160,2024.0,11.0,49136.0,6.300000,0.085714,29.4
161,2024.0,12.0,43382.0,4.585714,0.671429,99.2
162,2024.0,13.0,23908.0,7.985714,0.671429,41.0
163,2024.0,14.0,8244.0,8.971429,-0.728571,5.4


In [263]:
#Display the merged frame for Charlotte Pass; it shares station 71032 with Thredbo, so the climate columns match
charlotte

Unnamed: 0,Year,Week,visitation,maxtemp,mintemp,snow
0,2014.0,1.0,408.0,4.714286,-2.000000,40.6
1,2014.0,2.0,151.0,2.657143,-1.685714,8.0
2,2014.0,3.0,230.0,-1.028571,-4.114286,4.2
3,2014.0,4.0,1134.0,-1.314286,-4.357143,0.5
4,2014.0,5.0,3403.0,-1.685714,-5.185714,3.0
...,...,...,...,...,...,...
160,2024.0,11.0,2667.0,3.500000,-0.700000,29.8
161,2024.0,12.0,2392.0,2.500000,-1.171429,69.4
162,2024.0,13.0,1323.0,4.614286,-0.700000,23.6
163,2024.0,14.0,454.0,5.857143,-1.242857,11.4
