Here’s an example that uses RandomForestClassifier to predict whether the rolling 14-day snowfall total reaches the 20 cm two-week threshold.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the snow data. The code below reads the 'Venue' and 'Snowfall (cm)'
# columns; any other columns in the file are carried along untouched.
data = pd.read_csv("snow_data.csv")

# Rolling 14-row snowfall total, computed separately for each venue so that a
# window never mixes venues. Rows without a full 14-row window (the first 13
# rows of every venue) come out as NaN.
# NOTE(review): the window counts rows, not calendar days -- this presumably
# expects one row per venue per day, already in date order; confirm.
data['14_day_sum'] = (
    data.groupby('Venue')['Snowfall (cm)']
    .rolling(window=14)
    .sum()
    .reset_index(0, drop=True)  # drop the 'Venue' index level so the result aligns with `data`
)

# Drop only the rows whose model inputs are missing. A bare dropna() would
# also discard rows that merely have a NaN in some unrelated column.
data.dropna(subset=['Snowfall (cm)', '14_day_sum'], inplace=True)

# Binary target: 1 when the 14-row total reaches the 20 cm two-week threshold.
# Computed after the drop so incomplete windows are never given a label.
data['target_two_week'] = (data['14_day_sum'] >= 20).astype(int)

# Features and target for training.
# NOTE: the target is a deterministic function of the '14_day_sum' feature
# (target = 14_day_sum >= 20), so a model trained on X can reproduce y from
# that column alone. Treat the resulting accuracy as a pipeline smoke test,
# not as a measure of forecasting skill.
X = data[['Snowfall (cm)', '14_day_sum']]
y = data['target_two_week']

# Hold out the last 20% of rows as the test set. shuffle=False keeps the
# existing row order instead of sampling rows at random.
# NOTE(review): this is a chronological split only if the rows are already
# in date order -- confirm against the input file.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train the random forest. The fixed seed makes the fitted model, and
# therefore the reported accuracy, reproducible from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict the held-out rows.
y_pred = model.predict(X_test)

# Share of held-out rows classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

The following example predicts, for each year, whether a location will have at least two continuous weeks (14+ days) with a snow depth of at least 20 cm.

In [None]:
import pandas as pd

# Load the snow data. The code below reads the "Date" and "Snow Depth (cm)"
# columns.
df = pd.read_csv("snow_data.csv")

# Parse the dates and order the rows chronologically so the row-based window
# below walks forward in time.
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date")

# Flag every row that ends a run of 14 consecutive rows with snow depth
# >= 20 cm. All 14 values are >= 20 exactly when the smallest one is, so a
# vectorised rolling minimum replaces a Python callback per window.
# Result: 1.0 = streak, 0.0 = no streak, NaN = fewer than 14 non-missing
# depths in the window (always the case for the first 13 rows).
# NOTE(review): the window counts rows, not calendar days -- this presumably
# expects exactly one row per day with no gaps; confirm.
window_min = df["Snow Depth (cm)"].rolling(window=14, min_periods=14).min()
df["Snow_14Day_Streak"] = (window_min >= 20).astype(float).where(window_min.notna())

# Yearly label: 1 if any row dated in that year ends a 14-row streak.
# The rolling window is computed over the whole series before grouping, so a
# streak that ends in early January may include rows from the previous
# December and is credited to the year in which it ends.
df["Year"] = df["Date"].dt.year
yearly_labels = (
    df.groupby("Year")["Snow_14Day_Streak"]
    .max()
    .reset_index()
    .rename(columns={"Snow_14Day_Streak": "Target"})
)

# Attach the yearly label to every daily row of that year.
df = df.merge(yearly_labels, on="Year", how="left")

from sklearn.model_selection import train_test_split

# Collapse the daily rows to one row per year: the yearly mean of each
# feature column as the inputs, and the yearly maximum of "Target" as the label.
features = ["Avg Snow Depth", "Avg Temperature", "Total Precipitation"]
per_year = df.groupby("Year")
X = per_year[features].mean()
y = per_year["Target"].max()

# Randomly hold out 20% of the years for testing; the seed fixes which ones.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Fit a gradient-boosting classifier with default settings on the training years.
model = HistGradientBoostingClassifier().fit(X_train, y_train)

# Score it on the held-out years: the fraction of labels predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model Accuracy:", accuracy)

# A single example row of forecast values, using the same column names and
# order as the training features.
future_data = pd.DataFrame(
    [
        {
            "Avg Snow Depth": 30,       # example forecast average snow depth
            "Avg Temperature": -5,      # example forecast average temperature
            "Total Precipitation": 50,  # example forecast precipitation
        }
    ]
)

# Ask the trained model for the label of that row and report it as Yes/No.
prediction = model.predict(future_data)
answer = "Yes" if prediction[0] else "No"
print("Will there be 2 weeks of 20+ cm snow?", answer)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the data. The code below reads the 'Lumimäärä (cm)' column
# (Finnish for "snow amount (cm)").
data = pd.read_csv("lumidata.csv")

# Rolling 14-row snow total. The first 13 rows have no full window and come
# out as NaN.
# NOTE(review): the window counts rows, not calendar days -- this presumably
# expects one row per day in date order; confirm.
data['14_day_sum'] = data['Lumimäärä (cm)'].rolling(window=14).sum()

# Drop only the rows whose model inputs are missing. A bare dropna() would
# also discard rows that merely have a NaN in some unrelated column.
data.dropna(subset=['Lumimäärä (cm)', '14_day_sum'], inplace=True)

# Target: 1 if the 14-row total is below 20 cm, otherwise 0. The comparison
# is strict so that a total of exactly 20 cm counts as reaching the
# threshold rather than falling below it.
data['target'] = (data['14_day_sum'] < 20).astype(int)

# Features and target (example feature set).
# NOTE: the target is a deterministic function of the '14_day_sum' feature,
# so a model trained on X can reproduce y from that column alone. Treat the
# resulting accuracy as a pipeline smoke test, not as forecasting skill.
X = data[['Lumimäärä (cm)', '14_day_sum']]
y = data['target']

# Split into training and test sets, holding out the last 20% of rows.
# Neighbouring rows share 13 of the 14 values behind their rolling sums, so
# a random shuffle would place near-duplicate rows in both sets;
# shuffle=False keeps the existing row order instead.
# NOTE(review): this is a chronological split only if the rows are already
# in date order -- confirm against the input file.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train the random forest model. The fixed seed makes the fitted model, and
# therefore the reported accuracy, reproducible from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the test data.
y_pred = model.predict(X_test)

# Evaluate performance: share of held-out rows classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f'Mallin tarkkuus: {accuracy:.2f}')