# 🌤️ Weather Pipeline - Säädatan koneoppimispipeline

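Tässä notebookissa rakennetaan säädatan hakuun ja analysointiin perustuva koneoppimispipeline: päivittäinen säädata haetaan Open-Meteo-rajapinnasta, ja lineaarinen regressiomalli koulutetaan ennustamaan päivän keskilämpötila kolmen edellisen päivän keskilämpötiloista.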

## 1. Datan haku Open-Meteo API:sta

In [None]:
import pandas as pd
import requests
import os

# Coordinates and date range of the daily history to download.
lat, lon = 60.1699, 24.9384
start_date = "2013-01-01"
end_date = "2023-12-31"

# Archive request for daily max/min temperature and precipitation sum.
# Only the fragments that interpolate a value need the f-prefix.
url = (
    "https://archive-api.open-meteo.com/v1/archive?"
    f"latitude={lat}&longitude={lon}"
    f"&start_date={start_date}&end_date={end_date}"
    "&daily=temperature_2m_max,temperature_2m_min,precipitation_sum"
    "&timezone=Europe/Helsinki"
)

# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a KeyError on data["daily"] below.
response.raise_for_status()
data = response.json()

# Build a frame from the "daily" section and index it by the parsed "time" column.
df = pd.DataFrame(data["daily"])
df["time"] = pd.to_datetime(df["time"])
df = df.set_index("time")

# Keep a raw copy on disk, then show the first rows as the cell output.
os.makedirs("data", exist_ok=True)
df.to_csv("data/weather_raw.csv")
df.head()

## 2. Datan esikäsittely

In [None]:
# Daily mean temperature, taken as the midpoint of the daily max and min.
df["temp_avg"] = df["temperature_2m_max"].add(df["temperature_2m_min"]).div(2)

# Lag features: the mean temperature of each of the three preceding rows,
# added in the order t-1, t-2, t-3.
df = df.assign(
    **{f"temp_avg_t-{lag}": df["temp_avg"].shift(lag) for lag in (1, 2, 3)}
)

# Drop every row that contains a missing value (the leading rows have no
# complete lag history), persist the result and show the last rows.
df = df.dropna()
df.to_csv("data/weather_processed.csv")
df.tail()

## 3. Mallin koulutus

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Features are the three lagged daily means; the target is the same-row mean.
X = df.loc[:, ["temp_avg_t-1", "temp_avg_t-2", "temp_avg_t-3"]]
y = df.loc[:, "temp_avg"]

# shuffle=False keeps the row order, so the last 20 % of rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
)

# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

## 4. Mallin arviointi

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

# Score the held-out rows with the mean absolute error.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f} °C")

# Plot actual and predicted values on one axes, using the object-oriented API.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test.index, y_test, label="Todellinen")
ax.plot(y_test.index, y_pred, linestyle="--", label="Ennuste")
ax.set_title("Keskilämpötila: Todellinen vs Ennuste")
ax.legend()
ax.grid(True)

# Write the image to disk first, then display it.
os.makedirs("outputs", exist_ok=True)
fig.savefig("outputs/temperature_forecast.png")
plt.show()

## 5. Mallin tallennus

In [None]:
import pickle

# Make sure the target directory exists, so this cell also works when the
# evaluation cell (the only other place that creates it) has not been run.
os.makedirs("outputs", exist_ok=True)

# Serialize the fitted model to disk.
# NOTE: only unpickle this file from a trusted location; loading a pickle
# can execute arbitrary code.
with open("outputs/weather_model.pkl", "wb") as f:
    pickle.dump(model, f)