In [1]:
import os
from pprint import pprint
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# =========================
# Load and prepare the data
# =========================

# Read the full history; the "date" column is parsed to datetime on load.
df = pd.read_csv(
    "european_capitals_history_clean.csv",
    parse_dates=["date"]
)

# Select the Warszawa rows and the three columns used below in a single
# .loc call, then sort chronologically. sort_values() returns a new frame,
# so nothing is mutated in place on a slice of `df` (the previous
# inplace sort on a filtered frame could raise SettingWithCopyWarning and
# made the cell non-idempotent).
df_city = (
    df.loc[df['city'] == 'Warszawa', ['date', 'city', 'tavg']]
    .sort_values(by=["date"])
)

# =========================
# Tworzenie sekwencji czasowych
# =========================

def create_sequences(df, window=5):
    """Build sliding-window inputs and next-step targets from ``df['tavg']``.

    Sample ``i`` consists of the ``window`` consecutive ``tavg`` values at
    positions ``i .. i + window - 1`` (in the row order of ``df``); its label
    is the value at position ``i + window``, i.e. the next row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``tavg`` column, already sorted in the desired order.
    window : int, default 5
        Number of consecutive values per input sequence. Must be >= 1.

    Returns
    -------
    sequences : numpy.ndarray
        Array of shape ``(max(len(df) - window, 0), window)``.
    labels : numpy.ndarray
        Array of shape ``(max(len(df) - window, 0),)``.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = df['tavg'].to_numpy()
    # Clamp at zero so inputs shorter than the window yield empty arrays with
    # a consistent 2-D shape instead of a flat (0,) array.
    n_samples = max(len(values) - window, 0)

    # Row i of `window_index` holds positions i, i + 1, ..., i + window - 1.
    # Fancy indexing with it extracts every window at once (and copies), which
    # replaces one pandas .iloc lookup pair per row.
    window_index = np.arange(n_samples)[:, None] + np.arange(window)[None, :]
    sequences = values[window_index]

    # The label of window i is the value right after it; copy so the result
    # never aliases the DataFrame's underlying buffer.
    labels = values[window:window + n_samples].copy()

    return sequences, labels


# Input windows of 5 consecutive values and the value that follows each one.
sequences, labels = create_sequences(df_city, window=5)

# =========================
# Chronological train / test split point
# =========================

# The first 80% of the samples are used for training, the rest for testing.
# The split point is computed BEFORE scaling so the scaler can be fitted on
# training data only.
train_size = int(len(sequences) * 0.8)

# =========================
# Data normalisation
# =========================

scaler = MinMaxScaler()

# Fit the min/max on the training windows and training targets only.
# Fitting on the whole series would leak the range of the test period into
# the features the model is trained on.
scaler.fit(
    np.concatenate(
        [sequences[:train_size].ravel(), labels[:train_size]]
    ).reshape(-1, 1)
)

# Apply the same (train-fitted) scaling to every window and every label.
# Values in the test period may fall slightly outside [0, 1]; that is expected.
sequences_scaled = scaler.transform(
    sequences.reshape(-1, 1)
).reshape(sequences.shape)

labels_scaled = scaler.transform(labels.reshape(-1, 1))

# Reshape for the LSTM: (samples, time_steps, features)
sequences_scaled = sequences_scaled.reshape(
    (sequences_scaled.shape[0], sequences_scaled.shape[1], 1)
)

# =========================
# Train / test split
# =========================

X_train = sequences_scaled[:train_size]
X_test  = sequences_scaled[train_size:]

y_train = labels_scaled[:train_size]
y_test  = labels_scaled[train_size:]

# =========================
# Build the LSTM model
# =========================

# Seed the Python, NumPy and backend random generators before any weights are
# created, so weight initialisation and batch shuffling are repeatable on
# Restart & Run All. (Bit-exact results may still vary across hardware.)
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# One LSTM layer (50 units, last hidden state only) followed by a single
# linear output unit that predicts the next scaled value.
model = Sequential()
model.add(
    LSTM(
        units=50,
        return_sequences=False,
        input_shape=(X_train.shape[1], 1)
    )
)
model.add(Dense(1))

model.compile(
    optimizer='adam',
    loss='mean_squared_error'
)

# =========================
# Train the model
# =========================

# The held-out test split is passed as validation data; it is only used to
# report val_loss per epoch, not to update the weights.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test)
)
# =========================
# Save the model to the "models" folder
# =========================

# Make sure the target directory exists first, then derive the file path
# and write the trained model there.
model_dir = "models"
os.makedirs(model_dir, exist_ok=True)

model_path = os.path.join(model_dir, "lstm_warszawa_temperature.keras")
model.save(model_path)

print(f"Model zapisany w lokalizacji: {model_path}")



# =========================
# Loss curve visualisation
# =========================

# Draw the per-epoch training and validation loss recorded by model.fit().
for history_key, legend_name in (('loss', 'Trening'), ('val_loss', 'Walidacja')):
    plt.plot(history.history[history_key], label=legend_name)

plt.title("Strata modelu podczas treningu")
plt.xlabel("Epoka")
plt.ylabel("Strata")
plt.legend()
plt.show()

# =========================
# Prediction on the test set
# =========================

predictions = model.predict(X_test)

# Undo the min-max scaling so both series are back in the original units.
y_test_rescaled = scaler.inverse_transform(y_test)
predictions_rescaled = scaler.inverse_transform(predictions)

# Overlay the actual values and the model's predictions, sample by sample.
for series, legend_name in (
    (y_test_rescaled, 'Rzeczywiste wartości'),
    (predictions_rescaled, 'Prognozy'),
):
    plt.plot(series, label=legend_name)

plt.title("Porównanie prognoz i rzeczywistych wartości")
plt.xlabel("Próbka")
plt.ylabel("Średnia temperatura")
plt.legend()
plt.show()

# =========================
# Forecast 5 days into the future
# =========================

n_future_days = 5
# Window length is taken from the data instead of being hard-coded.
window_size = X_test.shape[1]

# Seed window = the last `window_size` OBSERVED (scaled) values.
# X_test[-1] alone ends one step before the final observation (its target,
# y_test[-1], is the last known value), so starting from it would make the
# first "future" value a prediction of an already observed day. Dropping the
# oldest element of the last window and appending its known target gives the
# window that ends on the last observation.
input_sequence = np.concatenate(
    [X_test[-1, 1:, 0], np.ravel(y_test[-1])]
).reshape(window_size, 1)

future_predictions = []

for _ in range(n_future_days):
    pred_scaled = model.predict(
        input_sequence.reshape(1, window_size, 1)
    )
    next_value = pred_scaled[0][0]
    future_predictions.append(next_value)

    # Slide the window: drop the oldest step and append the new prediction,
    # so the next iteration forecasts one step further ahead.
    input_sequence = np.roll(input_sequence, -1, axis=0)
    input_sequence[-1, 0] = next_value

# Convert the scaled forecasts back to the original units.
future_predictions_rescaled = scaler.inverse_transform(
    np.array(future_predictions).reshape(-1, 1)
)

print("Prognozy na 5 dni do przodu (średnia temperatura):")
print(future_predictions_rescaled)


In [2]:
# summary() prints the layer table itself; it must be called. Printing the
# attribute without calling it only shows the bound-method repr.
model.summary()

In [7]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# =========================
# Funkcja budująca model LSTM
# =========================

def build_lstm_model(input_shape):
    """Create and compile a small LSTM regressor.

    The network is a single 50-unit LSTM layer that returns only its final
    output, followed by one Dense unit. It is compiled with the Adam
    optimizer and mean-squared-error loss.

    Parameters
    ----------
    input_shape : tuple
        Passed straight to the LSTM layer as ``input_shape``
        (the caller in this notebook passes ``(time_steps, 1)``).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    recurrent_layer = LSTM(
        units=50,
        return_sequences=False,
        input_shape=input_shape
    )
    output_layer = Dense(1)

    network = Sequential([recurrent_layer, output_layer])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# NOTE(review): a "MAPE function" section used to be announced here but no
# function was ever defined and MAPE is not computed below. `mape_scores` is
# kept only so that code outside this cell referencing it does not break.

# =========================
# Time Series Cross-Validation
# =========================

# Expanding-window splits: each fold trains on an initial segment and tests
# on the segment that immediately follows it.
tscv = TimeSeriesSplit(n_splits=5)

rmse_scores = []
mae_scores  = []
mse_scores  = []
r2_scores   = []
mape_scores = []  # never populated in this cell

# Fold-local variable names are used on purpose: reusing `model`, `X_test`,
# `y_test`, `y_test_rescaled`, ... would overwrite the hold-out objects
# created earlier in the notebook and silently change what later cells see.
for fold, (train_index, test_index) in enumerate(
    tscv.split(sequences_scaled), start=1
):
    print(f"\nFold {fold}")

    X_train_fold = sequences_scaled[train_index]
    X_test_fold  = sequences_scaled[test_index]

    y_train_fold = labels_scaled[train_index]
    y_test_fold  = labels_scaled[test_index]

    # A fresh model per fold so no weights carry over between folds.
    fold_model = build_lstm_model(
        input_shape=(X_train_fold.shape[1], 1)
    )

    # shuffle=False keeps the samples in chronological order during training.
    fold_model.fit(
        X_train_fold,
        y_train_fold,
        epochs=20,
        batch_size=32,
        verbose=0,
        shuffle=False
    )

    y_pred_fold = fold_model.predict(X_test_fold)

    # Undo the scaling so the metrics are expressed in the original units.
    y_true_fold_rescaled = scaler.inverse_transform(y_test_fold)
    y_pred_fold_rescaled = scaler.inverse_transform(y_pred_fold)

    mse  = mean_squared_error(y_true_fold_rescaled, y_pred_fold_rescaled)
    rmse = np.sqrt(mse)
    mae  = mean_absolute_error(y_true_fold_rescaled, y_pred_fold_rescaled)
    r2   = r2_score(y_true_fold_rescaled, y_pred_fold_rescaled)

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    r2_scores.append(r2)

    print(
        f"R²: {r2:.4f} | "
        f"MAE: {mae:.4f} | "
        f"MSE: {mse:.4f} | "
        f"RMSE: {rmse:.4f} | "
    )

# =========================
# Summary
# =========================

# Mean of each metric across the folds.
print("\nWalk-Forward Cross-Validation — LSTM")
print(f"R²   : {np.mean(r2_scores):.4f}")
print(f"MAE  : {np.mean(mae_scores):.4f}")
print(f"MSE  : {np.mean(mse_scores):.4f}")
print(f"RMSE : {np.mean(rmse_scores):.4f}")



In [8]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# =========================
# Compute residuals
# =========================

# Residual = actual - predicted for the hold-out period.
# The actual values are taken from the unscaled `labels` at the hold-out
# positions (`train_size:`) rather than from `y_test_rescaled`: that name is
# reassigned inside the cross-validation loop, so after that cell has run it
# no longer matches `predictions_rescaled` in length or in time period.
# Flattened to 1-D so both plots receive a plain series.
residuals = labels[train_size:] - predictions_rescaled.ravel()

# =========================
# Residuals over time
# =========================

# The hold-out samples are the last ones in the series, so their targets
# line up with the last len(residuals) dates of df_city.
plt.figure(figsize=(12,6))
plt.plot(df_city['date'].iloc[-len(residuals):], residuals, marker='o', linestyle='-', color='b')
plt.axhline(y=0, color='r', linestyle='--')  # zero-error reference line
plt.title('Residuals modelu LSTM — błąd predykcji w czasie')
plt.xlabel('Data')
plt.ylabel('Błąd (°C)')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# =========================
# Residual histogram
# =========================
plt.figure(figsize=(10,6))
sns.histplot(residuals, kde=True, color='b')
plt.title('Rozkład residuals modelu LSTM')
plt.xlabel('Błąd predykcji (°C)')
plt.ylabel('Częstość')
plt.grid(True)
plt.show()
