In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_absolute_percentage_error, r2_score

In [9]:
# Path of the CSV file for every instrument, keyed by ticker symbol.
DATASETS = {
    ticker: f"data/{ticker}/{ticker}_data.csv"
    for ticker in ("AAPL", "BTC", "ETH", "SPY")
}

# Forecast horizon (number of observations to predict) per range label and
# instrument. Weekly ranges are a per-instrument factor (16 / 24 / 17 --
# presumably observations per day, TODO confirm) times the number of days;
# monthly ranges are multiples of a fixed per-instrument count.
# The "test_len" placeholder is resolved by the driver to the test-set size.
FORECAST_INTERVALS = {
    # 7 days -> 112 168 168 119, 14 days -> 224 336 336 238, 21 days -> 336 504 504 357
    **{
        label: {"AAPL": 16 * days, "BTC": 24 * days, "ETH": 24 * days, "SPY": 17 * days}
        for label, days in (("1_tydzień", 7), ("2_tygodnie", 14), ("3_tygodnie", 21))
    },
    # 1 month -> 320 744 744 340, 2 months -> 640 1488 1488 680
    **{
        label: {"AAPL": 320 * months, "BTC": 744 * months, "ETH": 744 * months, "SPY": 340 * months}
        for label, months in (("1_miesiąc", 1), ("2_miesiące", 2))
    },
    # Original note gives 960 2160 2160 1020 as the expected sizes here.
    "3_miesiące": dict.fromkeys(("AAPL", "BTC", "ETH", "SPY"), "test_len"),
}

# Short suffix used in output file names for every range label.
range_name_map = dict(zip(
    ("1_tydzień", "2_tygodnie", "3_tygodnie", "1_miesiąc", "2_miesiące", "3_miesiące"),
    ("tyg", "2tyg", "3tyg", "msc", "2msc", "3msc"),
))

In [10]:
def train_holt_winters(data, seasonal_periods=24):
    """Fit a Holt-Winters (exponential smoothing) model with an additive trend.

    An additive seasonal component is included only when ``data`` holds at
    least two full seasons; otherwise a notice is printed and a trend-only
    model is fitted.

    Args:
        data: Observations handed to ``ExponentialSmoothing``; must support ``len()``.
        seasonal_periods: Length of one season, in observations.

    Returns:
        The fitted results object returned by ``ExponentialSmoothing(...).fit()``.
    """
    n_obs = len(data)
    min_required = 2 * seasonal_periods

    if n_obs >= min_required:
        seasonal_model = ExponentialSmoothing(
            data, trend='add', seasonal='add', seasonal_periods=seasonal_periods
        )
        return seasonal_model.fit()

    # Fewer than two seasons available: fall back to the trend-only variant.
    print(f"Za mało danych na sezonowość ({n_obs} < {min_required}), używam tylko trendu.")
    trend_only_model = ExponentialSmoothing(data, trend='add', seasonal=None)
    return trend_only_model.fit()


def plot_prediction(real, pred, history, title):
    """Draw history, actual values and the forecast on one new figure.

    ``history`` is plotted at x positions ``0 .. len(history) - 1``; ``real``
    and ``pred`` both start at x position ``len(history)`` so they continue
    where the history ends. The figure is neither shown nor closed here.

    Args:
        real: Actual values for the forecast window.
        pred: Predicted values for the forecast window.
        history: Values preceding the forecast window.
        title: Figure title.
    """
    offset = len(history)
    fig, ax = plt.subplots(figsize=(10, 4))

    series_to_draw = (
        (range(offset), history, {"label": 'Historia', "linestyle": '--'}),
        (range(offset, offset + len(real)), real, {"label": 'Rzeczywiste', "marker": 'o'}),
        (range(offset, offset + len(pred)), pred, {"label": 'Predykcja', "marker": 'x'}),
    )
    for x_positions, values, line_style in series_to_draw:
        ax.plot(x_positions, values, **line_style)

    ax.set_title(title)
    ax.set_xlabel('Notowanie')
    ax.set_ylabel('Cena "close"')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()

In [None]:
if __name__ == "__main__":
    # Driver: for each dataset, fit Holt-Winters on the "close" column of the
    # training part, forecast every configured horizon, print MAPE / R²,
    # plot the result and save predictions plus metrics to text files.
    for name, path in DATASETS.items():
        print(f"\n================= {name} =================\n")
        data = pd.read_csv(path, parse_dates=['timestamp'])
        data = data.sort_values('timestamp')
        data = data.set_index('timestamp')

        # The test set is the trailing block of rows, as long as the number of
        # rows dated 2025. Split on an explicit non-negative position: with
        # test_len == 0 the negative form (iloc[:-0] / iloc[-0:]) would give an
        # empty training set and treat the whole file as test data.
        test_len = int((data.index.year == 2025).sum())
        split_at = len(data) - test_len
        train = data.iloc[:split_at]
        test = data.iloc[split_at:]

        # Resolve the "test_len" placeholder to the actual test-set size.
        forecast_ranges = {}
        for label, source in FORECAST_INTERVALS.items():
            val = source[name]
            forecast_ranges[label] = test_len if val == "test_len" else val

        best_results = {}

        # The training series is identical for every horizon, so the model is
        # fitted at most once (lazily, on first use) and reused afterwards
        # instead of being refitted for each range. If fitting fails, `model`
        # stays None and the next range retries and reports the error again.
        train_series = train['close']
        model = None

        for range_name, forecast_len in forecast_ranges.items():
            # Never ask for more steps than there are test observations.
            available_len = min(forecast_len, len(test))
            test_series = test['close'].iloc[:available_len]

            if len(train_series) < 10 or len(test_series) < 2:
                print(f"Pominięto {name} - {range_name}: za mało danych")
                continue

            try:
                if model is None:
                    model = train_holt_winters(train_series)
                forecast = model.forecast(steps=available_len)

                mape = mean_absolute_percentage_error(test_series, forecast) * 100
                r2 = r2_score(test_series, forecast)

                # Tail of the training data shown in front of the forecast.
                history = train_series.values[-forecast_len:]

                best_results[range_name] = {
                    "mape": mape,
                    "r2": r2,
                    "real": test_series.values,
                    "pred": forecast.values,
                    "history": history
                }

                print(f"{range_name} | MAPE = {mape:.2f}% | R² = {r2:.4f}")
                plot_prediction(test_series.values, forecast.values, history,
                                f"{name} - {range_name}")

            except Exception as e:
                # Best effort: report the failure and carry on with the next range.
                print(f"Błąd dla {name} - {range_name}: {e}")

        # SAVE FILES: one predictions file and one metrics file per range.
        output_folder = f"wyniki_predykcjiHW/{name.lower()}"
        os.makedirs(output_folder, exist_ok=True)

        for range_name, res in best_results.items():
            okres = range_name_map.get(range_name, range_name)
            podmiot = name.lower()

            preds_filename = f"{podmiot}_{okres}_HW.txt"
            metrics_filename = f"{podmiot}_{okres}_HW_metrics.txt"

            np.savetxt(os.path.join(output_folder, preds_filename), res["pred"], fmt="%.6f")
            # Metrics file layout: MAPE (in percent) on line 1, R² on line 2.
            with open(os.path.join(output_folder, metrics_filename), "w") as f:
                f.write(f"{res['mape']:.4f}\n")
                f.write(f"{res['r2']:.4f}\n")