In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import Input
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score
import os

In [7]:
# Instrument symbol -> relative path of its CSV file.
# Every file follows the same layout: data/<SYMBOL>/<SYMBOL>_data.csv
DATASETS = {
    symbol: f"data/{symbol}/{symbol}_data.csv"
    for symbol in ("AAPL", "BTC", "ETH", "SPY")
}

In [8]:
# Candidate input-window sizes: 100, 200, ..., 1500 observations.
SEQ_LENGTHS = [100 * step for step in range(1, 16)]

# Per-instrument multiplier used for the day-based horizons
# (looks like the number of observations per day - TODO confirm against the data).
_PER_DAY = {"AAPL": 16, "BTC": 24, "ETH": 24, "SPY": 17}
# Per-instrument length of the one-month horizon, in observations.
_PER_MONTH = {"AAPL": 320, "BTC": 744, "ETH": 744, "SPY": 340}

# Forecast horizon label -> {instrument: number of forecast steps}.
FORECAST_INTERVALS = {
    # 7 days: 112 / 168 / 168 / 119
    "1_tydzień": {symbol: per_day * 7 for symbol, per_day in _PER_DAY.items()},
    # 14 days: 224 / 336 / 336 / 238
    "2_tygodnie": {symbol: per_day * 14 for symbol, per_day in _PER_DAY.items()},
    # 21 days: 336 / 504 / 504 / 357
    "3_tygodnie": {symbol: per_day * 21 for symbol, per_day in _PER_DAY.items()},
    # one month: 320 / 744 / 744 / 340
    "1_miesiąc": dict(_PER_MONTH),
    # exactly twice the one-month length: 640 / 1488 / 1488 / 680
    "2_miesiące": {symbol: per_month * 2 for symbol, per_month in _PER_MONTH.items()},
    # The string "test_len" is a placeholder, not a step count; the code that
    # consumes this table has to substitute the actual test-set length.
    # (The original note listed 960 / 2160 / 2160 / 1020 here - unverified.)
    "3_miesiące": {symbol: "test_len" for symbol in _PER_DAY},
}

In [9]:
def get_data(file, tail=None, custom_test_length=None, test_year=2025):
    """Load a CSV of quotes and split it into a leading train and trailing test part.

    Parameters
    ----------
    file : str or path-like
        CSV file with at least a ``timestamp`` column (parsed as datetimes).
    tail : int, optional
        When given, only the last ``tail`` rows are kept before splitting.
    custom_test_length : int, optional
        Number of trailing rows that form the test set. When omitted, the
        test length is the count of rows whose timestamp falls in ``test_year``.
    test_year : int, default 2025
        Calendar year used to size the test set when ``custom_test_length``
        is not supplied.

    Returns
    -------
    tuple of (DataFrame, DataFrame, DataFrame)
        ``(data, df_train, df_test)``: the (possibly truncated) full frame,
        and independent copies of its leading and trailing parts.

    Notes
    -----
    The split is purely positional: the last ``test_length`` rows become the
    test set. Counting ``test_year`` rows therefore only selects that year if
    those rows sit at the end of the file (assumes chronological order -
    TODO confirm for every data source).
    """
    data = pd.read_csv(file, parse_dates=['timestamp'])
    if tail is not None:
        data = data.tail(tail)

    if custom_test_length is not None:
        test_length = custom_test_length
    else:
        test_length = (data['timestamp'].dt.year == test_year).sum()

    # Split by row position, not by index label: after ``tail`` the index
    # labels no longer start at 0, so comparing labels with a positional
    # cut-off would push every row into the test set.
    is_train = np.arange(len(data)) < len(data) - test_length
    df_train = data[is_train].copy()
    df_test = data[~is_train].copy()
    return data, df_train, df_test

def create_mlp_sequences(data, seq_len):
    """Turn a 2-D series into flattened sliding windows and next-step targets.

    For every start position ``i`` the input is ``data[i:i + seq_len]``
    flattened to 1-D, and the target is the first column of the row that
    immediately follows the window.

    Parameters
    ----------
    data : array-like supporting 2-D slicing (e.g. ``numpy.ndarray``)
        Rows are consecutive observations.
    seq_len : int
        Number of rows in each input window.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(X, y)`` with one entry per window. Both are empty 1-D arrays when
        ``data`` has no more than ``seq_len`` rows.
    """
    window_starts = range(len(data) - seq_len)
    inputs = [data[start:start + seq_len].flatten() for start in window_starts]
    targets = [data[start + seq_len][0] for start in window_starts]
    return np.array(inputs), np.array(targets)

def build_model(seq_len):
    """Create and compile the MLP used for one-step-ahead regression.

    Architecture: an input of ``seq_len`` values, two ReLU hidden layers
    (128 and 64 units) and a single linear output unit. The model is
    compiled with the Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    seq_len : int
        Length of the flattened input vector.

    Returns
    -------
    The compiled ``Sequential`` model (untrained).
    """
    layer_stack = [
        Input(shape=(seq_len,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1),  # linear output: the predicted next value
    ]
    network = Sequential(layer_stack)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

def plot_prediction(real, pred, history, title):
    """Plot the history followed by the actual and predicted continuation.

    ``history`` is drawn first (dashed) on x positions ``0 .. len(history)-1``;
    ``real`` and ``pred`` both start at x position ``len(history)`` so they
    appear as the continuation of the history line.

    Parameters
    ----------
    real : sequence of numbers
        Observed values for the forecast period.
    pred : sequence of numbers
        Predicted values for the forecast period.
    history : sequence of numbers
        Values preceding the forecast period.
    title : str
        Figure title.
    """
    forecast_start = len(history)
    real_x = range(forecast_start, forecast_start + len(real))
    pred_x = range(forecast_start, forecast_start + len(pred))

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(range(forecast_start), history, label='Historia', linestyle='--')
    ax.plot(real_x, real, label='Rzeczywiste', marker='o')
    ax.plot(pred_x, pred, label='Predykcja', marker='x')
    ax.set_title(title)
    ax.set_xlabel('Notowanie')
    ax.set_ylabel('Cena "close"')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()


In [None]:


def _recursive_forecast(model, seed_window, steps):
    """Roll a one-step model forward ``steps`` times, feeding predictions back in.

    Parameters
    ----------
    model : callable Keras model
        Maps a ``(1, window_length)`` array to a ``(1, 1)`` prediction.
    seed_window : array-like
        The most recent known (scaled) values; its length is the window length.
    steps : int
        Number of future values to generate.

    Returns
    -------
    numpy.ndarray
        1-D array of ``steps`` predictions, in the same scale as ``seed_window``.
    """
    window = np.asarray(seed_window, dtype=float).copy()
    forecast = np.empty(steps, dtype=float)
    for step in range(steps):
        # Call the model directly instead of model.predict(): predict() is
        # built for large batches and carries a heavy per-call overhead, which
        # dominates when it is invoked once per forecast step.
        step_output = model(window.reshape(1, -1), training=False)
        forecast[step] = float(np.asarray(step_output)[0, 0])
        # Slide the window: drop the oldest value, append the new prediction.
        window = np.roll(window, -1)
        window[-1] = forecast[step]
    return forecast


# Short tags used in output file names for each forecast horizon
# (does not depend on the instrument, so it is defined once).
range_name_map = {
    "1_tydzień": "tyg",
    "2_tygodnie": "2tyg",
    "3_tygodnie": "3tyg",
    "1_miesiąc": "msc",
    "2_miesiące": "2msc",
    "3_miesiące": "3msc"
}

# One results DataFrame per instrument (every horizon x every window size).
all_results = []

for name, path in DATASETS.items():
    print(f"\n================= {name} =================\n")
    data, df_train, df_test = get_data(path)
    test_len = len(df_test)

    # Nothing can be fitted or evaluated without both parts of the split.
    if test_len == 0 or len(df_train) == 0:
        print(f"Pomijam {name}: pusty zbiór treningowy lub testowy.")
        continue

    # The scaler is fitted on the training part only, so no test information
    # leaks into the model inputs.
    scaler_train = MinMaxScaler()
    train_scaled = scaler_train.fit_transform(df_train[['close']])
    test_values = df_test[['close']].values

    # Resolve the "test_len" placeholder to the actual test-set length.
    forecast_ranges = {}
    for label, source in FORECAST_INTERVALS.items():
        val = source[name]
        forecast_ranges[label] = test_len if val == "test_len" else val

    results_table = []
    best_results = {}

    for seq_len in SEQ_LENGTHS:
        print(seq_len)

        # At least one (window, target) pair is required to train.
        if len(train_scaled) <= seq_len:
            print(f"Pomijam seq_len={seq_len}: za mało danych treningowych "
                  f"({len(train_scaled)} obserwacji).")
            continue

        X_train, y_train = create_mlp_sequences(train_scaled, seq_len)
        model = build_model(seq_len)
        model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

        # Forecast the whole test period once, starting from the last training
        # window; shorter horizons are evaluated on prefixes of this forecast.
        pred_scaled = _recursive_forecast(
            model, train_scaled[-seq_len:].flatten(), test_len
        )
        pred_rescaled = scaler_train.inverse_transform(pred_scaled.reshape(-1, 1))

        for range_name, range_len in forecast_ranges.items():
            # Horizons longer than the available test data cannot be scored.
            if range_len > len(pred_rescaled):
                continue
            pred_slice = pred_rescaled[:range_len].flatten()
            real_slice = test_values[:range_len].flatten()
            # Same number of trailing training points, used only for plotting.
            history_slice = df_train['close'].values[-range_len:]

            mape = np.mean(np.abs((real_slice - pred_slice) / real_slice)) * 100
            r2 = r2_score(real_slice, pred_slice)

            results_table.append({
                "Instrument": name,
                "Zakres": range_name,
                "Seq_len": seq_len,
                "MAPE": mape,
                "R2": r2
            })

            # Keep, per horizon, the window size with the lowest MAPE.
            if (range_name not in best_results) or (mape < best_results[range_name]['mape']):
                best_results[range_name] = {
                    'seq_len': seq_len,
                    'mape': mape,
                    'r2': r2,
                    'real': real_slice,
                    'pred': pred_slice,
                    'history': history_slice
                }

    df_results = pd.DataFrame(results_table)
    if df_results.empty:
        # pivot_table below would fail on a frame without columns.
        print(f"Brak wyników dla {name}.")
        continue

    # The reported and optimised metric is MAPE, so the headers say MAPE.
    print(f"\nTabela wyników dla {name} (wszystkie zakresy i seq_len - MAPE_%):\n")
    print(df_results.pivot_table(index='Seq_len', columns='Zakres', values='MAPE').round(2))

    print(f"\nNajlepsze modele dla {name} (najniższe MAPE):\n")
    for range_name, res in best_results.items():
        print(f"{range_name} | seq_len = {res['seq_len']} | MAPE = {res['mape']:.2f}% | R2 = {res['r2']:.4f}")
        plot_prediction(res['real'], res['pred'], res['history'], f"{name} - {range_name} (seq_len={res['seq_len']})")

    # Save the best predictions and their metrics, one pair of files per horizon.
    output_folder = f"wyniki_predykcji/{name.lower()}"
    os.makedirs(output_folder, exist_ok=True)

    for range_name, res in best_results.items():
        okres = range_name_map.get(range_name, range_name)
        podmiot = name.lower()

        # Predictions: one value per line.
        preds_filename = f"{podmiot}_{okres}_MLP.txt"
        preds_filepath = os.path.join(output_folder, preds_filename)
        np.savetxt(preds_filepath, res["pred"], fmt="%.6f")

        # Metrics: MAPE on the first line, R2 on the second.
        metrics_filename = f"{podmiot}_{okres}_MLP_metrics.txt"
        metrics_filepath = os.path.join(output_folder, metrics_filename)
        with open(metrics_filepath, "w", encoding="utf-8") as f:
            f.write(f"{res['mape']:.4f}\n")
            f.write(f"{res['r2']:.4f}\n")
    all_results.append(df_results)