Select the best 5 feature combinations for the RNN

In [None]:
import pandas as pd
import numpy as np
from itertools import combinations
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

# === Load the Excel sheet ===
file_path = r"C:\Users\41799\Desktop\Kopie von market_data.xlsx"
df = pd.read_excel(file_path)

# === Target variable & candidate input features ===
# (The original note called these "CNN-suitable" candidates; the model built
# further down in this cell is an LSTM.)
target_col = "_MKT"
allowed_features = [
    "EMP",       # employment trend
    "GDP",       # economic growth
    "UN",        # unemployment
    "CPI",       # inflation
    "M2",        # money supply growth
    "Y02",       # short-term yield
    "Y10",       # long-term yield
    "STP",       # yield curve steepness
    "IR",        # nominal interest rate
    "RR",        # real interest rate
    "MOV",       # volatility
    "NYF",       # New York Fed index
    "_TY",       # Treasury market
    "_OIL",      # oil price
    "_DXY",      # dollar index
    "_LCP",      # large cap index
    "_AU"        # gold price
]

# === Process the date column ===
# When a "Date" column exists, parse it, sort chronologically and use it as index.
if "Date" in df.columns:
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.sort_values("Date")
    df = df.set_index("Date")

# === Keep numeric columns only & normalise ===
# Rows containing any NaN in a numeric column are dropped before scaling.
df = df.select_dtypes(include=["number"]).dropna()
# NOTE(review): the scaler is fitted on the full frame before the
# train/validation split below, so validation rows influence the scaling
# range — confirm this is intended.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

# === Split: first 15% for training, remainder for validation ===
# NOTE(review): a later cell in this notebook splits at 0.8 — confirm that
# 0.15 is the intended training share here.
split_index = int(len(df_scaled) * 0.15)
train_df = df_scaled[:split_index]
val_df = df_scaled[split_index:]

# === Zeitreihen-Daten generieren ===
def create_dataset(X, y, seq_len=5):
    """Build sliding-window samples for a sequence model.

    Every sample consists of ``seq_len`` consecutive rows of ``X``; its label
    is the entry of ``y`` located immediately after that window.

    Args:
        X: Sliceable sequence of feature rows (e.g. a 2-D NumPy array).
        y: Indexable sequence of targets aligned row-by-row with ``X``.
        seq_len: Number of consecutive rows in each window.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays holding
        ``len(X) - seq_len`` entries each; both are empty when ``X`` has
        ``seq_len`` rows or fewer.
    """
    window_starts = range(len(X) - seq_len)
    windows = [X[start:start + seq_len] for start in window_starts]
    labels = [y[start + seq_len] for start in window_starts]
    return np.array(windows), np.array(labels)

# === Test an RNN (LSTM) on every 3-feature combination ===
# For each combination a small LSTM is trained on the training windows and
# scored by its best validation MSE; the five lowest losses are printed.
results = []
for combo in combinations(allowed_features, 3):
    combo = list(combo)
    # One model is built per combination. Release the Keras global state left
    # behind by the previous candidate so memory does not grow over the
    # whole sweep.
    tf.keras.backend.clear_session()
    try:
        X_train, y_train = create_dataset(train_df[combo].values, train_df[target_col].values)
        X_val, y_val = create_dataset(val_df[combo].values, val_df[target_col].values)

        model = tf.keras.Sequential([
            tf.keras.layers.LSTM(64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer="adam", loss="mse")
        # Stop once the validation loss has not improved for 3 epochs and roll
        # back to the best weights seen.
        early_stop = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

        history = model.fit(X_train, y_train,
                            validation_data=(X_val, y_val),
                            epochs=50,
                            batch_size=16,
                            verbose=0,
                            callbacks=[early_stop])

        val_loss = min(history.history["val_loss"])
        results.append((combo, val_loss))
        print(f"✅ Getestet (LSTM): {combo} | val_loss: {val_loss:.5f}")

    except Exception as e:
        # Deliberate best-effort: a failing combination (e.g. a missing column
        # or too few rows for a window) is reported and the sweep continues.
        print(f"⚠️ Fehler bei Kombination {combo}: {e}")

# === Show the best 5 combinations ===
results.sort(key=lambda x: x[1])
print("\n🏆 Beste 5 Kombinationen mit genau 3 Features (LSTM):")
for i, (combo, loss) in enumerate(results[:5], 1):
    print(f"{i}. {combo} ➞ val_loss: {loss:.5f}")

Best results
1. ['GDP', '_TY', '_DXY'] ➞ val_loss: 0.06024
2. ['CPI', '_TY', '_LCP'] ➞ val_loss: 0.06063
3. ['UN', 'Y02', '_TY'] ➞ val_loss: 0.06120
4. ['_TY', '_DXY', '_LCP'] ➞ val_loss: 0.06159
5. ['Y02', '_TY', '_DXY'] ➞ val_loss: 0.06295


Best architecture for each result

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from itertools import product
import random

# === Reproducibility ===
# Seed the Python, NumPy and TensorFlow random number generators.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# === Load data ===
df = pd.read_excel(r"C:\Users\41799\Desktop\Kopie von market_data.xlsx")
# Drop every row that contains a missing value in any column.
df = df.dropna()
# "Date" is accessed unconditionally here, so the sheet must contain it.
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").reset_index(drop=True)

# Keep only the numeric columns for modelling.
df_numeric = df.select_dtypes(include=[np.number])
target_col = "_MKT"

# === Feature combinations ===
# The trailing comments record the val_loss noted for each combination.
combinations_to_test = [
    ['GDP', '_TY', '_DXY'],       # ➞ val_loss: 0.06024
    ['CPI', '_TY', '_LCP'],       # ➞ val_loss: 0.06063
    ['UN', 'Y02', '_TY'],         # ➞ val_loss: 0.06120
    ['_TY', '_DXY', '_LCP'],      # ➞ val_loss: 0.06159
    ['Y02', '_TY', '_DXY']        # ➞ val_loss: 0.06295
]


# === WindowGenerator ===
class WindowGenerator():
    """Cut a multivariate time series into (inputs, labels) windows.

    A window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows form the model input; the last ``label_width`` rows
    form the label. Optionally only selected columns are kept for each part.

    Args:
        input_width: Number of leading rows used as model input.
        label_width: Number of trailing rows used as label.
        shift: Number of rows added after the input to reach the window end.
        input_columns: Column names to keep for the inputs; ``None`` or empty
            keeps every column.
        label_columns: Column names to keep for the labels; ``None`` or empty
            keeps every column.
        df_train: Object with a ``columns`` attribute whose order defines the
            name -> position mapping. Required whenever ``input_columns`` or
            ``label_columns`` is given.
    """

    def __init__(self, input_width, label_width, shift, input_columns=None, label_columns=None, df_train=None):
        self.label_columns = label_columns
        self.input_columns = input_columns
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.label_start = self.total_window_size - self.label_width

        # Always define the lookup tables so that a missing df_train produces
        # a clear error in split_window instead of an AttributeError.
        column_positions = {}
        if df_train is not None:
            column_positions = {name: i for i, name in enumerate(df_train.columns)}
        self.train_input_indices = column_positions
        self.train_label_indices = dict(column_positions)

    def _select_columns(self, window, names, positions):
        """Stack the columns ``names`` of ``window`` along the last axis."""
        if not positions:
            raise ValueError(
                "Column selection requires a non-empty df_train to be passed "
                "to WindowGenerator."
            )
        return tf.stack([window[:, :, positions[name]] for name in names], axis=-1)

    def split_window(self, features):
        """Split a batch of windows into ``(inputs, labels)``.

        Args:
            features: Batch indexed as ``[batch, time, column]``.

        Returns:
            Tuple ``(inputs, labels)``; each part is restricted to the
            configured columns when a column list was given.

        Raises:
            ValueError: If columns were requested but no ``df_train`` mapping
                is available.
            KeyError: If a requested column name is not in ``df_train``.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.label_start:, :]
        if self.input_columns:
            inputs = self._select_columns(inputs, self.input_columns, self.train_input_indices)
        if self.label_columns:
            labels = self._select_columns(labels, self.label_columns, self.train_label_indices)
        return inputs, labels

    def make_dataset(self, data, shuffle=False, batchsize=64):
        """Turn ``data`` into a batched dataset of ``(inputs, labels)`` pairs.

        Args:
            data: Array-like of rows; converted to a float32 NumPy array.
            shuffle: Forwarded to ``timeseries_dataset_from_array``.
            batchsize: Number of windows per batch.

        Returns:
            The windowed dataset mapped through :meth:`split_window`.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            sampling_rate=1,
            shuffle=shuffle,
            batch_size=batchsize
        )
        return ds.map(self.split_window)

# === Hyperparameter search space ===
# Each entry is (input_width, LSTM units per layer, dropout rates, dense units).
hyperparams = list(product(
    [10, 20, 30, 45],
    [(32, 64, 128), (64, 64, 64), (128, 64, 32)],
    [(0.1, 0.3), (0.2, 0.4), (0.3, 0.5)],
    [32, 64, 128, 256]
))
# Random search: evaluate 40 randomly drawn points of the full grid.
hyperparams = random.sample(hyperparams, 40)

# === Result list ===
final_results = []

# === Loop over feature combinations ===
for features in combinations_to_test:
    print(f"\n🧪 Testing: {features}")
    selected_cols = features + [target_col]
    data = df_numeric[selected_cols].copy()
    # NOTE(review): the scaler is fitted on all rows before the split below,
    # so validation rows influence the scaling range — confirm this is intended.
    scaler = MinMaxScaler()
    data_scaled = pd.DataFrame(scaler.fit_transform(data), columns=selected_cols)

    # Chronological split: first 80% for training, remainder for validation.
    split = int(len(data_scaled) * 0.8)
    train_df = data_scaled[:split]
    val_df = data_scaled[split:]

    best_loss = np.inf
    best_corr = -1
    best_sharpe = -np.inf
    best_config = None

    for input_width, units, drops, dense in hyperparams:
        # A fresh model is built for every configuration. Release the Keras
        # global state of the previous candidate so memory does not grow
        # across the whole search.
        tf.keras.backend.clear_session()

        window = WindowGenerator(input_width=input_width, label_width=1, shift=1,
                                 input_columns=features, label_columns=[target_col], df_train=train_df)
        train_data = window.make_dataset(train_df, shuffle=True)
        # Validation windows stay in chronological order so predictions and
        # labels collected below line up.
        val_data = window.make_dataset(val_df)

        model = Sequential([
            LSTM(units=units[0], return_sequences=True),
            BatchNormalization(),
            Dropout(drops[0]),
            LSTM(units=units[1], return_sequences=True),
            BatchNormalization(),
            Dropout(drops[1]),
            LSTM(units=units[2], return_sequences=False),
            BatchNormalization(),
            Dense(dense, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])

        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='mse')
        early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        history = model.fit(
            train_data,
            validation_data=val_data,
            epochs=50,
            callbacks=[early_stop],
            verbose=0
        )

        y_pred_val = model.predict(val_data)
        y_true_val = np.concatenate([y for x, y in val_data], axis=0)

        # Collapse a (batch, time, column) result to its last time step.
        if y_pred_val.ndim == 3:
            y_pred_val = y_pred_val[:, -1, :]
        if y_true_val.ndim == 3:
            y_true_val = y_true_val[:, -1, :]

        corr, _ = pearsonr(np.ravel(y_true_val), np.ravel(y_pred_val))
        val_loss = min(history.history['val_loss'])

        # Linear regression used as a correction of the raw predictions.
        # NOTE(review): the regression is fitted on the validation targets it
        # is then evaluated against, so the Sharpe figure below is in-sample —
        # confirm this is acceptable for model selection.
        pred_column = y_pred_val.reshape(-1, 1)
        reg = LinearRegression().fit(pred_column, y_true_val.reshape(-1, 1))
        y_pred_corrected = reg.predict(pred_column)

        # Sharpe-like ratio: trade the sign of the predicted change against
        # the realised change of the (scaled) target. Not annualised.
        returns = y_true_val[1:] - y_true_val[:-1]
        position = np.sign(y_pred_corrected[1:] - y_pred_corrected[:-1])
        strategy_returns = position * returns
        sharpe_ratio = np.mean(strategy_returns) / (np.std(strategy_returns) + 1e-6)

        # Select by Sharpe first, correlation as tie-breaker; the loss of the
        # selected configuration is recorded alongside.
        if sharpe_ratio > best_sharpe or (sharpe_ratio == best_sharpe and corr > best_corr):
            best_loss = val_loss
            best_corr = corr
            best_sharpe = sharpe_ratio
            best_config = (input_width, units, drops, dense)

    print(f"✅ Best Config: {best_config} | loss: {best_loss:.5f} | corr: {best_corr:.3f} | Sharpe: {best_sharpe:.3f}")
    final_results.append((features, best_loss, best_corr, best_config, best_sharpe))

# === Show results (sorted by Sharpe, then correlation, descending) ===
final_results.sort(key=lambda x: (x[4], x[2]), reverse=True)
print("\n🏁 Best Feature Combinations:")
for i, (feat, loss, corr, cfg, sharpe) in enumerate(final_results, 1):
    print(f"{i}. {feat} ➔ val_loss: {loss:.5f} | corr: {corr:.3f} | Sharpe: {sharpe:.3f} | config: {cfg}")


FileNotFoundError: [Errno 2] No such file or directory: '/content/Kopie von market_data.xlsx'

1. ['_TY', '_DXY', '_LCP'] ➔ val_loss: 0.11024 | corr: 0.694 | Sharpe: 0.127 | config: (20, (128, 64, 32), (0.1, 0.3), 128)
2. ['CPI', '_TY', '_LCP'] ➔ val_loss: 0.25088 | corr: 0.876 | Sharpe: 0.126 | config: (20, (64, 64, 64), (0.2, 0.4), 256)
3. ['GDP', '_TY', '_DXY'] ➔ val_loss: 0.39701 | corr: 0.347 | Sharpe: 0.099 | config: (20, (32, 64, 128), (0.2, 0.4), 256)
4. ['UN', 'Y02', '_TY'] ➔ val_loss: 0.24832 | corr: 0.384 | Sharpe: 0.087 | config: (30, (128, 64, 32), (0.2, 0.4), 256)
5. ['Y02', '_TY', '_DXY'] ➔ val_loss: 0.31353 | corr: 0.523 | Sharpe: 0.061 | config: (30, (128, 64, 32), (0.1, 0.3), 64)

Only numbers 1 and 2 are relevant because of their Sharpe ratio and correlation.

Now find the best trading strategy for 1 and 2.

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics.pairwise import cosine_similarity
import random
import warnings
warnings.filterwarnings("ignore")

# Seed the NumPy, TensorFlow and Python random number generators.
np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

# Load data
# NOTE(review): this is a raw string, so the doubled backslashes are kept
# literally in the path; the other cells use single backslashes — confirm
# the path resolves as intended.
df = pd.read_excel(r"C:\\Users\\41799\\Desktop\\Kopie von market_data.xlsx")
# Drop every row that contains a missing value, then order chronologically.
df = df.dropna()
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)

# Parameters
target_col = '_MKT'
features = ['_TY', '_DXY', '_LCP']
# Each tuple is unpacked further down as (input_width, units, drops, dense):
# window length, LSTM units per layer, dropout rates, dense-layer units.
hyperparam_space = [
    (20, (128, 64, 32), (0.1, 0.3), 128),
    (20, (64, 64, 64), (0.2, 0.4), 256),
    (15, (32, 64, 128), (0.2, 0.4), 256),
    (10, (32, 32, 32), (0.1, 0.2), 64)
]

class WindowGenerator:
    """Cut a multivariate time series into (inputs, labels) windows.

    A window spans ``input_width + shift`` consecutive rows: the first
    ``input_width`` rows are the model input and the last ``label_width``
    rows are the label. Both parts are reduced to the configured columns.

    Args:
        input_width: Number of leading rows used as model input.
        label_width: Number of trailing rows used as label.
        shift: Number of rows added after the input to reach the window end.
        input_columns: Column names kept for the inputs.
        label_columns: Column names kept for the labels.
        df_train: Object with a ``columns`` attribute whose order defines the
            name -> position mapping.
    """

    def __init__(self, input_width, label_width, shift, input_columns, label_columns, df_train):
        window_span = input_width + shift

        self.input_columns = input_columns
        self.label_columns = label_columns
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = window_span
        self.label_start = window_span - label_width
        self.input_slice = slice(0, input_width)

        # Column name -> position along the last axis of a window batch.
        column_names = df_train.columns
        self.train_indices = dict(zip(column_names, range(len(column_names))))

    def split_window(self, features):
        """Split a batch of windows into ``(inputs, labels)`` tensors."""

        def pick(part, names):
            # Gather the requested columns and stack them as the last axis.
            channels = [part[:, :, self.train_indices[name]] for name in names]
            return tf.stack(channels, axis=-1)

        input_part = features[:, self.input_slice, :]
        label_part = features[:, self.label_start:, :]
        return pick(input_part, self.input_columns), pick(label_part, self.label_columns)

    def make_dataset(self, data, batchsize=64):
        """Return an unshuffled, batched dataset of ``(inputs, labels)`` pairs.

        ``data`` is converted to a float32 NumPy array before windowing.
        """
        values = np.array(data, dtype=np.float32)
        windows = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=values,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            sampling_rate=1,
            shuffle=False,
            batch_size=batchsize,
        )
        return windows.map(self.split_window)

# Strategy evaluation
def evaluate_strategy(y_true, y_pred):
    """Score three position rules and return the best Sharpe-like ratio.

    The predictions are first passed through a linear regression fitted on
    ``(y_pred, y_true)``. Positions are then derived from the step-to-step
    change of the corrected predictions and multiplied with the step-to-step
    change of ``y_true``:

    * ``'sign'``     - sign of the predicted change,
    * ``'tanh'``     - tanh of the predicted change,
    * ``'adaptive'`` - predicted change times 5, clipped to [-1, 1].

    The score is ``mean / (std + 1e-6)`` of the resulting series (not
    annualised).

    NOTE(review): both the regression and the choice of the winning rule use
    the same ``y_true`` that is being scored, so the result is in-sample —
    confirm this is intended where the function is applied to test segments.

    Args:
        y_true: 2-D array of realised target values, one row per step.
        y_pred: 2-D array of predictions aligned with ``y_true``.

    Returns:
        Tuple ``(score, rule_name)`` for the highest-scoring rule.
    """
    calibrated = LinearRegression().fit(y_pred, y_true).predict(y_pred)

    realized_change = y_true[1:] - y_true[:-1]
    predicted_change = calibrated[1:] - calibrated[:-1]

    positions = {
        'sign': np.sign(predicted_change),
        'tanh': np.tanh(predicted_change),
        'adaptive': np.clip(predicted_change * 5, -1, 1),
    }

    scores = {}
    for rule, weight in positions.items():
        pnl = weight * realized_change
        scores[rule] = np.mean(pnl) / (np.std(pnl) + 1e-6)

    winner = max(scores, key=scores.get)
    return scores[winner], winner

# Pattern extractor
def extract_market_pattern(df_block):
    """Summarise a block of rows as one value per feature column.

    The columns listed in the module-level ``features`` are smoothed with a
    5-row rolling mean; rows that still contain NaN are discarded and the
    remaining rows are averaged column-wise.

    Args:
        df_block: DataFrame containing at least the ``features`` columns.

    Returns:
        NumPy array with one entry per feature column.
    """
    smoothed = df_block[features].rolling(5).mean()
    complete_rows = smoothed.dropna()
    return complete_rows.mean().values

# Scale data
# NOTE(review): the scaler is fitted on the whole series, i.e. including the
# rows later used as test segments — confirm this is intended.
df_scaled = pd.DataFrame(MinMaxScaler().fit_transform(df[features + [target_col]]), columns=features + [target_col])
df_scaled['Date'] = df['Date']

# Derive the unit length dynamically: the series is cut into `total_units`
# equally long units, each of which must hold at least `min_unit_size` rows.
total_units = 80
min_unit_size = 20
total_len = len(df_scaled)
max_units = total_len // min_unit_size  # kept for reference; not used below
unit_size = total_len // total_units

if unit_size < min_unit_size:
    raise ValueError(f"Nicht genug Daten. Mindestens {min_unit_size * total_units} benötigt.")

train_units = 60
model_store = []

# --- Training phase: fit one model per unit and remember its market pattern ---
print(f"\n📊 Training Phase: {train_units} Units")
for i in range(train_units):
    df_unit = df_scaled.iloc[i * unit_size:(i + 1) * unit_size].copy()
    if len(df_unit) < min_unit_size:
        continue

    # The Date column is not a model input; drop it once per unit.
    df_b = df_unit.drop(columns=['Date'])

    best_model, best_config, best_sharpe, best_corr, best_strat = None, None, -np.inf, None, None
    for config in random.sample(hyperparam_space, 4):
        input_width, units, drops, dense = config
        window = WindowGenerator(input_width, 1, 1, features, [target_col], df_b)
        ds = window.make_dataset(df_b)

        model = Sequential([
            LSTM(units=units[0], return_sequences=True),
            BatchNormalization(),
            Dropout(drops[0]),
            LSTM(units=units[1], return_sequences=True),
            BatchNormalization(),
            Dropout(drops[1]),
            LSTM(units=units[2], return_sequences=False),
            BatchNormalization(),
            Dense(dense, activation='relu'),
            Dropout(0.2),
            Dense(1)
        ])

        model.compile(optimizer=tf.keras.optimizers.Adam(0.0005), loss='mse')
        # NOTE(review): validation_data is the training dataset itself, so
        # early stopping monitors the training fit — confirm this is intended.
        model.fit(ds, validation_data=ds, epochs=20,
                  callbacks=[EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)], verbose=0)

        y_pred = model.predict(ds)
        y_true = np.concatenate([y for x, y in ds], axis=0)

        # Collapse a (batch, time, column) result to its last time step.
        if y_pred.ndim == 3:
            y_pred = y_pred[:, -1, :]
        if y_true.ndim == 3:
            y_true = y_true[:, -1, :]

        corr, _ = pearsonr(np.ravel(y_true), np.ravel(y_pred))
        sharpe, strat = evaluate_strategy(y_true, y_pred)

        if sharpe > best_sharpe:
            best_model, best_config, best_sharpe, best_corr, best_strat = model, config, sharpe, corr, strat

    # Explicit None check instead of relying on the truthiness of a model object.
    if best_model is not None:
        pattern_vector = extract_market_pattern(df_b)
        model_store.append({
            'unit': i,
            'model': best_model,
            'pattern': pattern_vector,
            'config': best_config,
            'strategy': best_strat
        })
        print(f"✅ Unit {i + 1}: Sharpe={best_sharpe:.3f} | Corr={best_corr:.3f} | Strategy={best_strat} | Config={best_config}")

print(f"\n📦 Anzahl gespeicherter Modelle: {len(model_store)}")

# --- Test phase: match each segment to the stored model with the most similar pattern ---
print("\n🧪 Test Phase: Matching Models to Segments")
test_df = df_scaled.iloc[train_units * unit_size:].reset_index(drop=True)
segment_size = unit_size
lookback = 60  # rows preceding a segment that describe the current market pattern
for start in range(lookback, len(test_df) - segment_size, segment_size):
    past_segment = test_df.iloc[start - lookback:start]
    future_segment = test_df.iloc[start:start + segment_size]
    if len(future_segment) < segment_size:
        break

    pattern = extract_market_pattern(past_segment.drop(columns=['Date']))
    # Keep the filtered entries themselves so the argmax below indexes the
    # same list the similarities were computed from.
    candidates = [m for m in model_store if m['pattern'].shape == pattern.shape]
    if not candidates:
        print("⚠️ Kein gespeichertes Muster verfügbar für den aktuellen Block.")
        continue

    stored_patterns = np.array([m['pattern'] for m in candidates])
    similarities = cosine_similarity([pattern], stored_patterns)[0]
    best_index = int(np.argmax(similarities))
    best_model_info = candidates[best_index]
    best_model = best_model_info['model']

    # Feed the model windows of the same length it was trained with.
    model_input_width = best_model_info['config'][0]
    segment_values = future_segment.drop(columns=['Date'])
    window = WindowGenerator(model_input_width, 1, 1, features, [target_col], segment_values)
    ds = window.make_dataset(segment_values)

    y_pred = best_model.predict(ds)
    y_true = np.concatenate([y for x, y in ds], axis=0)
    if y_pred.ndim == 3:
        y_pred = y_pred[:, -1, :]
    if y_true.ndim == 3:
        y_true = y_true[:, -1, :]

    # NOTE(review): the strategy stored with the model during training is not
    # applied here; evaluate_strategy picks the best rule on the test segment
    # itself — confirm this is the intended evaluation.
    sharpe, strat_used = evaluate_strategy(y_true, y_pred)
    print(f"📈 Segment ab {future_segment['Date'].iloc[0].date()} ➜ Strategy: {strat_used} | Sharpe={sharpe:.3f} | Source Unit: {best_model_info['unit'] + 1}")

🧪 Test Phase: Matching Models to Segments
1/1 [==============================] - 0s 76ms/step
📈 Segment ab 2015-11-15 ➜ Strategy: sign | Sharpe=0.164 | Source Unit: 39
1/1 [==============================] - 0s 72ms/step
📈 Segment ab 2016-04-24 ➜ Strategy: adaptive | Sharpe=1.841 | Source Unit: 39
1/1 [==============================] - 0s 62ms/step
📈 Segment ab 2016-10-02 ➜ Strategy: adaptive | Sharpe=19.665 | Source Unit: 39
1/1 [==============================] - 0s 61ms/step
📈 Segment ab 2017-03-12 ➜ Strategy: adaptive | Sharpe=0.917 | Source Unit: 39
1/1 [==============================] - 0s 64ms/step
📈 Segment ab 2017-08-20 ➜ Strategy: sign | Sharpe=4.686 | Source Unit: 39
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2018-01-28 ➜ Strategy: adaptive | Sharpe=6.371 | Source Unit: 39
1/1 [==============================] - 0s 67ms/step
📈 Segment ab 2018-07-08 ➜ Strategy: adaptive | Sharpe=0.234 | Source Unit: 39
1/1 [==============================] - 0s 62ms/step
📈 Segment ab 2018-12-16 ➜ Strategy: sign | Sharpe=1.908 | Source Unit: 39
1/1 [==============================] - 0s 66ms/step
📈 Segment ab 2019-05-26 ➜ Strategy: adaptive | Sharpe=5.176 | Source Unit: 39
1/1 [==============================] - 0s 60ms/step
📈 Segment ab 2019-11-03 ➜ Strategy: adaptive | Sharpe=0.964 | Source Unit: 39
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2020-04-12 ➜ Strategy: sign | Sharpe=90.423 | Source Unit: 39
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2020-09-20 ➜ Strategy: adaptive | Sharpe=0.715 | Source Unit: 48
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2021-02-28 ➜ Strategy: sign | Sharpe=0.675 | Source Unit: 48
1/1 [==============================] - 0s 62ms/step
📈 Segment ab 2021-08-08 ➜ Strategy: sign | Sharpe=0.540 | Source Unit: 47
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2022-01-16 ➜ Strategy: sign | Sharpe=18.160 | Source Unit: 47
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2022-06-26 ➜ Strategy: sign | Sharpe=0.273 | Source Unit: 51
1/1 [==============================] - 0s 70ms/step
📈 Segment ab 2022-12-04 ➜ Strategy: sign | Sharpe=26.019 | Source Unit: 41
1/1 [==============================] - 0s 60ms/step
📈 Segment ab 2023-05-14 ➜ Strategy: sign | Sharpe=12.706 | Source Unit: 41
1/1 [==============================] - 0s 63ms/step
📈 Segment ab 2023-10-22 ➜ Strategy: adaptive | Sharpe=0.885 | Source Unit: 41
