In [None]:
# 09_backtesting_senator.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import os

# Output directory for the figures saved by the plotting cells.
os.makedirs("images", exist_ok=True)

# Project root. Defaults to the original developer location, but can be
# overridden via the PROJECT_TRADING_ROOT environment variable so the notebook
# is not tied to one machine.
PROJECT_ROOT = os.environ.get(
    "PROJECT_TRADING_ROOT",
    r"C:\Users\User\PycharmProjects\Project-Trading",
)
TEST_SET_PATH = os.path.join(PROJECT_ROOT, "data", "test_set_senator.parquet")
MODEL_PATH = os.path.join(PROJECT_ROOT, "models", "senator_randomforest.pkl")

# Fail early with an actionable message instead of a bare loader error.
for required_path in (TEST_SET_PATH, MODEL_PATH):
    if not os.path.isfile(required_path):
        raise FileNotFoundError(
            f"Required file not found: {required_path} "
            "(set PROJECT_TRADING_ROOT to the project directory)"
        )

# Load test set (unseen data)
df = pd.read_parquet(TEST_SET_PATH)

print(f"Test samples: {len(df)}")

# Load trained model.
# SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary code
# while loading; only load model files that come from a trusted source.
model = joblib.load(MODEL_PATH)

In [None]:
# 2. Features & Prediction

# Candidate feature columns to feed into the model.
candidate_features = [
    'signed_amount', 'mean_amount', 'relative_trade_size',
    'tx_hour', 'tx_weekday',
    'price_before', 'vol_before', 'vwap_before', 'trade_count_before',
    'ema_5', 'ema_15', 'ema_30',
    'price_slope_15'
]

# Keep only the candidates present in the test set, but report the ones that
# are missing instead of dropping them silently: a silently reduced feature
# set is hard to diagnose when the prediction step later fails or misbehaves.
missing_features = [f for f in candidate_features if f not in df.columns]
features = [f for f in candidate_features if f in df.columns]
if missing_features:
    print(f"WARNING: features missing from test set and skipped: {missing_features}")

X = df[features]

# Model predicts absolute price movement (30min) according to the original
# author's note; np.abs guarantees a non-negative magnitude either way.
df['predicted_abs_move'] = np.abs(model.predict(X))

# Realized absolute movement
df['realized_abs_move'] = np.abs(df['price_change_pct'])

In [None]:

# 3. Define the baseline (feedback from the professor)
# Baseline = mean realized absolute 30min move over all rows of the test set.
baseline_return = df['realized_abs_move'].mean()
print(f"Baseline abs return (30min): {baseline_return:.4f}%")

# 4. Define the trading rule
# Trade only when the model expects a strong reaction: rows whose predicted
# move lies strictly above the 80th percentile (i.e. roughly the top 20%).
# NOTE(review): the cut-off is derived from the test-set predictions
# themselves - confirm this is acceptable for the backtest.
threshold = df['predicted_abs_move'].quantile(0.80)
df['trade'] = df['predicted_abs_move'].gt(threshold)

n_trades = df['trade'].sum()
trade_share_pct = df['trade'].mean() * 100
print(f"Trades taken: {n_trades} ({trade_share_pct:.1f}%)")

In [None]:
# 5. Compute strategy performance

# Strategy performance: realized absolute moves of the rows flagged as trades.
strategy_returns = df.loc[df['trade'], 'realized_abs_move']

# Baseline performance: the constant average, aligned with the traded rows.
# Not read again in this cell; kept in case other cells rely on it.
baseline_returns = pd.Series(
    baseline_return,
    index=strategy_returns.index
)

# Computed once; NaN when no trades were selected or every traded row has a
# missing realized move.
strategy_mean = strategy_returns.mean()

print("\n=== BACKTEST RESULTS ===")
if pd.isna(strategy_mean):
    # Comparing NaN with the baseline would always be False and would be
    # reported as "does NOT outperform", which is misleading - say so instead.
    print(f"Baseline mean abs return: {baseline_return:.4f}%")
    print("⚠️ No valid trades to evaluate - strategy performance is undefined")
else:
    print(f"Strategy mean abs return: {strategy_mean:.4f}%")
    print(f"Baseline mean abs return: {baseline_return:.4f}%")
    print(f"Improvement: {strategy_mean - baseline_return:.4f}%")

    if strategy_mean > baseline_return:
        print("✅ Strategy OUTPERFORMS baseline")
    else:
        print("⚠️ Strategy does NOT outperform baseline")

In [None]:

# 6. Performance per stock (VERY IMPORTANT)
# Reference: "check whether some brands are affected more strongly by news".

print("\n=== PERFORMANCE PER STOCK ===")

# Only rows flagged as trades, grouped by ticker symbol.
traded_rows = df.loc[df['trade']]

for symbol, group in traded_rows.groupby('Ticker'):
    avg_move = group['realized_abs_move'].mean()
    delta_vs_baseline = avg_move - baseline_return

    print(f"{symbol}: mean={avg_move:.4f}% | Δ={delta_vs_baseline:.4f}%")

In [None]:

# 7. Plot 1: Equity Curve (abs return)

# Traded rows in chronological order with the running sum of realized moves.
df_sorted = df[df['trade']].sort_values('TimeOfFiled')
df_sorted['cum_return'] = df_sorted['realized_abs_move'].cumsum()

# The baseline earns the average absolute move once per trade, so it has to
# accumulate as well. A flat line at the final total can only be compared with
# the strategy at the very last point; this curve ends at that same value
# (baseline_return * number of trades) but is comparable along the whole axis.
df_sorted['cum_baseline'] = baseline_return * np.arange(1, len(df_sorted) + 1)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_sorted['TimeOfFiled'], df_sorted['cum_return'], label='Strategy')
ax.plot(
    df_sorted['TimeOfFiled'],
    df_sorted['cum_baseline'],
    linestyle='--',
    color='red',
    label='Baseline'
)

ax.set_title("Cumulative Absolute Return (Strategy vs Baseline)")
ax.set_xlabel("Time")
ax.set_ylabel("Cumulative abs return (%)")
ax.legend()
fig.tight_layout()
fig.savefig("images/09_backtest_equity_curve.png", dpi=150)
plt.show()

In [None]:

# 8. Plot 2: Distribution of Trades over Time

# Number of flagged trades per calendar date, in chronological order.
trade_dates = df.loc[df['trade'], 'TimeOfFiled'].dt.date
trades_per_day = trade_dates.value_counts().sort_index()

fig, ax = plt.subplots(figsize=(10, 4))
trades_per_day.plot(ax=ax)
ax.set_title("Distribution of Trades over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Trades")
fig.tight_layout()
fig.savefig("images/09_trade_distribution.png", dpi=150)
plt.show()