# Market-Neutral News Sentiment Strategy - Training & Backtesting

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from model import SentimentModel
from portfolio import PortfolioConstructor
from backtester import Backtester

In [None]:
# Read the preprocessed dataset and convert the `date` column to datetimes in
# the same step, then report how many rows were loaded and the span of the
# `year` column.
df = pd.read_csv('../data/processed_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
print(f"Loaded {len(df)} records from {df['year'].min()} to {df['year'].max()}")

## Walk-Forward Model Training

In [None]:
# Years passed to the walk-forward validation call.
# NOTE(review): presumably these are the first and last out-of-sample years,
# with `training_years` controlling the look-back window — confirm in
# SentimentModel.
walk_forward_window = {'start_year': 2013, 'end_year': 2023}

model = SentimentModel(
    alpha=1.0,
    training_years=10,
)
predictions_df = model.walk_forward_validation(df, **walk_forward_window)

# Last expression of the cell: shows the first few prediction rows.
predictions_df.head()

## Portfolio Construction & Backtesting

In [None]:
# Market-neutral configuration: net_exposure is 0.0, with long_pct and
# short_pct both set to 0.2.
portfolio_constructor = PortfolioConstructor(
    long_pct=0.2,
    short_pct=0.2,
    net_exposure=0.0,
)
backtester = Backtester(portfolio_constructor)

# Mock S&P 1500 constituents: every distinct prediction date is mapped to its
# own copy of the same three placeholder tickers.
sp1500_constituents = {}
for rebalance_date in predictions_df['date'].unique():
    sp1500_constituents[rebalance_date] = ['AAPL', 'MSFT', 'GOOGL']

# Run the backtest first; the metrics are computed from the backtester
# afterwards.
results = backtester.run(predictions_df, df, sp1500_constituents)
metrics = backtester.calculate_metrics()

# Print each metric to four decimal places.
print("\nPerformance Metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
# Compound the per-period portfolio returns into a cumulative growth curve
# and draw it against the date column.
cumulative = (results['portfolio_return'] + 1).cumprod()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(results['date'], cumulative)
ax.set_title('Market-Neutral News Sentiment Strategy - Cumulative Returns')
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative Return')
ax.grid(True)
plt.show()

## Fama-French 3-Factor Analysis

In [None]:
import statsmodels.api as sm

# Template for a Fama-French 3-factor regression of the strategy's excess
# returns on Mkt-RF, SMB and HML. It is left commented out because it needs a
# local 'ff_factors.csv'; download the factors from Kenneth French's data
# library, save them under that name, then uncomment the lines below.
#
# NOTE(review): the merge requires ff_factors['date'] to have the same dtype
#   as results['date'] — convert with pd.to_datetime if needed.
# NOTE(review): French's files usually quote factor returns in percent; if
#   portfolio_return is in decimal form, divide the factor columns (and RF)
#   by 100 before regressing — confirm against the downloaded file.
# NOTE(review): the `* 252` annualization assumes daily returns — confirm the
#   return frequency of `results`.
# The fitted regression is named `ff_model` so that running this cell does not
# overwrite the SentimentModel instance stored in `model` earlier in the
# notebook.
#
# ff_factors = pd.read_csv('ff_factors.csv')
# merged = results.merge(ff_factors, on='date')
# X = sm.add_constant(merged[['Mkt-RF', 'SMB', 'HML']])
# y = merged['portfolio_return'] - merged['RF']
# ff_model = sm.OLS(y, X).fit()
# print(ff_model.summary())
# print(f"\nAnnualized Alpha: {ff_model.params['const'] * 252:.4f}")
# print(f"Alpha t-stat: {ff_model.tvalues['const']:.4f}")