In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('..')

from src.data.collector import DataCollector
from src.data.processor import DataProcessor
from src.models.baseline import BaselineModelTrainer
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Fetch the raw AAPL history and run it through the processing pipeline.
# The request is for 5-minute bars over a 60-day period; save=False is passed to
# both steps (presumably to skip writing to disk -- confirm in the
# DataCollector / DataProcessor implementations).
fetch_options = {'interval': '5m', 'period': '60d', 'save': False}

collector = DataCollector()
df = collector.fetch_historical_data('AAPL', **fetch_options)

processor = DataProcessor()
# process_pipeline returns two values: the processed frame and a collection of
# feature names (only its length is used here).
df_processed, features = processor.process_pipeline(df, save=False)

# Quick sanity summary of what the pipeline produced.
print(
    f"Data shape: {df_processed.shape}",
    f"Features: {len(features)}",
    sep="\n",
)

In [None]:
# Train every baseline model on the processed data.
#
# Seed NumPy's global RNG first so that Restart & Run All reproduces the same
# fitted models. scikit-learn estimators created with random_state=None draw
# from this global generator.
# NOTE(review): this has an effect only if BaselineModelTrainer does not already
# pin random_state on its models -- confirm in src/models/baseline.py.
SEED = 42
np.random.seed(SEED)

trainer = BaselineModelTrainer()
# test_size=0.2 with use_time_split=True: presumably holds out the most recent
# 20% of rows rather than a shuffled sample -- confirm against prepare_data().
trainer.prepare_data(df_processed, features, test_size=0.2, use_time_split=True)
trainer.train_all_models()

In [None]:
# Print the trainer's comparison of the models fitted above. The metrics and
# layout are defined by BaselineModelTrainer.print_comparison (not visible here).
trainer.print_comparison()

In [None]:
# Horizontal bar chart of the Random Forest's most important features.
TOP_N = 20
feature_imp = trainer.get_feature_importance('Random Forest', top_n=TOP_N)

# The trainer may return fewer than TOP_N rows (e.g. the dataset has fewer
# features). Size the axis from the rows actually present so the bar positions,
# values, and tick labels always have matching lengths; a fixed range(20) raises
# a shape-mismatch error in that case.
top_features = feature_imp.head(TOP_N)
positions = list(range(len(top_features)))

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(positions, top_features['importance'])
ax.set_yticks(positions)
ax.set_yticklabels(top_features['feature'])
ax.set_xlabel('Importance')
# Report the number of bars actually drawn so the title never overstates it.
ax.set_title(f'Top {len(top_features)} Feature Importance')
fig.tight_layout()
plt.show()

In [None]:
# Confusion matrix for the model the trainer selects as best.
# get_best_model() returns a (name, model) pair; the name keys into
# trainer.results, where that model's test-set predictions are stored.
best_name, best_model = trainer.get_best_model()
predictions = trainer.results[best_name]['predictions']

cm = confusion_matrix(y_true=trainer.y_test, y_pred=predictions)

# Draw on an explicit Axes so the title is attached to the matrix panel itself.
fig, ax = plt.subplots()
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Down', 'Up'])
disp.plot(ax=ax, cmap='Blues')
ax.set_title(f'Confusion Matrix - {best_name}')
plt.show()

In [None]:
# Inspect the best model's predicted probabilities: distribution (left) and
# value per test sample in stored order (right).
# Relies on best_name from the confusion-matrix cell.
probabilities = trainer.results[best_name]['probabilities']

fig, (hist_ax, sample_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: histogram of the probabilities.
hist_ax.hist(probabilities, bins=50)
hist_ax.set_title('Prediction Probability Distribution')
hist_ax.set_xlabel('Probability')
hist_ax.set_ylabel('Frequency')

# Right panel: one point per sample, with two horizontal reference thresholds.
sample_ax.scatter(range(len(probabilities)), probabilities, alpha=0.5)
threshold_lines = (
    (0.5, 'r', 'Decision Boundary'),
    (0.6, 'g', 'Conservative Threshold'),
)
for level, colour, caption in threshold_lines:
    sample_ax.axhline(y=level, color=colour, linestyle='--', label=caption)
sample_ax.set_title('Prediction Probabilities Over Time')
sample_ax.set_xlabel('Sample')
sample_ax.set_ylabel('Probability')
sample_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Persist the trained models through the trainer. The destination and file
# format are decided inside BaselineModelTrainer.save_models (not visible here).
trainer.save_models()
# Reached only when save_models() returned without raising.
print("Models saved successfully!")