In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('..')

from src.data.collector import DataCollector
from src.data.processor import DataProcessor


In [None]:
# Instantiate the project's collector and load AAPL history into `df`.
# NOTE(review): '5m' / '30d' presumably mean 5-minute bars over the last
# 30 days, and save=False presumably skips writing the result to disk --
# confirm against DataCollector.fetch_historical_data.
collector = DataCollector()
df = collector.fetch_historical_data(
    'AAPL',
    interval='5m',
    period='30d',
    save=False,
)

In [None]:
# Pass the fetched frame through the project's cleaning step, keeping the
# result under a new name so `df` still holds the data as fetched.
processor = DataProcessor()
df_clean = processor.clean_data(df)

# Report how many rows the cleaning step returned.
print(
    f"Cleaned data: {len(df_clean)} records"
)

In [None]:
# Let the processor append its technical-indicator columns to the cleaned frame.
df_indicators = processor.add_technical_indicators(df_clean)
print(f"Total columns: {len(df_indicators.columns)}")

# Diff against df_clean -- the frame actually handed to
# add_technical_indicators -- rather than the raw `df`, so that a column
# added or renamed by clean_data is not misreported as a new indicator.
base_columns = set(df_clean.columns)
print("New indicators:", [col for col in df_indicators.columns if col not in base_columns])

In [None]:
# Two stacked panels sharing one figure: the 'Close' column on top and the
# 'RSI_14' column underneath.
fig, (ax_price, ax_rsi) = plt.subplots(2, 1, figsize=(14, 6))

ax_price.plot(df_indicators.index, df_indicators['Close'])
ax_price.set_title('Price')

ax_rsi.plot(df_indicators.index, df_indicators['RSI_14'])
# Dashed horizontal guides at 70 and 30, labelled for the legend.
for level, colour, tag in ((70, 'r', 'Overbought'), (30, 'g', 'Oversold')):
    ax_rsi.axhline(y=level, color=colour, linestyle='--', label=tag)
ax_rsi.set_title('RSI Indicator')
ax_rsi.legend()

fig.tight_layout()
plt.show()

In [None]:
# Two stacked panels: the 'Close' column on top; the MACD line, its signal
# line and the MACD histogram underneath.
fig, (ax_price, ax_macd) = plt.subplots(2, 1, figsize=(14, 8))

ax_price.plot(df_indicators.index, df_indicators['Close'])
ax_price.set_title('Price')

# Bar width is measured in x-axis data units. On a date axis that unit is one
# day, so matplotlib's default width of 0.8 would make every bar far wider
# than the gap between intraday rows and smear the histogram into a block.
# Size the bars to 80% of the typical gap between consecutive index values.
bar_index = df_indicators.index
index_is_measurable = (
    pd.api.types.is_datetime64_any_dtype(bar_index)
    or pd.api.types.is_numeric_dtype(bar_index)
)
if index_is_measurable and len(bar_index) > 1:
    # Median (not mean) so overnight / weekend gaps do not inflate the width.
    bar_width = 0.8 * bar_index.to_series().diff().median()
else:
    # Too few rows, or an index with no measurable spacing: keep the default.
    bar_width = 0.8

ax_macd.plot(bar_index, df_indicators['MACD'], label='MACD')
ax_macd.plot(bar_index, df_indicators['MACD_Signal'], label='Signal')
ax_macd.bar(bar_index, df_indicators['MACD_Hist'], width=bar_width, alpha=0.3, label='Histogram')
ax_macd.set_title('MACD Indicator')
ax_macd.legend()

fig.tight_layout()
plt.show()

In [None]:
# Ask the processor to attach labels to the indicator frame; the result is
# expected to carry a 'Target' column, which is summarised below.
# NOTE(review): lookahead=1 / threshold=0.0 presumably label each row by the
# sign of the next bar's return -- confirm against DataProcessor.create_labels.
df_labeled = processor.create_labels(
    df_indicators,
    lookahead=1,
    threshold=0.0,
)

# Relative frequency of each label value (normalize=True gives fractions).
target_shares = df_labeled['Target'].value_counts(normalize=True)
print("Label distribution:")
print(target_shares)

In [None]:
# Every column except the label and the two 'Future_*' columns is treated as
# a candidate feature.
# NOTE(review): the 'Future_*' columns are presumably built from later prices
# by create_labels and would leak the label -- confirm.
non_feature_cols = ['Target', 'Future_Close', 'Future_Return']
features = [col for col in df_labeled.columns if col not in non_feature_cols]

# Keep only numeric / boolean columns: DataFrame.corr() raises on non-numeric
# data in recent pandas versions, while older versions dropped it silently.
numeric_frame = df_labeled[features + ['Target']].select_dtypes(include=['number', 'bool'])

# Correlate each feature with the label, then drop the label's correlation
# with itself (always 1.0), which would otherwise take the first slot of the
# "top 10" listing.
correlations = (
    numeric_frame.corr()['Target']
    .drop(labels='Target')
    .sort_values(ascending=False)
)
print("Top 10 correlations with target:")
print(correlations.head(10))