# StockSense - Feature Engineering
## Creating technical indicators for ML

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.data_loader import StockDataLoader
from src.features import FeatureEngineering
import config

%matplotlib inline

## 1. Load Data

In [None]:
# Load the raw data for a single ticker symbol through the project's
# StockDataLoader. `ticker` is reused later when the processed data is saved.
ticker = 'AAPL'
loader = StockDataLoader(ticker)
# NOTE(review): fetch_data() presumably returns a pandas DataFrame (only
# `.shape` is read here) — confirm against src/data_loader.
df = loader.fetch_data()
print(f"Original data: {df.shape}")

## 2. Create Technical Indicators

In [None]:
# Build the engineered feature set from the raw frame `df`.
# `fe` is kept around because the final cell calls fe.save_processed_data().
fe = FeatureEngineering(df)
# Later cells read columns such as SMA_20, EMA_12, RSI, MACD, MACD_Signal,
# MACD_Diff, BB_High, BB_Low and Target from this result.
df_processed = fe.create_all_features()
# Report the resulting shape and the full list of column names.
print(f"Processed data: {df_processed.shape}")
print(f"\nFeatures created: {df_processed.columns.tolist()}")

## 3. Visualize Indicators

In [None]:
# Four stacked panels, one per indicator family, all drawn against the
# processed frame's index.
fig, axes = plt.subplots(4, 1, figsize=(15, 12))
price_ax, rsi_ax, macd_ax, bands_ax = axes
x_values = df_processed.index

# Panel 1: closing price overlaid with its moving averages.
for column, label in (('Close', 'Close'), ('SMA_20', 'SMA 20'), ('EMA_12', 'EMA 12')):
    price_ax.plot(x_values, df_processed[column], label=label)

# Panel 2: RSI with dashed reference lines at 70 (red) and 30 (green).
rsi_ax.plot(x_values, df_processed['RSI'], label='RSI', color='purple')
for level, line_color in ((70, 'r'), (30, 'g')):
    rsi_ax.axhline(level, color=line_color, linestyle='--', alpha=0.5)

# Panel 3: MACD and signal lines, with MACD_Diff drawn as translucent bars.
for column, label in (('MACD', 'MACD'), ('MACD_Signal', 'Signal')):
    macd_ax.plot(x_values, df_processed[column], label=label)
macd_ax.bar(x_values, df_processed['MACD_Diff'], label='Histogram', alpha=0.3)

# Panel 4: closing price with the Bollinger band lines and a shaded envelope.
bands_ax.plot(x_values, df_processed['Close'], label='Close')
for column, label in (('BB_High', 'BB High'), ('BB_Low', 'BB Low')):
    bands_ax.plot(x_values, df_processed[column], label=label, alpha=0.5)
bands_ax.fill_between(
    x_values, df_processed['BB_Low'], df_processed['BB_High'], alpha=0.1
)

# Shared finishing touches; legend() runs only after every artist is drawn.
panel_titles = (
    'Price with Moving Averages',
    'RSI (Relative Strength Index)',
    'MACD',
    'Bollinger Bands',
)
for panel, title in zip(axes, panel_titles):
    panel.set_title(title)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.show()

## 4. Target Distribution

In [None]:
# Count the rows in each Target class, ordered by class value.
# value_counts() leaves out missing (NaN) targets by default.
target_counts = df_processed['Target'].value_counts().sort_index()
target_labels = ['Down', 'Sideways', 'Up']
bar_colors = ['red', 'gray', 'green']

# NOTE(review): the label order assumes the class values sort as
# Down < Sideways < Up — confirm against the Target encoding in src/features.
# When a class is absent, the three fixed labels no longer line up with the
# counts (plt.bar rejects mismatched lengths), so fall back to the raw class
# values and matplotlib's default bar colour.
if len(target_counts) != len(target_labels):
    target_labels = [str(value) for value in target_counts.index]
    bar_colors = None

plt.figure(figsize=(10, 6))
plt.bar(target_labels, target_counts.values, color=bar_colors)
plt.title('Target Class Distribution')
plt.xlabel('Movement')
plt.ylabel('Count')
# Annotate each bar with its count, just above the bar top.
for position, count in enumerate(target_counts.values):
    plt.text(position, count, str(count), ha='center', va='bottom')
plt.show()

print("Target distribution:")
print(target_counts)
print("\nPercentages:")
# Divide by the number of labelled rows rather than len(df_processed): rows
# with a missing Target are not counted above, so this keeps the shares
# summing to 100.
print(target_counts / target_counts.sum() * 100)

## 5. Feature Correlation

In [None]:
# Correlate the configured feature columns with each other and with Target.
# Building the list this way works whether config.FEATURE_COLUMNS is a list
# or a tuple, and avoids a duplicated 'Target' column (which would make
# corr_matrix['Target'] a DataFrame instead of a Series).
feature_cols = [col for col in config.FEATURE_COLUMNS if col != 'Target']
feature_cols.append('Target')
corr_matrix = df_processed[feature_cols].corr()

plt.figure(figsize=(14, 12))
sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0)
plt.title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

# Most correlated with target
print("\nFeatures most correlated with Target:")
target_corr = corr_matrix['Target'].abs().sort_values(ascending=False)
# Remove Target's self-correlation by label instead of assuming it sorts
# first: a tie at |r| == 1 or a NaN self-correlation (constant Target, sorted
# last) would otherwise hide a real feature or list Target itself.
print(target_corr.drop('Target').head(10))

## 6. Save Processed Data

In [None]:
# Persist the engineered features for this ticker via the FeatureEngineering
# instance created earlier in the notebook.
# NOTE(review): the output location and format are decided inside
# save_processed_data() — see src/features.
fe.save_processed_data(ticker)
# The check mark was stored as mojibake ("âœ…": UTF-8 bytes read as cp1252).
print("✅ Processed data saved!")