# Stock Market Trend Analysis – Phase 1
## Notebook 3: Exploratory Data Analysis

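**Purpose:** Explore the raw stock price and indicator data through price-trend, sector-return, volume, RSI, indicator-correlation, and moving-average-crossover plots, saving each figure to `../visuals/eda`.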

In [None]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Input and output locations, relative to the notebook's working directory.
DATA_DIR = '../data/raw'
EDA_DIR = '../visuals/eda'

# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.filterwarnings('ignore')

# Plot defaults: seaborn whitegrid theme, 14x7 figures, 10pt text.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 7)
plt.rcParams['font.size'] = 10

# Make sure the figure output directory exists before any cell saves to it.
os.makedirs(EDA_DIR, exist_ok=True)

print('Environment ready')

In [None]:
# Load the raw CSV inputs from DATA_DIR. company_info is read as-is; the other
# three tables have their 'date' column parsed to datetimes.
company_info = pd.read_csv(f'{DATA_DIR}/company_info.csv')
stock_prices, stock_indicators, market_indices = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in (
        f'{DATA_DIR}/stock_prices.csv',
        f'{DATA_DIR}/stock_prices_with_indicators.csv',
        f'{DATA_DIR}/market_indices.csv',
    )
)

# Report the size of the indicator table, which the plotting cells work from.
n_rows, n_cols = stock_indicators.shape
print(f'Dataset loaded: {n_rows:,} rows × {n_cols} columns')

In [None]:
# Closing-price history for the first (up to) five tickers, one panel each on
# a 2x3 grid, saved to price_trends.png.
tickers = stock_indicators['ticker'].unique()[:5]
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for ax, t in zip(axes, tickers):
    # Sort by date so the line is drawn in chronological order.
    d = stock_indicators.query('ticker == @t').sort_values('date')
    ax.plot(d.date, d.close, lw=2)
    ax.fill_between(d.date, d.close, alpha=0.3)
    ax.set_title(f'{t} Price Trend')
    ax.tick_params(axis='x', rotation=45)

# Hide every panel that did not receive a ticker. With five tickers this is
# just the last panel; with fewer tickers it also covers the other empty ones,
# which would otherwise be rendered as blank framed axes.
for unused_ax in axes[len(tickers):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig(f'{EDA_DIR}/price_trends.png', dpi=300)

In [None]:
# Box plot of per-ticker daily returns grouped by sector, saved to
# sector_returns.png.

# Attach each row's sector. The left join keeps every indicator row, including
# those whose ticker has no match in company_info.
df = stock_indicators.merge(company_info[['ticker', 'sector']], on='ticker', how='left')

# pct_change compares each row with the previous row of its group, so rows must
# be in date order within each ticker for the result to be a daily return.
# Sort a temporary view (stable sort, so already-ordered data is unaffected)
# and let the column assignment realign on the index; df keeps its row order.
df['daily_return'] = (
    df.sort_values('date', kind='mergesort')
    .groupby('ticker')['close']
    .pct_change()
)

plt.figure()
sns.boxplot(data=df, x='sector', y='daily_return')
# Dashed zero line separates gains from losses.
plt.axhline(0, ls='--', c='red', alpha=0.4)
plt.xticks(rotation=45)
plt.title('Daily Returns by Sector')
plt.tight_layout()
plt.savefig(f'{EDA_DIR}/sector_returns.png', dpi=300)

In [None]:
# Price (top) and volume (bottom) for the ticker found in the first row of the
# indicator table, saved to volume_analysis.png. `ticker` and `d` are reused by
# later cells.
ticker = stock_indicators['ticker'].iloc[0]
d = stock_indicators[stock_indicators['ticker'] == ticker].sort_values('date')

fig, stacked_axes = plt.subplots(nrows=2, ncols=1, sharex=True)
ax1, ax2 = stacked_axes

ax1.plot(d['date'], d['close'], lw=2)
ax1.set_title(f'{ticker} Price & Volume')

# Green where the close rose versus the previous row, red otherwise (the first
# row has no previous value and therefore falls in the red branch).
close_rose = d['close'].diff().gt(0)
colors = np.where(close_rose, 'green', 'red')
ax2.bar(d['date'], d['volume'], color=colors, alpha=0.6)

plt.tight_layout()
plt.savefig(f'{EDA_DIR}/volume_analysis.png', dpi=300)

In [None]:
# Price (top) and 14-period RSI (bottom) over the last 250 rows of the selected
# ticker, saved to rsi_plot.png. `d` stays trimmed to those rows for the cells
# that follow.
d = d.iloc[-250:]

fig, stacked_axes = plt.subplots(nrows=2, ncols=1, sharex=True)
ax1, ax2 = stacked_axes

ax1.plot(d['date'], d['close'], lw=2)
ax1.set_title(f'{ticker} RSI Analysis')

ax2.plot(d['date'], d['rsi_14'], color='purple', lw=2)
# Dashed reference lines at 70 (red) and 30 (green).
for level, shade in ((70, 'red'), (30, 'green')):
    ax2.axhline(level, ls='--', c=shade)
ax2.set_ylim(0, 100)

plt.tight_layout()
plt.savefig(f'{EDA_DIR}/rsi_plot.png', dpi=300)

In [None]:
# Pairwise correlation heatmap of price/indicator columns across the whole
# indicator table, saved to correlation_heatmap.png.
wanted_columns = [
    'close', 'volume', 'rsi_14', 'macd', 'macd_signal',
    'sma_20', 'sma_50', 'atr_14', 'bb_width',
]
# Keep only the columns that are actually present, preserving the order above.
features = []
for column in wanted_columns:
    if column in stock_indicators.columns:
        features.append(column)

corr = stock_indicators.loc[:, features].corr()

plt.figure(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', center=0)
plt.title('Indicator Correlation Heatmap')
plt.tight_layout()
plt.savefig(f'{EDA_DIR}/correlation_heatmap.png', dpi=300)

In [None]:
# Price with its 20- and 50-period simple moving averages, plus markers where
# the two averages cross, saved to ma_crossover.png. Uses the single-ticker
# frame `d` prepared by the earlier cells.
plt.figure()
plt.plot(d.date, d.close, lw=2, label='Price')
plt.plot(d.date, d.sma_20, label='SMA 20')
plt.plot(d.date, d.sma_50, label='SMA 50')

# Cast the boolean "SMA 20 above SMA 50" flag to int before differencing.
# pandas computes diff() on a boolean Series with XOR, which yields only
# True/False: -1 never occurs and every crossover compares equal to 1, so
# bearish crossovers were drawn as bullish. On ints the diff is +1 when SMA 20
# moves above SMA 50 and -1 when it drops back below.
signal = (d.sma_20 > d.sma_50).astype(int).diff()
bullish_cross = signal == 1
bearish_cross = signal == -1
plt.scatter(d.date[bullish_cross], d.close[bullish_cross], marker='^', color='green', s=100)
plt.scatter(d.date[bearish_cross], d.close[bearish_cross], marker='v', color='red', s=100)

plt.legend()
plt.title('Moving Average Crossover')
plt.tight_layout()
plt.savefig(f'{EDA_DIR}/ma_crossover.png', dpi=300)

In [None]:
# Bar chart of how often each trend_label value occurs, saved to
# target_distribution.png. Skipped when the table has no trend_label column.
if 'trend_label' in stock_indicators.columns:
    counts = stock_indicators['trend_label'].value_counts()
    # Open a dedicated figure, as the other plotting cells do; without it the
    # bars are drawn on whatever figure happens to be current.
    plt.figure()
    counts.plot(kind='bar', title='Target Distribution')
    plt.tight_layout()
    plt.savefig(f'{EDA_DIR}/target_distribution.png', dpi=300)

In [None]:
# Final status: confirm completion and say where the figures were written.
print('EDA Complete', f'Visualizations saved to {EDA_DIR}', sep='\n')