
## Step 3: Exploratory Data Analysis (EDA)
Analyze trends, volatility, outliers, seasonality, and compute key metrics.

In [1]:
import sys, os
import pandas as pd
# Make the parent of the working directory importable so that the
# project-local `src` package resolves. The membership check keeps the cell
# idempotent: re-running it does not pile duplicate entries onto sys.path.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes, so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from src.eda import EDA

In [5]:
# Read the cleaned price table from disk (path is relative to the notebook's
# working directory).
cleaned_data_path = '../data/cleaned_data.csv'
cleaned_data = pd.read_csv(cleaned_data_path)

In [6]:
# Wrap the cleaned frame in the project's EDA helper; every analysis cell
# below calls a method on this single `eda` object.
eda = EDA(cleaned_data)

In [7]:
# Compute daily returns; the returned frame (displayed as the cell output)
# carries a new Daily_Return column.
# NOTE(review): the first row of each ticker is NaN in the output, which
# suggests returns are computed per ticker — confirm in src/eda.py.
eda.calculate_daily_returns()

Unnamed: 0,Date,Ticker,Close,High,Low,Open,Volume,Daily_Return
0,2015-01-02,BND,62.573112,62.603397,62.398980,62.406553,2218800,
1,2015-01-02,SPY,172.592880,173.811114,171.542687,173.391037,121465900,
2,2015-01-02,TSLA,14.620667,14.883333,14.217333,14.858000,71466000,
3,2015-01-05,BND,62.754848,62.777560,62.611000,62.641284,5820100,0.002904
4,2015-01-05,SPY,169.475906,171.702310,169.165053,171.534282,169632600,-0.018060
...,...,...,...,...,...,...,...,...
7600,2025-01-29,SPY,601.809998,604.130005,599.219971,603.719971,37177400,-0.004483
7601,2025-01-29,TSLA,389.100006,398.589996,384.480011,395.209991,68033600,-0.022583
7602,2025-01-30,BND,72.204674,72.294378,72.144864,72.224605,5622300,0.001382
7603,2025-01-30,SPY,605.039978,606.599976,600.719971,603.960022,39281300,0.005367


In [8]:
# Symbols to analyze; the per-ticker plotting and decomposition loops below
# iterate over this list in this order.
tickers = ['TSLA', 'BND', 'SPY']


### Visualize Closing Prices
Plot closing prices for each ticker to identify trends.

In [9]:
# Draw one closing-price chart per symbol in `tickers`.
for symbol in tickers:
    eda.plot_closing_price(symbol)

### Analyze Volatility
Calculate the rolling mean and standard deviation of daily returns (20-day window).

In [10]:
# Window length passed to the rolling mean / standard deviation computation.
rolling_window = 20
eda.analyze_volatility(window=rolling_window)

Unnamed: 0,Date,Ticker,Close,High,Low,Open,Volume,Daily_Return,Rolling_Mean,Rolling_Std
0,2015-01-02,BND,62.573112,62.603397,62.398980,62.406553,2218800,,,
1,2015-01-02,SPY,172.592880,173.811114,171.542687,173.391037,121465900,,,
2,2015-01-02,TSLA,14.620667,14.883333,14.217333,14.858000,71466000,,,
3,2015-01-05,BND,62.754848,62.777560,62.611000,62.641284,5820100,0.002904,,
4,2015-01-05,SPY,169.475906,171.702310,169.165053,171.534282,169632600,-0.018060,,
...,...,...,...,...,...,...,...,...,...,...
7600,2025-01-29,SPY,601.809998,604.130005,599.219971,603.719971,37177400,-0.004483,0.000608,0.009125
7601,2025-01-29,TSLA,389.100006,398.589996,384.480011,395.209991,68033600,-0.022583,-0.004567,0.036204
7602,2025-01-30,BND,72.204674,72.294378,72.144864,72.224605,5622300,0.001382,0.000288,0.002925
7603,2025-01-30,SPY,605.039978,606.599976,600.719971,603.960022,39281300,0.005367,0.001447,0.008724


In [11]:
# Preview the first rows of the frame held on the EDA object to confirm the
# Daily_Return, Rolling_Mean and Rolling_Std columns were stored on it, not
# just returned by the calls above.
eda.data.head()

Unnamed: 0,Date,Ticker,Close,High,Low,Open,Volume,Daily_Return,Rolling_Mean,Rolling_Std
0,2015-01-02,BND,62.573112,62.603397,62.39898,62.406553,2218800,,,
1,2015-01-02,SPY,172.59288,173.811114,171.542687,173.391037,121465900,,,
2,2015-01-02,TSLA,14.620667,14.883333,14.217333,14.858,71466000,,,
3,2015-01-05,BND,62.754848,62.77756,62.611,62.641284,5820100,0.002904,,
4,2015-01-05,SPY,169.475906,171.70231,169.165053,171.534282,169632600,-0.01806,,


In [12]:
# Collect the rows flagged as outliers at the chosen z-score threshold and
# display them as the cell output.
z_score_threshold = 3
outliers = eda.detect_outliers(threshold=z_score_threshold)
outliers

Unnamed: 0,Date,Ticker,Close,High,Low,Open,Volume,Daily_Return,Rolling_Mean,Rolling_Std,Z_Score
638,2015-11-04,TSLA,15.442,15.516,15.013333,15.133333,190896000,0.111735,0.000521,0.03579,3.107441
696,2015-12-03,BND,62.492912,62.833602,62.469685,62.833602,2510600,-0.007745,-0.000302,0.002323,3.204572
1112,2016-06-22,TSLA,13.110667,13.73,13.05,13.298,356136000,-0.104503,-0.004643,0.031008,3.220503
1117,2016-06-24,SPY,176.086334,182.679609,175.635805,176.424228,333444400,-0.035909,-0.001174,0.009589,3.622553
1262,2016-09-01,TSLA,13.384667,14.073333,13.366667,13.934,119146500,-0.053016,-0.006806,0.014229,3.247559
1276,2016-09-09,SPY,184.784958,188.033943,184.758967,187.981962,221589100,-0.023935,-0.001225,0.00604,3.75971
1399,2016-11-07,SPY,185.605194,185.640032,181.599648,181.599648,109794900,0.022057,-0.000679,0.006773,3.35673
1404,2016-11-09,BND,65.038177,65.442192,64.974801,65.355052,3448800,-0.009888,-0.000648,0.002585,3.575219
1792,2017-05-17,SPY,207.454514,211.202097,207.392927,211.202097,172174100,-0.017744,0.000523,0.005683,3.214151
1969,2017-08-10,SPY,215.485199,217.854341,215.432161,217.721732,120479500,-0.014115,-0.000128,0.00385,3.633223


### Decompose Time Series
Decompose each ticker's closing price into trend, seasonal, and residual components (annual period = 252 trading days).

In [13]:
# Seasonal period used for the decomposition: 252 observations, i.e. roughly
# one year of trading days.
trading_days_per_year = 252
for symbol in tickers:
    eda.decompose_series(symbol, period=trading_days_per_year)

### Calculate Key Metrics
Compute VaR (95%) and Sharpe Ratio for each ticker (assume 2% risk-free rate).

In [14]:
# Compute the per-ticker risk metrics with an assumed 2% risk-free rate and
# display the resulting dictionary.
assumed_risk_free_rate = 0.02
metrics = eda.calculate_metrics(risk_free_rate=assumed_risk_free_rate)
metrics

{'BND': {'VaR (95%)': np.float64(-0.004801219479811536),
  'Sharpe Ratio': np.float64(-0.07866243387466695)},
 'SPY': {'VaR (95%)': np.float64(-0.01671924939688598),
  'Sharpe Ratio': np.float64(0.6838425788060678)},
 'TSLA': {'VaR (95%)': np.float64(-0.05138725064469325),
  'Sharpe Ratio': np.float64(0.825698921799305)}}

## Summary
Check the `logs/` directory for detailed logs and `notebooks/` for saved plots. Key insights:
- Trends: See closing price plots.
- Volatility: 20-day rolling mean and standard deviation of daily returns added to `eda.data` as `Rolling_Mean` and `Rolling_Std`.
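- Outliers: Listed above — rows whose daily return lies more than 3 rolling standard deviations from its rolling mean (`Z_Score` > 3).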
- Seasonality: Decomposition plots saved.
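- Metrics: VaR (95%) and Sharpe Ratio computed per ticker. VaR (95%) is about -0.48% for BND, -1.67% for SPY, and -5.14% for TSLA; Sharpe Ratios are about -0.08 (BND), 0.68 (SPY), and 0.83 (TSLA).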