In [1]:
# quick_eda.py
# Minimal EDA for AAPL, MSFT, NVDA: prices, returns, correlations, Sharpe, and plots.

import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

In [2]:
# ============================
# Configuration
# ============================
# Universe and sample window (ISO dates passed straight to the downloader).
# NOTE(review): yfinance documents `end` as exclusive, so the last session
# requested is the one before END -- confirm that leaving out 2024-12-31 is
# intended.
TICKERS = ["AAPL", "MSFT", "NVDA"]
START, END = "2019-01-01", "2024-12-31"

# Annualisation conventions.
TRADING_DAYS = 252
ANNUAL_RISK_FREE = 0.02  # flat 2% placeholder; use 0 to work with raw returns

# ============================
# Output location
# ============================
# Relative to the current working directory; created up front (including any
# missing parents) and left untouched if it already exists.
out_dir = Path("week4_outputs")
out_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# ----------------------------
# Download data
# ----------------------------
# With auto_adjust=True the "Close" field is already adjusted for splits and
# dividends; selecting it leaves one price column per ticker.
print(f"Downloading adjusted close prices for {TICKERS}...")
px = yf.download(TICKERS, start=START, end=END, auto_adjust=True)["Close"]
px = px.dropna(how="all")  # drop dates on which no ticker has a price

# yfinance typically logs a failed ticker and leaves its column empty/all-NaN
# rather than raising. Left unchecked, the later row-wise dropna() would then
# discard every date and all metrics would silently come out as NaN, so fail
# loudly here instead.
missing = [t for t in TICKERS if t not in px.columns or px[t].isna().all()]
if missing:
    raise RuntimeError(
        f"No price data returned for {missing} between {START} and {END}."
    )
print(f"Got {px.shape[0]} rows.")

# ----------------------------
# Daily returns
# ----------------------------
# fill_method=None: do not forward-fill gaps before differencing. The implicit
# padding is deprecated in recent pandas and would turn a missing quote into a
# fabricated 0% return. With it disabled, any date touched by a gap is NaN and
# is removed by dropna(), which keeps only dates where every ticker has a
# genuine one-day return (this also removes the first row).
ret = px.pct_change(fill_method=None).dropna()
# Basic sanity: the NaN check alone passes vacuously on an empty frame, so
# verify that some rows survived as well.
assert not ret.empty, "No complete rows of returns; check the downloaded prices."
assert ret.notna().all().all(), "Returns contain NaNs after dropna."

# ----------------------------
# Summary metrics per ticker
# ----------------------------
# Annualisation used here (deliberately simple):
#   return     : arithmetic mean daily return compounded over TRADING_DAYS
#   volatility : daily standard deviation scaled by sqrt(TRADING_DAYS)
#   Sharpe     : (annual return - ANNUAL_RISK_FREE) / annual volatility
# NOTE(review): compounding the arithmetic mean gives a figure at or above the
# realised compound growth rate, so these returns and Sharpe ratios sit on the
# optimistic side -- confirm this convention is the one wanted.
daily_mean = ret.mean()
daily_std = ret.std()

ann_ret = (daily_mean + 1) ** TRADING_DAYS - 1
ann_vol = np.sqrt(TRADING_DAYS) * daily_std
excess_ret = ann_ret - ANNUAL_RISK_FREE
sharpe = excess_ret / ann_vol

# One row per ticker, best risk-adjusted performer first.
metric_columns = {
    "AnnReturn": ann_ret,
    "AnnVol": ann_vol,
    "AnnExcessReturn": excess_ret,
    "Sharpe": sharpe,
}
metrics = pd.DataFrame(metric_columns).sort_values(by="Sharpe", ascending=False)

# Persist the unrounded table; round only for display.
metrics_path = out_dir / "metrics.csv"
metrics.to_csv(metrics_path)
print("\n=== Annualized Metrics ===", metrics.round(4), sep="\n")

# ----------------------------
# Correlation of daily returns
# ----------------------------
# Pairwise correlation between the tickers' daily return columns
# (DataFrame.corr with its default method).
corr = ret.corr()

# Persist the unrounded matrix; round only for display.
corr_path = out_dir / "correlations.csv"
corr.to_csv(corr_path)
print("\n=== Return Correlations ===", corr.round(3), sep="\n")


Downloading adjusted close prices for ['AAPL', 'MSFT', 'NVDA']...


[*********************100%***********************]  3 of 3 completed

Got 1509 rows.

=== Annualized Metrics ===
        AnnReturn  AnnVol  AnnExcessReturn  Sharpe
Ticker                                            
NVDA       1.1224  0.5189           1.1024  2.1244
AAPL       0.4406  0.3085           0.4206  1.3634
MSFT       0.3388  0.2900           0.3188  1.0992

=== Return Correlations ===
Ticker   AAPL   MSFT   NVDA
Ticker                     
AAPL    1.000  0.735  0.603
MSFT    0.735  1.000  0.671
NVDA    0.603  0.671  1.000





In [4]:
# ----------------------------
# Plots (matplotlib; no special styles)
# ----------------------------


def _save_line_chart(frame, title, ylabel, filename, **line_kwargs):
    """Draw one line per column of `frame` and save the chart as a PNG.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to plot; the index is used for the x-axis and each column
        becomes one labelled line.
    title : str
        Figure title.
    ylabel : str
        Label for the y-axis (the x-axis is always labelled "Date").
    filename : str
        File name of the PNG, created inside `out_dir`.
    **line_kwargs
        Extra keyword arguments forwarded to `Axes.plot` (e.g. `alpha`).

    The figure is closed after saving, so nothing is rendered inline and
    figures do not accumulate in memory.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    for col in frame.columns:
        ax.plot(frame.index, frame[col], label=col, **line_kwargs)
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel(ylabel)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_dir / filename, dpi=150)
    plt.close(fig)


# 1) Price levels
_save_line_chart(px, "Adjusted Close Prices", "Price", "prices_levels.png")

# 2) Normalized prices (index to 100)
# Rebase each ticker on its own first available price: the price table was
# only filtered for all-NaN rows, so the very first row may still be NaN for
# an individual ticker, which would otherwise blank out its whole series.
first_valid_price = px.bfill().iloc[0]
norm = px / first_valid_price * 100.0
_save_line_chart(
    norm,
    "Normalized Prices (Index = 100 at Start)",
    "Index (100 = start)",
    "prices_normalized.png",
)

# 3) Daily returns (line)
# Slight transparency so overlapping series stay visible.
_save_line_chart(ret, "Daily Returns", "Return", "daily_returns.png", alpha=0.7)

# 4) 21-day rolling volatility
# Rolling standard deviation of daily returns, scaled to annual terms; the
# first 20 rows are NaN until the window fills.
rolling_vol = ret.rolling(21).std() * np.sqrt(TRADING_DAYS)
_save_line_chart(
    rolling_vol,
    "21-Day Rolling Volatility (Annualized)",
    "Volatility",
    "rolling_vol_21d.png",
)

# 5) Correlation matrix (imshow)
fig, ax = plt.subplots(figsize=(5, 4))
im = ax.imshow(corr, interpolation="nearest")
ax.set_title("Correlation Matrix (Daily Returns)")
ax.set_xticks(range(len(corr.columns)))
ax.set_xticklabels(corr.columns, rotation=45, ha="right")
ax.set_yticks(range(len(corr.index)))
ax.set_yticklabels(corr.index)
fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
fig.tight_layout()
fig.savefig(out_dir / "correlation_matrix.png", dpi=150)
plt.close(fig)

# Report what was written. resolve() prints an absolute path, which can
# include the local user name -- clear this output before sharing the notebook.
print(f"\nSaved outputs to: {out_dir.resolve()}")
print("Files:")
for name in sorted(entry.name for entry in out_dir.iterdir()):
    print(" -", name)



Saved outputs to: /Users/shruti.kalaskar/Documents/Northwestern/Fall 2025/Financial Engineering/week4_outputs
Files:
 - correlation_matrix.png
 - correlations.csv
 - daily_returns.png
 - metrics.csv
 - prices_levels.png
 - prices_normalized.png
 - rolling_vol_21d.png
