In [None]:
# Import necessary libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from statsmodels.tsa.seasonal import seasonal_decompose

# Set style for plots
sns.set(style="darkgrid")

# Define stock symbols
stocks = ["TSLA", "BND", "SPY"]

# Fixed download window (five calendar years). Per the yfinance docs the
# `end` date is exclusive, so the last session fetched is the final trading
# day of 2023.
START_DATE = "2019-01-01"
END_DATE = "2024-01-01"

# Fetch historical data, one frame per ticker
data = {ticker: yf.download(ticker, start=START_DATE, end=END_DATE) for ticker in stocks}

# Build a single frame of closing prices, one column per ticker.
# Newer yfinance releases return MultiIndex columns even for a single ticker,
# in which case `["Close"]` is a one-column DataFrame rather than a Series.
# Normalise both shapes to a Series so the frame is built the same way
# regardless of the installed yfinance version.
closing_prices = {}
for ticker in stocks:
    close = data[ticker]["Close"]
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    closing_prices[ticker] = close

df = pd.DataFrame(closing_prices)

# Display the first few rows
df.head()


In [None]:
# Count missing values per ticker before any filling
print(df.isnull().sum())

# Forward-fill gaps with the last known close.
# `fillna(method="ffill")` is deprecated in recent pandas; `ffill()` is the
# supported spelling. Rebinding instead of mutating in place keeps the cell
# idempotent on re-run. Note that forward fill cannot fill NaNs that precede
# a column's first valid value.
df = df.ffill()

# Summary statistics
print(df.describe())

# Check data types
print(df.dtypes)


In [None]:
# Overlay the closing-price series of every ticker on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
for symbol in stocks:
    ax.plot(df.index, df[symbol], label=symbol)

ax.set_title("Closing Prices Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.legend()
plt.show()


In [None]:
# Daily simple returns as FRACTIONS (0.05 == 5%). The first row has no
# previous close and is removed by dropna(). Downstream cells (outlier
# threshold, VaR, Sharpe) rely on `returns` staying in fractional units.
returns = df.pct_change().dropna()

# Plot daily returns.
# The axis is labelled in percent, so scale the fractional returns by 100
# for display only; `returns` itself is left untouched.
plt.figure(figsize=(12, 6))
for ticker in stocks:
    plt.plot(returns.index, returns[ticker] * 100, label=f"{ticker} Returns")

plt.title("Daily Percentage Returns")
plt.xlabel("Date")
plt.ylabel("Return (%)")
plt.legend()
plt.show()


In [None]:
# Rolling statistics of the closing prices.
# An integer window counts rows, so window=30 spans 30 observations of `df`
# rather than 30 calendar days.
# NOTE(review): the dispersion here is computed on price levels, so its scale
# follows each ticker's price; confirm this is the intended "volatility"
# measure rather than the rolling std of `returns`.
price_windows = df.rolling(window=30)
rolling_mean = price_windows.mean()
rolling_std = price_windows.std()

# Draw one rolling-std line per ticker on a shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
for symbol in stocks:
    ax.plot(rolling_std.index, rolling_std[symbol], label=f"{symbol} Volatility")

ax.set_title("Rolling 30-Day Standard Deviation (Volatility)")
ax.set_xlabel("Date")
ax.set_ylabel("Standard Deviation")
ax.legend()
plt.show()


In [None]:
# Detect extreme returns: any day on which a ticker moved more than
# `threshold` in either direction. `returns` holds fractions, so 0.05 == 5%.
threshold = 0.05  # 5% daily change

# Keep only the extreme values (everything else becomes NaN), then drop the
# dates on which no ticker breached the threshold.
outliers = returns.where(returns.abs() > threshold).dropna(how="all")

# Plot outliers.
# The axis is labelled in percent, so scale by 100 for display only.
# NaN entries (non-extreme days for a given ticker) are simply not drawn.
plt.figure(figsize=(12, 6))
for ticker in stocks:
    plt.scatter(outliers.index, outliers[ticker] * 100, label=f"{ticker} Outliers")

plt.title("Extreme Daily Returns (Outliers)")
plt.xlabel("Date")
plt.ylabel("Return (%)")
plt.legend()
plt.show()


In [None]:
# Decompose the TSLA closing price into trend, seasonal and residual parts.
# The series has one row per trading session, not per calendar day, so a
# yearly cycle is about 252 observations long; period=365 would span roughly
# a year and a half of sessions and mis-align the seasonal component.
# The multiplicative model requires strictly positive values, which holds
# for prices.
decomposition = seasonal_decompose(df["TSLA"].dropna(), period=252, model="multiplicative")

# One stacked panel per component, in the conventional order.
components = [
    (decomposition.observed, "Observed"),
    (decomposition.trend, "Trend"),
    (decomposition.seasonal, "Seasonality"),
    (decomposition.resid, "Residuals"),
]

fig, axes = plt.subplots(len(components), 1, figsize=(10, 8))
for ax, (series, label) in zip(axes, components):
    ax.plot(series, label=label)
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Historical one-day Value at Risk at the 95% level: the 5th percentile of
# the daily return distribution, per ticker (a negative fraction = a loss).
VaR_95 = returns.quantile(0.05)
print("Value at Risk (95% Confidence Interval):\n", VaR_95)

# Sharpe ratio (risk-adjusted return).
# `returns` are daily, so the annual risk-free rate is converted to a
# per-day rate before computing the excess return.
TRADING_DAYS_PER_YEAR = 252
risk_free_rate = 0.02  # Assume 2% annual risk-free return
daily_risk_free_rate = risk_free_rate / TRADING_DAYS_PER_YEAR

# Per-day Sharpe ratio (unchanged definition).
sharpe_ratio = (returns.mean() - daily_risk_free_rate) / returns.std()
print("Sharpe Ratio:\n", sharpe_ratio)

# Sharpe ratios are conventionally quoted on an annual basis. Mean excess
# return scales with T and standard deviation with sqrt(T), so the daily
# figure is multiplied by sqrt(trading days per year).
sharpe_ratio_annualized = sharpe_ratio * np.sqrt(TRADING_DAYS_PER_YEAR)
print("Annualized Sharpe Ratio:\n", sharpe_ratio_annualized)


In [None]:
# Save the cleaned prices and daily returns as CSV inside the 'data/' folder.
# The path is relative to the kernel's working directory. Create the folder
# first so the export does not fail on a fresh checkout where it is missing.
output_dir = Path("../data")
output_dir.mkdir(parents=True, exist_ok=True)

df.to_csv(output_dir / "cleaned_stock_data.csv")
returns.to_csv(output_dir / "daily_returns.csv")
