In [2]:
# Import Libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from src.data_loader import DataLoader
from src.eda import EDA


In [3]:

# Load Data
# Fetch the raw per-ticker data, then run it through the loader's cleaning
# step. `raw_data` is kept under its own name so the uncleaned download stays
# available alongside `cleaned_data`.
print("Loading data...")
loader = DataLoader()
raw_data = loader.download_data()
cleaned_data = loader.clean_data(raw_data)

# Coverage report: asset count, then one line per ticker with its row count
# and the first/last timestamp found in the frame's index.
print(f"Data loaded for {len(cleaned_data)} assets")
for ticker, df in cleaned_data.items():
    first_day = df.index.min().date()
    last_day = df.index.max().date()
    print(f"{ticker}: {len(df)} records from {first_day} to {last_day}")


Loading data...
Downloading data from Yahoo Finance...


[*********************100%***********************]  1 of 1 completed


Downloaded TSLA: 2775 records


[*********************100%***********************]  1 of 1 completed


Downloaded BND: 2775 records


[*********************100%***********************]  1 of 1 completed

Downloaded SPY: 2775 records
Cleaned TSLA: Removed 0 missing values, 2756 rows remaining
Cleaned BND: Removed 0 missing values, 2756 rows remaining
Cleaned SPY: Removed 0 missing values, 2756 rows remaining
Data loaded for 3 assets
TSLA: 2756 records from 2015-01-30 to 2026-01-14
BND: 2756 records from 2015-01-30 to 2026-01-14
SPY: 2756 records from 2015-01-30 to 2026-01-14





In [4]:

# Summary Statistics
# EDA is built from the cleaned per-ticker frames; the summary table is shown
# with rich display rather than print so it renders as a table.
eda = EDA(cleaned_data)
summary_stats = eda.generate_summary_statistics()
print("Summary Statistics:")
display(summary_stats)

# Risk Metrics
# Collect one metrics mapping per ticker, then transpose so each ticker is a
# row and each metric is a column.
print("\nRisk Metrics:")
risk_metrics = {}
for ticker, df in cleaned_data.items():
    metrics = loader.calculate_risk_metrics(df)
    risk_metrics[ticker] = metrics

risk_df = pd.DataFrame(risk_metrics).T
display(risk_df)

# Visualizations
# 1. Price Series
eda.plot_price_series()

# 2. Returns Distribution
eda.plot_returns_distribution()

# 3. Volatility Analysis
eda.plot_volatility()

# 4. Correlation Matrix
# The matrix was previously computed and then dropped; show it so the
# correlation statement in the insights below can be checked against numbers.
print("\nCorrelation Matrix:")
correlation_matrix = eda.calculate_correlation_matrix()
display(correlation_matrix)

# Stationarity Tests
print("Stationarity Test Results:")
stationarity_results = eda.perform_stationarity_test()
display(stationarity_results)

# Outlier Detection
print("Outlier Detection:")
outliers = eda.detect_outliers(threshold=3)
for ticker, outlier_info in outliers.items():
    print(f"{ticker}: {outlier_info['outlier_count']} outliers "
          f"({outlier_info['outlier_percentage']:.2f}% of returns)")

# Key Insights
# The quantitative statements (1, 2, 4) are derived from the tables computed
# above instead of being hard-coded: the data comes from download_data() on
# each run, so fixed text could silently disagree with the numbers displayed.
# astype(float) guards against an object-dtype frame after the transpose.
volatility = risk_df["Volatility"].astype(float)
mean_return = risk_df["Mean_Return"].astype(float)
most_volatile = volatility.idxmax()
least_volatile = volatility.idxmin()
top_return = mean_return.idxmax()
# Tickers whose return series was NOT flagged stationary by the test above.
non_stationary = [
    name
    for name, flag in stationarity_results["Return_Stationary"].items()
    if not flag
]

print("\nKey Insights:")
if most_volatile == top_return:
    print(f"1. {most_volatile} shows highest volatility and returns among the three assets")
else:
    print(f"1. {most_volatile} shows highest volatility; {top_return} shows highest returns")
print(f"2. {least_volatile} has the lowest volatility, suitable for risk-averse investors")
# NOTE(review): insights 3 and 5 are qualitative judgements and are still
# fixed text; confirm them against risk_df and the correlation matrix shown above.
print("3. SPY provides balanced risk-return profile")
if not non_stationary:
    print("4. Daily returns are stationary (p < 0.05) for all assets")
else:
    print(f"4. Daily returns are NOT stationary for: {', '.join(non_stationary)}")
print("5. TSLA and SPY show moderate positive correlation")


Summary Statistics:


Unnamed: 0,Start_Date,End_Date,Total_Days,Initial_Price,Final_Price,Total_Return,Annualized_Return,Mean_Daily_Return,Std_Daily_Return,Skewness,Kurtosis,Min_Daily_Return,Max_Daily_Return
TSLA,2015-01-30 00:00:00,2026-01-14 00:00:00,2756,13.573333,439.200012,3135.756605,37.425542,0.19192,3.641076,0.289803,4.444204,-21.062824,22.68999
BND,2015-01-30 00:00:00,2026-01-14 00:00:00,2756,61.628029,74.43,20.772969,1.740778,0.00762,0.338074,-0.925663,36.709835,-5.438547,4.220132
SPY,2015-01-30 00:00:00,2026-01-14 00:00:00,2756,165.623795,690.359985,316.824157,13.942711,0.057633,1.119594,-0.316437,14.288331,-10.942358,10.501934



Risk Metrics:


Unnamed: 0,Mean_Return,Volatility,VaR_Historical,VaR_Parametric,Sharpe_Ratio,Max_Drawdown
TSLA,0.48364,0.578003,-0.052526,-0.057881,0.80214,-0.736322
BND,0.019201,0.053668,-0.004798,-0.005502,-0.01488,-0.185821
SPY,0.145235,0.17773,-0.016713,-0.017752,0.704635,-0.337173


Stationarity Test Results:


Unnamed: 0,Price_ADF_Statistic,Price_p_value,Price_Stationary,Return_ADF_Statistic,Return_p_value,Return_Stationary
TSLA,-0.789094,0.822305,False,-36.248463,0.0,True
BND,-1.124118,0.705359,False,-10.382106,0.0,True
SPY,1.20102,0.995993,False,-17.147193,0.0,True


Outlier Detection:
TSLA: 44 outliers (1.60% of returns)
BND: 32 outliers (1.16% of returns)
SPY: 39 outliers (1.42% of returns)

Key Insights:
1. TSLA shows highest volatility and returns among the three assets
2. BND has the lowest volatility, suitable for risk-averse investors
3. SPY provides balanced risk-return profile
4. Daily returns are stationary (p < 0.05) for all assets
5. TSLA and SPY show moderate positive correlation
