In [1]:
import sys
import os

# Resolve the project root as the parent of the current working directory.
# NOTE(review): this presumes the kernel is started from a sub-folder of the
# project (e.g. notebooks/) — confirm if the notebook is ever moved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_path = os.path.join(project_root, 'src')

# Add both locations to the front of the Python path. The membership check
# keeps the cell idempotent: re-running it no longer stacks duplicate entries
# onto sys.path. Insertion order matches the original (src_path ends up ahead
# of project_root when both are new).
for path_entry in (project_root, src_path):
    if path_entry not in sys.path:
        sys.path.insert(0, path_entry)

print(f"Project root: {project_root}")
print(f"Src path: {src_path}")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

# Import classes
from src.data.collect_data import main as collect_data_main
from src.data.preprocess import DataPreprocessor
from src.utils.visualization import FinancialVisualizer

# Set up plotting
# Apply the matplotlib style sheet first, then the seaborn colour palette.
# NOTE(review): the palette is presumably set second so that the style sheet
# does not override it — confirm before reordering these two lines.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# IPython line magic: render matplotlib figures inline in the cell output.
%matplotlib inline

# Reached only if every import and setup statement above ran without raising.
print("All imports successful!")

Project root: c:\Users\aweso\portfolio-forecast-optimizer
Src path: c:\Users\aweso\portfolio-forecast-optimizer\src
All imports successful!


In [None]:
# Run the complete modular workflow: collect raw data, then preprocess it.
# `collect_data_main` and `DataPreprocessor` come from the notebook's import
# cell, so this cell carries no import statements of its own.

# Step 1: Collect data
# NOTE: per its own log output this step fetches quotes and writes CSV files
# under data/raw each time the cell runs.
print("Step 1: Data Collection")
collect_data_main()

# Step 2: Process data
print("\nStep 2: Data Preprocessing")
preprocessor = DataPreprocessor()
summary = preprocessor.process_all_data()

# Extract processed data
data_dict = summary['processed_data']
stationarity_results = summary['stationarity_results']
outlier_results = summary['outlier_results']

# Fail here with a clear message instead of inside a later plotting cell.
assert data_dict, "process_all_data() returned no processed assets"

print(f"\nLoaded data for {len(data_dict)} assets")
for ticker, data in data_dict.items():
    print(f"{ticker}: {data.shape[0]} data points, {data.shape[1]} features")

Step 1: Data Collection
PORTFOLIO FORECAST OPTIMIZER - DATA COLLECTION
Output directory: c:\Users\aweso\portfolio-forecast-optimizer\data\raw
Fetching data from 2015-07-01 to 2025-07-31
Assets: TSLA, BND, SPY

Fetching data for TSLA...
Successfully fetched data for TSLA
Data shape: (2535, 7)
Date range: 2015-07-01 to 2025-07-30
Data saved to c:\Users\aweso\portfolio-forecast-optimizer\data\raw\TSLA_data.csv

Basic statistics for TSLA:
  - Total trading days: 2535
  - Price range: $9.58 - $479.86
  - Average volume: 114,178,885
----------------------------------------

Fetching data for BND...
Successfully fetched data for BND
Data shape: (2535, 8)
Date range: 2015-07-01 to 2025-07-30
Data saved to c:\Users\aweso\portfolio-forecast-optimizer\data\raw\BND_data.csv

Basic statistics for BND:
  - Total trading days: 2535
  - Price range: $60.78 - $77.32
  - Average volume: 4,434,376
----------------------------------------

Fetching data for SPY...
Successfully fetched data for SPY
Data sh

In [5]:
# Create interactive price comparison: one closing-price line per asset.
fig = go.Figure()

# Base palette. It is indexed modulo its length so that a portfolio with more
# than three assets reuses colours instead of raising IndexError.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
for i, (ticker, data) in enumerate(data_dict.items()):
    fig.add_trace(go.Scatter(
        x=data.index,
        y=data['Close'],
        name=ticker,
        line=dict(color=colors[i % len(colors)])
    ))

fig.update_layout(
    title='Interactive Price Comparison',
    xaxis_title='Date',
    yaxis_title='Price ($)',
    hovermode='x unified'
)
fig.show()

In [16]:
# Daily percentage change plots: one subplot row per asset.
# The grid is sized from data_dict rather than a hard-coded 3, so a fourth
# asset no longer targets a non-existent row and fewer assets leave no blank
# panels. make_subplots needs at least one row, hence the floor of 1.
n_assets = max(len(data_dict), 1)
fig = make_subplots(
    rows=n_assets,
    cols=1,
    subplot_titles=[f'{ticker} Daily Returns' for ticker in data_dict.keys()]
)

for i, (ticker, data) in enumerate(data_dict.items(), 1):
    return_col = f'{ticker}_Daily_Return'
    # Assets without a return column keep an empty panel.
    if return_col in data.columns:
        fig.add_trace(
            go.Scatter(x=data.index, y=data[return_col], name=f'{ticker} Daily Returns'),
            row=i, col=1
        )

# 300 px per row reproduces the original 900 px height for three assets.
fig.update_layout(height=300 * n_assets, title_text="Daily Percentage Change (Returns)")
fig.show()

In [6]:
# Descriptive statistics of each asset's daily-return series
returns_stats = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col not in data.columns:
        # Assets without a return column are left out of the table.
        continue

    returns = data[return_col].dropna()
    stat_labels = ('Mean', 'Std', 'Min', 'Max', 'Skewness', 'Kurtosis')
    stat_values = (
        returns.mean(),
        returns.std(),
        returns.min(),
        returns.max(),
        returns.skew(),
        returns.kurtosis(),
    )
    returns_stats[ticker] = dict(zip(stat_labels, stat_values))

# Transpose so that each ticker is a row and each statistic a column.
returns_df = pd.DataFrame(returns_stats).transpose()
returns_df

Unnamed: 0,Mean,Std,Min,Max,Skewness,Kurtosis
TSLA,0.001974,0.037862,-0.210628,0.2269,0.301498,4.319976
BND,6.6e-05,0.003552,-0.054385,0.042201,-0.93443,35.362516
SPY,0.00061,0.011506,-0.109424,0.105019,-0.318753,14.924605


In [7]:
# Interactive volatility comparison: one subplot row per asset.
# The grid is sized from data_dict rather than a hard-coded 3, so a fourth
# asset no longer targets a non-existent row and fewer assets leave no blank
# panels. make_subplots needs at least one row, hence the floor of 1.
n_assets = max(len(data_dict), 1)
fig = make_subplots(
    rows=n_assets,
    cols=1,
    subplot_titles=[f'{ticker} Volatility' for ticker in data_dict.keys()]
)

for i, (ticker, data) in enumerate(data_dict.items(), 1):
    vol_col = f'{ticker}_Volatility_20d'
    # Assets without the rolling-volatility column keep an empty panel.
    if vol_col in data.columns:
        fig.add_trace(
            go.Scatter(x=data.index, y=data[vol_col], name=f'{ticker} 20d Vol'),
            row=i, col=1
        )

# 300 px per row reproduces the original 900 px height for three assets.
fig.update_layout(height=300 * n_assets, title_text="Rolling Volatility Comparison")
fig.show()

In [8]:
# Gather each asset's daily-return series, keyed by ticker
returns_data = {
    ticker: data[f'{ticker}_Daily_Return']
    for ticker, data in data_dict.items()
    if f'{ticker}_Daily_Return' in data.columns
}

# Combine the series column-wise and keep only rows where every asset has a
# return. NOTE: this rebinds `returns_df`, which the returns-statistics cell
# also assigns; after this cell the name refers to the wide returns frame.
returns_df = pd.DataFrame(returns_data)
returns_df = returns_df.dropna()
corr_matrix = returns_df.corr()

# Interactive heatmap with the coefficient printed in each tile
fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", title="Correlation Matrix of Daily Returns")
fig.show()

In [9]:
# Calculate comprehensive risk metrics for every asset that has a daily-return
# column: annualised return/volatility, Sharpe ratio, maximum drawdown, and
# historical 95% VaR / CVaR on daily returns.
TRADING_DAYS_PER_YEAR = 252  # annualisation factor applied to daily statistics

risk_metrics = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()

        # Annualized metrics. The Sharpe ratio here uses no risk-free rate
        # (i.e. it is return / volatility) and falls back to 0 when the
        # volatility is not strictly positive.
        annual_return = returns.mean() * TRADING_DAYS_PER_YEAR
        annual_vol = returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)
        sharpe_ratio = annual_return / annual_vol if annual_vol > 0 else 0

        # Drawdown. The wealth index starts from an implicit capital of 1.0
        # before the first return, so the running peak is floored at 1.0;
        # otherwise a loss at the very start of the sample would never be
        # measured against the initial capital.
        cumulative_returns = (1 + returns).cumprod()
        running_max = cumulative_returns.cummax().clip(lower=1.0)
        drawdown = (cumulative_returns - running_max) / running_max
        max_drawdown = drawdown.min()

        # VaR and CVaR (historical): the 5th percentile of daily returns and
        # the mean of the returns at or below it.
        var_95 = returns.quantile(0.05)
        cvar_95 = returns[returns <= var_95].mean()

        risk_metrics[ticker] = {
            'Annual Return': annual_return,
            'Annual Volatility': annual_vol,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'VaR (95%)': var_95,
            'CVaR (95%)': cvar_95
        }

# One row per ticker, one column per metric.
risk_df = pd.DataFrame(risk_metrics).T
risk_df

Unnamed: 0,Annual Return,Annual Volatility,Sharpe Ratio,Max Drawdown,VaR (95%),CVaR (95%)
TSLA,0.497527,0.601041,0.827776,-0.736322,-0.055459,-0.083169
BND,0.01652,0.056384,0.292982,-0.185821,-0.0051,-0.007943
SPY,0.153736,0.182646,0.841713,-0.337173,-0.017058,-0.028194


In [10]:
# Display stationarity test results
# Jupyter only auto-renders the last *top-level* expression of a cell; a bare
# expression inside an `if` block is evaluated and discarded. The table is
# therefore passed to display(), which IPython makes available in notebook
# kernels without an import.
if stationarity_results:
    stationarity_df = pd.DataFrame(stationarity_results)
    display(stationarity_df[['ticker', 'adf_statistic', 'p_value', 'is_stationary']])
else:
    # Make the empty case visible instead of producing a silent cell.
    print("No stationarity results available.")

In [11]:
# Display outlier analysis results
# Jupyter only auto-renders the last *top-level* expression of a cell; a bare
# expression inside an `if` block is evaluated and discarded. The table is
# therefore passed to display(), which IPython makes available in notebook
# kernels without an import.
if outlier_results:
    outlier_df = pd.DataFrame(outlier_results)
    display(outlier_df[['ticker', 'outlier_count', 'outlier_percentage', 'method']])
else:
    # Make the empty case visible instead of producing a silent cell.
    print("No outlier results available.")

In [12]:
# Volume analysis: one subplot row per asset.
# The grid is sized from data_dict rather than a hard-coded 3, so a fourth
# asset no longer targets a non-existent row and fewer assets leave no blank
# panels. make_subplots needs at least one row, hence the floor of 1.
n_assets = max(len(data_dict), 1)
fig = make_subplots(
    rows=n_assets,
    cols=1,
    subplot_titles=[f'{ticker} Volume' for ticker in data_dict.keys()]
)

for i, (ticker, data) in enumerate(data_dict.items(), 1):
    fig.add_trace(
        go.Scatter(x=data.index, y=data['Volume'], name=f'{ticker} Volume'),
        row=i, col=1
    )

# 300 px per row reproduces the original 900 px height for three assets.
fig.update_layout(height=300 * n_assets, title_text="Trading Volume Comparison")
fig.show()

In [13]:
# Compounded growth of each asset, built from its daily returns
fig = go.Figure()

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col not in data.columns:
        # No return column for this asset: nothing to plot.
        continue

    # Running product of (1 + daily return)
    cumulative_returns = (1 + data[return_col]).cumprod()
    trace = go.Scatter(
        x=data.index,
        y=cumulative_returns,
        name=f'{ticker} Cumulative Returns',
        line={'width': 2},
    )
    fig.add_trace(trace)

layout_options = {
    'title': 'Cumulative Returns Comparison',
    'xaxis_title': 'Date',
    'yaxis_title': 'Cumulative Returns',
    'hovermode': 'x unified',
}
fig.update_layout(**layout_options)
fig.show()

In [14]:
# Risk-return scatter: annualised volatility on x, annualised mean return on y
risk_return_data = []

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col not in data.columns:
        # Assets without a return column are not plotted.
        continue

    returns = data[return_col].dropna()
    annual_return = returns.mean() * 252
    annual_vol = returns.std() * np.sqrt(252)
    # Row layout must match the column order used for the DataFrame below.
    risk_return_data.append([annual_vol, annual_return, ticker])

# Only build the figure when at least one asset contributed a point.
if risk_return_data:
    risk_return_df = pd.DataFrame(risk_return_data, columns=['Risk', 'Return', 'Ticker'])

    fig = px.scatter(risk_return_df, x='Risk', y='Return', text='Ticker', title='Risk-Return Profile')
    # Place each ticker label above its marker.
    fig.update_traces(textposition="top center")
    fig.show()

In [15]:
# Create comprehensive summary table: one row per asset with formatted
# (string) statistics of its daily-return series.
summary_stats = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()

        # Compute each statistic once and reuse it below.
        mean_daily = returns.mean()
        std_daily = returns.std()
        annual_return = mean_daily * 252
        annual_vol = std_daily * np.sqrt(252)
        # Same zero-volatility guard as the risk-metrics cell, so both tables
        # report the same Sharpe ratio instead of inf/nan here.
        sharpe_ratio = annual_return / annual_vol if annual_vol > 0 else 0

        # An empty frame has no first/last index entry to format.
        if len(data):
            date_range = f"{data.index[0].date()} to {data.index[-1].date()}"
        else:
            date_range = "n/a"

        summary_stats[ticker] = {
            'Data Points': len(data),
            'Date Range': date_range,
            'Mean Daily Return': f"{mean_daily:.6f}",
            'Std Daily Return': f"{std_daily:.6f}",
            'Annualized Return': f"{annual_return:.4f}",
            'Annualized Volatility': f"{annual_vol:.4f}",
            'Sharpe Ratio': f"{sharpe_ratio:.4f}",
            'Min Return': f"{returns.min():.6f}",
            'Max Return': f"{returns.max():.6f}",
            'Skewness': f"{returns.skew():.4f}",
            'Kurtosis': f"{returns.kurtosis():.4f}"
        }

# One row per ticker, one column per statistic.
summary_df = pd.DataFrame(summary_stats).T
summary_df

Unnamed: 0,Data Points,Date Range,Mean Daily Return,Std Daily Return,Annualized Return,Annualized Volatility,Sharpe Ratio,Min Return,Max Return,Skewness,Kurtosis
TSLA,2336,2016-04-15 to 2025-07-30,0.001974,0.037862,0.4975,0.601,0.8278,-0.210628,0.2269,0.3015,4.32
BND,2336,2016-04-15 to 2025-07-30,6.6e-05,0.003552,0.0165,0.0564,0.293,-0.054385,0.042201,-0.9344,35.3625
SPY,2336,2016-04-15 to 2025-07-30,0.00061,0.011506,0.1537,0.1826,0.8417,-0.109424,0.105019,-0.3188,14.9246
