In [28]:
import sys
import os

# Get the absolute path to the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_path = os.path.join(project_root, 'src')

# Add to Python path
sys.path.insert(0, project_root)
sys.path.insert(0, src_path)

print(f"Project root: {project_root}")
print(f"Src path: {src_path}")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import classes
from src.data.preprocess import DataPreprocessor
from src.utils.visualization import FinancialVisualizer

# Set up plotting
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
%matplotlib inline

print("All imports successful!")

Project root: c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer
Src path: c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer\src
All imports successful!


In [29]:
# Check data directory contents
import os

data_dir = "data"
print(f"Data directory: {os.path.abspath(data_dir)}")
print(f"Data directory exists: {os.path.exists(data_dir)}")

if os.path.exists(data_dir):
    print("Contents of data directory:")
    for file in os.listdir(data_dir):
        print(f"  {file}")
else:
    print("Data directory does not exist!")

Data directory: c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer\notebooks\data
Data directory exists: False
Data directory does not exist!


In [35]:
# Test the complete modular workflow
from src.data.collect_data import main as collect_data_main
from src.data.preprocess import DataPreprocessor

# Step 1: Collect data (if needed)
print("Step 1: Data Collection")
collect_data_main()

# Step 2: Process data
print("\nStep 2: Data Preprocessing")
preprocessor = DataPreprocessor()
summary = preprocessor.process_all_data()

# Extract processed data
data_dict = summary['processed_data']
stationarity_results = summary['stationarity_results']
outlier_results = summary['outlier_results']

print(f"\nLoaded data for {len(data_dict)} assets")
for ticker, data in data_dict.items():
    print(f"{ticker}: {data.shape[0]} data points, {data.shape[1]} features")

Step 1: Data Collection
PORTFOLIO FORECAST OPTIMIZER - DATA COLLECTION
Output directory: c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer\data\raw
Fetching data from 2015-07-01 to 2025-07-31
Assets: TSLA, BND, SPY

Fetching data for TSLA...
Successfully fetched data for TSLA
Data shape: (2535, 7)
Date range: 2015-07-01 to 2025-07-30
Data saved to c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer\data\raw\TSLA_data.csv

Basic statistics for TSLA:
  - Total trading days: 2535
  - Price range: $9.58 - $479.86
  - Average volume: 114,178,885
----------------------------------------

Fetching data for BND...
Successfully fetched data for BND
Data shape: (2535, 8)
Date range: 2015-07-01 to 2025-07-30
Data saved to c:\Users\aweso\portfolio-forecast-optimizer\portfolio-forecast-optimizer\data\raw\BND_data.csv

Basic statistics for BND:
  - Total trading days: 2535
  - Price range: $60.78 - $77.32
  - Average volume: 4,434,376
---------------------

In [36]:
# DataPreprocessor
preprocessor = DataPreprocessor()
summary = preprocessor.process_all_data()

# Extract processed data
data_dict = summary['processed_data']
stationarity_results = summary['stationarity_results']
outlier_results = summary['outlier_results']

print(f"Loaded data for {len(data_dict)} assets")
for ticker, data in data_dict.items():
    print(f"{ticker}: {data.shape[0]} data points, {data.shape[1]} features")

DATA PREPROCESSING

Processing TSLA...
Data file not found for TSLA

Processing BND...
Data file not found for BND

Processing SPY...
Data file not found for SPY

PREPROCESSING COMPLETED
Loaded data for 0 assets


In [None]:
# Create interactive price comparison
fig = go.Figure()

colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
for i, (ticker, data) in enumerate(data_dict.items()):
    fig.add_trace(go.Scatter(
        x=data.index, 
        y=data['Close'],
        name=ticker,
        line=dict(color=colors[i])
    ))

fig.update_layout(
    title='Interactive Price Comparison',
    xaxis_title='Date',
    yaxis_title='Price ($)',
    hovermode='x unified'
)
fig.show()

In [None]:
# Calculate and display returns statistics
returns_stats = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()
        returns_stats[ticker] = {
            'Mean': returns.mean(),
            'Std': returns.std(),
            'Min': returns.min(),
            'Max': returns.max(),
            'Skewness': returns.skew(),
            'Kurtosis': returns.kurtosis()
        }

returns_df = pd.DataFrame(returns_stats).T
returns_df

In [None]:
# Interactive volatility comparison
fig = make_subplots(rows=3, cols=1, subplot_titles=[f'{ticker} Volatility' for ticker in data_dict.keys()])

for i, (ticker, data) in enumerate(data_dict.items(), 1):
    vol_col = f'{ticker}_Volatility_20d'
    if vol_col in data.columns:
        fig.add_trace(
            go.Scatter(x=data.index, y=data[vol_col], name=f'{ticker} 20d Vol'),
            row=i, col=1
        )

fig.update_layout(height=900, title_text="Rolling Volatility Comparison")
fig.show()

In [None]:
# Create returns correlation matrix
returns_data = {}
for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns_data[ticker] = data[return_col]

returns_df = pd.DataFrame(returns_data).dropna()
corr_matrix = returns_df.corr()

# Interactive correlation heatmap
fig = px.imshow(
    corr_matrix,
    text_auto=True,
    aspect="auto",
    title="Correlation Matrix of Daily Returns"
)
fig.show()

In [None]:
# Calculate comprehensive risk metrics
risk_metrics = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()
        
        # Annualized metrics
        annual_return = returns.mean() * 252
        annual_vol = returns.std() * np.sqrt(252)
        sharpe_ratio = annual_return / annual_vol if annual_vol > 0 else 0
        
        # Drawdown
        cumulative_returns = (1 + returns).cumprod()
        running_max = cumulative_returns.expanding().max()
        drawdown = (cumulative_returns - running_max) / running_max
        max_drawdown = drawdown.min()
        
        # VaR and CVaR
        var_95 = returns.quantile(0.05)
        cvar_95 = returns[returns <= var_95].mean()
        
        risk_metrics[ticker] = {
            'Annual Return': annual_return,
            'Annual Volatility': annual_vol,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown': max_drawdown,
            'VaR (95%)': var_95,
            'CVaR (95%)': cvar_95
        }

risk_df = pd.DataFrame(risk_metrics).T
risk_df

In [None]:
# Display stationarity test results
if stationarity_results:
    stationarity_df = pd.DataFrame(stationarity_results)
    stationarity_df[['ticker', 'adf_statistic', 'p_value', 'is_stationary']]

In [None]:
# Display outlier analysis results
if outlier_results:
    outlier_df = pd.DataFrame(outlier_results)
    outlier_df[['ticker', 'outlier_count', 'outlier_percentage', 'method']]

In [None]:
# Volume analysis
fig = make_subplots(rows=3, cols=1, subplot_titles=[f'{ticker} Volume' for ticker in data_dict.keys()])

for i, (ticker, data) in enumerate(data_dict.items(), 1):
    fig.add_trace(
        go.Scatter(x=data.index, y=data['Volume'], name=f'{ticker} Volume'),
        row=i, col=1
    )

fig.update_layout(height=900, title_text="Trading Volume Comparison")
fig.show()

In [None]:
# Cumulative returns comparison
fig = go.Figure()

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        cumulative_returns = (1 + data[return_col]).cumprod()
        fig.add_trace(go.Scatter(
            x=data.index,
            y=cumulative_returns,
            name=f'{ticker} Cumulative Returns',
            line=dict(width=2)
        ))

fig.update_layout(
    title='Cumulative Returns Comparison',
    xaxis_title='Date',
    yaxis_title='Cumulative Returns',
    hovermode='x unified'
)
fig.show()

In [None]:
# Risk-return scatter plot
risk_return_data = []

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()
        annual_return = returns.mean() * 252
        annual_vol = returns.std() * np.sqrt(252)
        risk_return_data.append([annual_vol, annual_return, ticker])

if risk_return_data:
    risk_return_df = pd.DataFrame(risk_return_data, columns=['Risk', 'Return', 'Ticker'])
    
    fig = px.scatter(
        risk_return_df, 
        x='Risk', 
        y='Return', 
        text='Ticker',
        title='Risk-Return Profile'
    )
    fig.update_traces(textposition="top center")
    fig.show()

In [None]:
# Create comprehensive summary table
summary_stats = {}

for ticker, data in data_dict.items():
    return_col = f'{ticker}_Daily_Return'
    if return_col in data.columns:
        returns = data[return_col].dropna()
        
        summary_stats[ticker] = {
            'Data Points': len(data),
            'Date Range': f"{data.index[0].date()} to {data.index[-1].date()}",
            'Mean Daily Return': f"{returns.mean():.6f}",
            'Std Daily Return': f"{returns.std():.6f}",
            'Annualized Return': f"{returns.mean() * 252:.4f}",
            'Annualized Volatility': f"{returns.std() * np.sqrt(252):.4f}",
            'Sharpe Ratio': f"{(returns.mean() * 252) / (returns.std() * np.sqrt(252)):.4f}",
            'Min Return': f"{returns.min():.6f}",
            'Max Return': f"{returns.max():.6f}",
            'Skewness': f"{returns.skew():.4f}",
            'Kurtosis': f"{returns.kurtosis():.4f}"
        }

summary_df = pd.DataFrame(summary_stats).T
summary_df