In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
class MLModels:
    def __init__(self):
        self.models = {}
        self.scalers = {}
        
    def train_models(self, X, y, ticker):
        """Train multiple ML models"""
        try:
            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, shuffle=False
            )
            
            # Initialize scaler
            self.scalers[ticker] = StandardScaler()
            X_train_scaled = self.scalers[ticker].fit_transform(X_train)
            X_test_scaled = self.scalers[ticker].transform(X_test)
            
            # Initialize models dictionary for this ticker
            self.models[ticker] = {}
            
            # Train Random Forest
            rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
            rf_model.fit(X_train_scaled, y_train)
            rf_pred = rf_model.predict(X_test_scaled)
            
            # Train XGBoost
            xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
            xgb_model.fit(X_train_scaled, y_train)
            xgb_pred = xgb_model.predict(X_test_scaled)
            
            # Store models and predictions
            self.models[ticker] = {
                'rf': {'model': rf_model, 'predictions': {'true': y_test, 'pred': rf_pred}},
                'xgb': {'model': xgb_model, 'predictions': {'true': y_test, 'pred': xgb_pred}}
            }
            
            return X_test, y_test
            
        except Exception as e:
            print(f"Error training models for {ticker}: {e}")
            return None, None

In [3]:
class BacktestEngine:
    def __init__(self, initial_capital=100000):
        self.initial_capital = initial_capital
        self.portfolio_value = []
        
    def run_backtest(self, data, predictions, ticker):
        """Simple backtest simulation"""
        try:
            self.portfolio_value = [self.initial_capital]
            current_value = self.initial_capital
            
            for i in range(1, len(predictions)):
                # Simple strategy: invest everything if prediction is positive
                if predictions[i] > 0:
                    current_value *= (1 + data['Returns'].iloc[i])
                self.portfolio_value.append(current_value)
                
            return self.portfolio_value
        except Exception as e:
            print(f"Error in backtest for {ticker}: {e}")
            return [self.initial_capital]
    
    def calculate_metrics(self, portfolio_values):
        """Calculate basic backtest metrics"""
        try:
            returns = pd.Series(portfolio_values).pct_change().dropna()
            
            metrics = {
                'total_return': (portfolio_values[-1] / portfolio_values[0] - 1) * 100,
                'sharpe_ratio': np.sqrt(252) * returns.mean() / returns.std(),
                'max_drawdown': ((pd.Series(portfolio_values) - pd.Series(portfolio_values).expanding().max()) / 
                               pd.Series(portfolio_values).expanding().max()).min() * 100
            }
            
            return metrics
        except Exception as e:
            print(f"Error calculating backtest metrics: {e}")
            return {'total_return': 0, 'sharpe_ratio': 0, 'max_drawdown': 0}

In [4]:
class QuantFinancialAnalysis:
    def __init__(self, tickers, period='2y'):
        self.tickers = tickers
        self.period = period
        self.stock_data = {}
        self.metrics = {}
        self.ml_models = MLModels()
        self.backtest_engine = BacktestEngine()
    
    def fetch_data(self):
        """Fetch and prepare data"""
        for ticker in self.tickers:
            try:
                stock = yf.Ticker(ticker)
                data = stock.history(period=self.period)
                if not data.empty:
                    self._add_features(data)
                    self.stock_data[ticker] = data
                    # Initialize metrics dictionary for this ticker
                    self.metrics[ticker] = {}
            except Exception as e:
                print(f"Error fetching data for {ticker}: {e}")
    
    def _add_features(self, data):
        """Add basic features"""
        try:
            data['Returns'] = data['Close'].pct_change()
            data['Momentum_5'] = data['Close'].pct_change(5)
            data['Momentum_21'] = data['Close'].pct_change(21)
            data['Volatility'] = data['Returns'].rolling(window=20).std()
            data['RSI'] = self._calculate_rsi(data['Close'])
            data['MACD'] = self._calculate_macd(data['Close'])
            data['Signal_Line'] = data['MACD'].ewm(span=9).mean()
        except Exception as e:
            print(f"Error adding features: {e}")
    
    def _calculate_rsi(self, prices, period=14):
        """Calculate RSI"""
        try:
            delta = prices.diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
            rs = gain / loss
            return 100 - (100 / (1 + rs))
        except Exception as e:
            print(f"Error calculating RSI: {e}")
            return pd.Series(0, index=prices.index)
    
    def _calculate_macd(self, prices):
        """Calculate MACD"""
        try:
            exp1 = prices.ewm(span=12, adjust=False).mean()
            exp2 = prices.ewm(span=26, adjust=False).mean()
            return exp1 - exp2
        except Exception as e:
            print(f"Error calculating MACD: {e}")
            return pd.Series(0, index=prices.index)
    
    def calculate_risk_metrics(self):
        """Calculate basic risk metrics"""
        for ticker, data in self.stock_data.items():
            try:
                returns = data['Returns'].dropna()
                
                # Calculate max drawdown properly
                cum_returns = (1 + returns).cumprod()
                rolling_max = cum_returns.expanding().max()
                drawdowns = (cum_returns - rolling_max) / rolling_max
                max_drawdown = drawdowns.min()
                
                self.metrics[ticker].update({
                    'volatility': returns.std() * np.sqrt(252),  # Annualized volatility
                    'sharpe_ratio': np.sqrt(252) * returns.mean() / returns.std(),  # Annualized Sharpe ratio
                    'max_drawdown': float(max_drawdown),  # Convert to float
                    'annualized_return': float(returns.mean() * 252)  # Annualized return
                })
            except Exception as e:
                print(f"Error calculating risk metrics for {ticker}: {e}")
                self.metrics[ticker].update({
                    'volatility': 0.0,
                    'sharpe_ratio': 0.0,
                    'max_drawdown': 0.0,
                    'annualized_return': 0.0
                })
    
    def train_ml_models(self):
        """Train ML models and run backtests"""
        for ticker, data in self.stock_data.items():
            try:
                feature_columns = [
                    'Momentum_5', 'Momentum_21', 'Volatility', 'RSI',
                    'MACD', 'Signal_Line'
                ]
                
                X = data[feature_columns].dropna()
                y = data['Returns'].shift(-1).dropna()
                
                X = X.iloc[:-1]
                y = y.iloc[:len(X)]
                
                if len(X) > 0 and len(y) > 0:
                    X_test, y_test = self.ml_models.train_models(X, y, ticker)
                    
                    if ticker not in self.metrics:
                        self.metrics[ticker] = {}
                    
                    self.metrics[ticker]['ml_performance'] = {}
                    
                    # Calculate and store model performance metrics
                    if ticker in self.ml_models.models:
                        for model_name, model_info in self.ml_models.models[ticker].items():
                            y_pred = model_info['predictions']['pred']
                            y_true = model_info['predictions']['true']
                            
                            self.metrics[ticker]['ml_performance'][model_name] = {
                                'mse': mean_squared_error(y_true, y_pred),
                                'r2': r2_score(y_true, y_pred)
                            }
                            
                            # Run backtest
                            portfolio_values = self.backtest_engine.run_backtest(
                                data.iloc[-len(y_pred):],
                                y_pred,
                                ticker
                            )
                            
                            if 'backtest' not in self.metrics[ticker]:
                                self.metrics[ticker]['backtest'] = {}
                            
                            self.metrics[ticker]['backtest'][model_name] = \
                                self.backtest_engine.calculate_metrics(portfolio_values)
                                
            except Exception as e:
                print(f"Error in ML training for {ticker}: {e}")
    
    def create_visualizations(self, ticker):
        """Create interactive visualizations using plotly"""
        try:
            # Create figure with secondary y-axis
            fig = make_subplots(rows=4, cols=2,
                               subplot_titles=('Stock Price & ML Predictions',
                                             'Model Performance Comparison',
                                             'Portfolio Values from Backtesting',
                                             'Feature Importance',
                                             'Daily Returns Distribution',
                                             'Risk Metrics',
                                             'Technical Indicators',
                                             'Volume Analysis'),
                               vertical_spacing=0.1,
                               specs=[[{"secondary_y": True}, {}],
                                    [{}, {}],
                                    [{}, {}],
                                    [{}, {}]])

            data = self.stock_data[ticker]
            
            # 1. Stock Price & ML Predictions
            fig.add_trace(
                go.Candlestick(x=data.index,
                              open=data['Open'],
                              high=data['High'],
                              low=data['Low'],
                              close=data['Close'],
                              name='OHLC'),
                row=1, col=1
            )
            
            # Add predictions if available
            if ticker in self.ml_models.models:
                for model_name, model_info in self.ml_models.models[ticker].items():
                    pred_data = model_info['predictions']
                    fig.add_trace(
                        go.Scatter(
                            x=data.index[-len(pred_data['pred']):],
                            y=pred_data['pred'],
                            name=f'{model_name} Predictions',
                            line=dict(dash='dash')
                        ),
                        row=1, col=1
                    )
            
            # 2. Model Performance Comparison
            if 'ml_performance' in self.metrics[ticker]:
                model_names = list(self.metrics[ticker]['ml_performance'].keys())
                mse_values = [self.metrics[ticker]['ml_performance'][model]['mse'] 
                             for model in model_names]
                r2_values = [self.metrics[ticker]['ml_performance'][model]['r2'] 
                            for model in model_names]
                
                fig.add_trace(
                    go.Bar(x=model_names, y=mse_values, name='MSE'),
                    row=1, col=2
                )
                fig.add_trace(
                    go.Bar(x=model_names, y=r2_values, name='R2'),
                    row=1, col=2
                )
            
            # 3. Portfolio Values from Backtesting
            if 'backtest' in self.metrics[ticker]:
                for model_name in self.metrics[ticker]['backtest'].keys():
                    portfolio_values = self.backtest_engine.portfolio_value
                    fig.add_trace(
                        go.Scatter(
                            x=data.index[-len(portfolio_values):],
                            y=portfolio_values,
                            name=f'{model_name} Portfolio'
                        ),
                        row=2, col=1
                    )
            
            # 4. Feature Importance (if available from Random Forest)
            if 'rf' in self.ml_models.models[ticker]:
                rf_model = self.ml_models.models[ticker]['rf']['model']
                feature_cols = [
                    'Momentum_5', 'Momentum_21', 'Volatility', 'RSI',
                    'MACD', 'Signal_Line'
                ]
                feature_importance = pd.DataFrame({
                    'feature': feature_cols,
                    'importance': rf_model.feature_importances_
                }).sort_values('importance', ascending=True)
                
                fig.add_trace(
                    go.Bar(
                        x=feature_importance['importance'],
                        y=feature_importance['feature'],
                        orientation='h',
                        name='Feature Importance'
                    ),
                    row=2, col=2
                )
            
            # 5. Daily Returns Distribution
            fig.add_trace(
                go.Histogram(
                    x=data['Returns'].dropna(),
                    name='Returns Distribution',
                    nbinsx=50
                ),
                row=3, col=1
            )
            
            # 6. Risk Metrics
            risk_metrics = {k: v for k, v in self.metrics[ticker].items() 
                           if k not in ['ml_performance', 'backtest']}
            fig.add_trace(
                go.Bar(
                    x=list(risk_metrics.keys()),
                    y=list(risk_metrics.values()),
                    name='Risk Metrics'
                ),
                row=3, col=2
            )
            
            # 7. Technical Indicators
            fig.add_trace(
                go.Scatter(x=data.index, y=data['RSI'], name='RSI'),
                row=4, col=1
            )
            fig.add_trace(
                go.Scatter(x=data.index, y=data['MACD'], name='MACD'),
                row=4, col=1
            )
            
            # 8. Volume Analysis
            fig.add_trace(
                go.Bar(x=data.index, y=data['Volume'], name='Volume'),
                row=4, col=2
            )
            
            # Update layout
            fig.update_layout(height=1800, width=1200,
                             title_text=f"Complete Analysis Dashboard - {ticker}",
                             showlegend=True)
            
            # Show figure
            fig.show()
            
        except Exception as e:
            print(f"Error creating visualizations for {ticker}: {e}")

    def create_all_visualizations(self):
        """Create visualizations for all tickers"""
        for ticker in self.tickers:
            if ticker in self.stock_data:
                print(f"\nCreating visualizations for {ticker}")
                self.create_visualizations(ticker)

    def create_visualizations(self, ticker):
        """Create interactive visualizations using plotly"""
        try:
            # Create figure with secondary y-axis
            fig = make_subplots(rows=4, cols=2,
                            subplot_titles=('Stock Price & ML Predictions',
                                            'Model Performance Comparison',
                                            'Portfolio Values from Backtesting',
                                            'Feature Importance',
                                            'Daily Returns Distribution',
                                            'Risk Metrics',
                                            'Technical Indicators',
                                            'Volume Analysis'),
                            vertical_spacing=0.1,
                            specs=[[{"secondary_y": True}, {}],
                                    [{}, {}],
                                    [{}, {}],
                                    [{}, {}]])

            data = self.stock_data[ticker]
            
            # 1. Stock Price & ML Predictions
            fig.add_trace(
                go.Candlestick(x=data.index,
                            open=data['Open'],
                            high=data['High'],
                            low=data['Low'],
                            close=data['Close'],
                            name='OHLC'),
                row=1, col=1
            )
            
            # Add predictions if available
            if ticker in self.ml_models.models:
                for model_name, model_info in self.ml_models.models[ticker].items():
                    pred_data = model_info['predictions']
                    fig.add_trace(
                        go.Scatter(
                            x=data.index[-len(pred_data['pred']):],
                            y=pred_data['pred'],
                            name=f'{model_name} Predictions',
                            line=dict(dash='dash')
                        ),
                        row=1, col=1
                    )
            
            # 2. Model Performance Comparison
            if 'ml_performance' in self.metrics[ticker]:
                model_names = list(self.metrics[ticker]['ml_performance'].keys())
                mse_values = [self.metrics[ticker]['ml_performance'][model]['mse'] 
                            for model in model_names]
                r2_values = [self.metrics[ticker]['ml_performance'][model]['r2'] 
                            for model in model_names]
                
                fig.add_trace(
                    go.Bar(x=model_names, y=mse_values, name='MSE'),
                    row=1, col=2
                )
                fig.add_trace(
                    go.Bar(x=model_names, y=r2_values, name='R2'),
                    row=1, col=2
                )
            
            # 3. Portfolio Values from Backtesting
            if 'backtest' in self.metrics[ticker]:
                for model_name in self.metrics[ticker]['backtest'].keys():
                    portfolio_values = self.backtest_engine.portfolio_value
                    fig.add_trace(
                        go.Scatter(
                            x=data.index[-len(portfolio_values):],
                            y=portfolio_values,
                            name=f'{model_name} Portfolio'
                        ),
                        row=2, col=1
                    )
            
            # 4. Feature Importance (if available from Random Forest)
            if 'rf' in self.ml_models.models[ticker]:
                rf_model = self.ml_models.models[ticker]['rf']['model']
                feature_cols = [
                    'Momentum_5', 'Momentum_21', 'Volatility', 'RSI',
                    'MACD', 'Signal_Line'
                ]
                feature_importance = pd.DataFrame({
                    'feature': feature_cols,
                    'importance': rf_model.feature_importances_
                }).sort_values('importance', ascending=True)
                
                fig.add_trace(
                    go.Bar(
                        x=feature_importance['importance'],
                        y=feature_importance['feature'],
                        orientation='h',
                        name='Feature Importance'
                    ),
                    row=2, col=2
                )
            
            # 5. Daily Returns Distribution
            fig.add_trace(
                go.Histogram(
                    x=data['Returns'].dropna(),
                    name='Returns Distribution',
                    nbinsx=50
                ),
                row=3, col=1
            )
            
            # 6. Risk Metrics
            risk_metrics = {k: v for k, v in self.metrics[ticker].items() 
                        if k not in ['ml_performance', 'backtest']}
            fig.add_trace(
                go.Bar(
                    x=list(risk_metrics.keys()),
                    y=list(risk_metrics.values()),
                    name='Risk Metrics'
                ),
                row=3, col=2
            )
            
            # 7. Technical Indicators
            fig.add_trace(
                go.Scatter(x=data.index, y=data['RSI'], name='RSI'),
                row=4, col=1
            )
            fig.add_trace(
                go.Scatter(x=data.index, y=data['MACD'], name='MACD'),
                row=4, col=1
            )
            
            # 8. Volume Analysis
            fig.add_trace(
                go.Bar(x=data.index, y=data['Volume'], name='Volume'),
                row=4, col=2
            )
            
            # Update layout
            fig.update_layout(height=1800, width=1200,
                            title_text=f"Complete Analysis Dashboard - {ticker}",
                            showlegend=True)
            
            # Show figure
            fig.show()
        
        except Exception as e:
            print(f"Error creating visualizations for {ticker}: {e}")


In [5]:
def main():
    # Example usage
    tickers = ['AAPL', 'MSFT', 'GOOGL']
    
    # Initialize and run analysis
    analysis = QuantFinancialAnalysis(tickers)
    
    print("Fetching and preparing data...")
    analysis.fetch_data()
    
    print("Calculating risk metrics...")
    analysis.calculate_risk_metrics()
    
    print("Training ML models and running backtests...")
    analysis.train_ml_models()
    
    # Print summary
    for ticker in tickers:
        print(f"\n=== {ticker} Analysis ===")
        
        if ticker in analysis.metrics:
            print("\nRisk Metrics:")
            for metric, value in analysis.metrics[ticker].items():
                if metric not in ['ml_performance', 'backtest']:
                    try:
                        print(f"{metric}: {float(value):.4f}")
                    except (TypeError, ValueError) as e:
                        print(f"{metric}: {value}")
            
            if 'ml_performance' in analysis.metrics[ticker]:
                print("\nML Model Performance:")
                for model_name, metrics in analysis.metrics[ticker]['ml_performance'].items():
                    print(f"\n{model_name.upper()}:")
                    for metric, value in metrics.items():
                        print(f"{metric}: {value:.6f}")
            
            if 'backtest' in analysis.metrics[ticker]:
                print("\nBacktest Results:")
                for model_name, metrics in analysis.metrics[ticker]['backtest'].items():
                    print(f"\n{model_name.upper()} Strategy:")
                    for metric, value in metrics.items():
                        try:
                            print(f"{metric}: {float(value):.2f}")
                        except (TypeError, ValueError) as e:
                            print(f"{metric}: {value}")
        else:
            print(f"No data available for {ticker}")
    
    print("\nGenerating visualizations...")
    analysis.create_all_visualizations()

if __name__ == "__main__":
    main()

Fetching and preparing data...
Calculating risk metrics...
Training ML models and running backtests...

=== AAPL Analysis ===

Risk Metrics:
volatility: 0.2266
sharpe_ratio: 1.0563
max_drawdown: -0.1736
annualized_return: 0.2394

ML Model Performance:

RF:
mse: 0.000236
r2: 0.065736

XGB:
mse: 0.000229
r2: 0.094064

Backtest Results:

RF Strategy:
total_return: 5.76
sharpe_ratio: 0.97
max_drawdown: -7.17

XGB Strategy:
total_return: 10.61
sharpe_ratio: 1.62
max_drawdown: -5.09

=== MSFT Analysis ===

Risk Metrics:
volatility: 0.2328
sharpe_ratio: 1.3408
max_drawdown: -0.1549
annualized_return: 0.3122

ML Model Performance:

RF:
mse: 0.000188
r2: -0.271080

XGB:
mse: 0.000226
r2: -0.529489

Backtest Results:

RF Strategy:
total_return: 2.95
sharpe_ratio: 0.69
max_drawdown: -7.42

XGB Strategy:
total_return: -2.04
sharpe_ratio: -0.38
max_drawdown: -11.34

=== GOOGL Analysis ===

Risk Metrics:
volatility: 0.2904
sharpe_ratio: 1.2499
max_drawdown: -0.2214
annualized_return: 0.3629

ML Mode


Creating visualizations for MSFT



Creating visualizations for GOOGL
