## Trading Simulation Analysis Functions

This script defines functions to visualize and analyze trade data. It assumes that a DataFrame with the following columns is available: date (index), start_cash, gross_option_trades, gross_stock_trades, gross_trades_value, option_pos_value, stock_pos_value, end_cash, option_PL, stock_PL, net_PL. Additionally, the Fama-French factors data (available from Ken French's website) must be imported, with its date column set as the index.

### Imports

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
from statsmodels.tsa.stattools import adfuller

In [None]:
def calculate_cumulative_pl(data):
    """
    Adds cumulative P&L columns for options, stocks, and the net total.

    The returned frame keeps every column of `data` and appends the running
    sums as 'cum_option_PL', 'cum_stock_PL' and 'cum_net_PL'. Keeping the
    original columns matters because the plotting functions in this file are
    meant to be fed this output and read columns such as
    'gross_option_trades', 'option_pos_value' and 'option_PL' from it.

    Inputs:
        data - DataFrame with Date index and columns for option_PL, stock_PL, and net_PL.

    Output:
        cumulative_pl - copy of `data` (same index) with the three cumulative
                        P&L columns added. `data` itself is not modified.

    Raises:
        KeyError - if any of option_PL, stock_PL or net_PL is missing.
    """
    # Work on a copy so the caller's DataFrame is left untouched.
    cumulative_pl = data.copy()

    for column in ('option_PL', 'stock_PL', 'net_PL'):
        cumulative_pl[f'cum_{column}'] = data[column].cumsum()

    return cumulative_pl

#### Use output df from calculate_cumulative_pl function here onwards!

In [4]:
def performance_summary(cumulative_pl):
    """
    Builds a table of summary statistics, one row per input column.

    Every column of the input is treated as a series of periodic returns:
    the Sharpe Ratio is the plain mean divided by the standard deviation
    (no annualisation, no risk-free rate), and Max Drawdown is measured on a
    wealth index that starts from 1000 and compounds (1 + value).

    NOTE(review): the argument is named `cumulative_pl`, but the maths above
    only makes sense for fractional returns - confirm what callers pass in.

    Inputs:
        cumulative_pl - DataFrame with Date index and one numeric series per column.

    Output:
        summary_stats - DataFrame indexed by the input's column names with the
                        columns Mean, Median, Volatility, Sharpe Ratio,
                        Skewness, Excess Kurtosis, Min, Max and Max Drawdown.
    """
    returns = cumulative_pl

    average = returns.mean()
    dispersion = returns.std()

    # Compound the series into a wealth path and measure the distance of each
    # point from the running peak.
    wealth = 1000 * (1 + returns).cumprod()
    running_peak = wealth.cummax()
    drawdown_path = (wealth - running_peak) / running_peak

    statistics = {
        'Mean': average,
        'Median': returns.median(),
        'Volatility': dispersion,
        'Sharpe Ratio': average / dispersion,
        'Skewness': returns.skew(),
        'Excess Kurtosis': returns.kurtosis(),
        'Min': returns.min(),
        'Max': returns.max(),
        'Max Drawdown': drawdown_path.min(),
    }

    summary_stats = pd.DataFrame()
    for label, values in statistics.items():
        summary_stats[label] = values

    return summary_stats

In [None]:
def plot_trades_over_time(cumulative_pl):
    """
    Draws gross option trades and gross stock trades against the index,
    one stacked panel per series, and shows the figure.

    Inputs:
        cumulative_pl - DataFrame with Date index containing the columns
                        gross_option_trades and gross_stock_trades.
    """
    # (column, line colour, panel title, y-axis label)
    panels = (
        ('gross_option_trades', 'blue', 'Gross Option Trades Over Time', 'Gross Option Trades'),
        ('gross_stock_trades', 'red', 'Gross Stock Trades Over Time', 'Gross Stock Trades'),
    )

    fig, ax = plt.subplots(2, 1, figsize=(12, 8))

    for panel, (column, colour, title, y_label) in zip(ax, panels):
        panel.plot(cumulative_pl.index, cumulative_pl[column], color=colour, marker='o', linestyle='-')
        panel.set_title(title)
        panel.set_xlabel('Date')
        panel.set_ylabel(y_label)
        panel.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_position_values_over_time(cumulative_pl):
    """
    Draws the option and stock position values against the index,
    one stacked panel per series, and shows the figure.

    Inputs:
        cumulative_pl - DataFrame with Date index containing the columns
                        option_pos_value and stock_pos_value.
    """
    # (column, line colour, panel title, y-axis label)
    panels = (
        ('option_pos_value', 'green', 'Option Position Value Over Time', 'Option Position Value ($)'),
        ('stock_pos_value', 'purple', 'Stock Position Value Over Time', 'Stock Position Value ($)'),
    )

    fig, ax = plt.subplots(2, 1, figsize=(12, 8))

    for panel, (column, colour, title, y_label) in zip(ax, panels):
        panel.plot(cumulative_pl.index, cumulative_pl[column], color=colour, marker='o', linestyle='-')
        panel.set_title(title)
        panel.set_xlabel('Date')
        panel.set_ylabel(y_label)
        panel.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_pl_over_time(cumulative_pl):
    """
    Draws the option, stock and net P&L series against the index,
    one stacked panel per series, and shows the figure.

    Inputs:
        cumulative_pl - DataFrame with Date index containing the columns
                        option_PL, stock_PL and net_PL.
    """
    # (column, line colour, panel title, y-axis label)
    panels = (
        ('option_PL', 'orange', 'Option P&L Over Time', 'Option P&L ($)'),
        ('stock_PL', 'cyan', 'Stock P&L Over Time', 'Stock P&L ($)'),
        ('net_PL', 'magenta', 'Net P&L Over Time', 'Net P&L ($)'),
    )

    fig, ax = plt.subplots(3, 1, figsize=(12, 12))

    for panel, (column, colour, title, y_label) in zip(ax, panels):
        panel.plot(cumulative_pl.index, cumulative_pl[column], color=colour, marker='o', linestyle='-')
        panel.set_title(title)
        panel.set_xlabel('Date')
        panel.set_ylabel(y_label)
        panel.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_cumulative_pl_over_time(cumulative_pl):
    """
    Draws the cumulative option, stock and net P&L series against the index,
    one stacked panel per series, and shows the figure.

    Inputs:
        cumulative_pl - DataFrame with Date index containing the columns
                        cum_option_PL, cum_stock_PL and cum_net_PL.
    """
    # (column, line colour, panel title, y-axis label)
    panels = (
        ('cum_option_PL', 'orange', 'Cumulative Option P&L Over Time', 'Cumulative Option P&L ($)'),
        ('cum_stock_PL', 'cyan', 'Cumulative Stock P&L Over Time', 'Cumulative Stock P&L ($)'),
        ('cum_net_PL', 'magenta', 'Cumulative Net P&L Over Time', 'Cumulative Net P&L ($)'),
    )

    fig, ax = plt.subplots(3, 1, figsize=(12, 12))

    for panel, (column, colour, title, y_label) in zip(ax, panels):
        panel.plot(cumulative_pl.index, cumulative_pl[column], color=colour, marker='o', linestyle='-')
        panel.set_title(title)
        panel.set_xlabel('Date')
        panel.set_ylabel(y_label)
        panel.grid(True)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_trades_and_test_stationarity(cumulative_pl):
    """
    Plots 'gross_option_trades' and 'gross_stock_trades' in two stacked panels,
    each overlaid with its 5-, 30- and 100-row rolling means.

    For each series an Augmented Dickey-Fuller test (maxlag=1) is run on the
    non-missing values, and the test statistic and p-value are written as text
    just below the corresponding panel.

    Inputs:
        cumulative_pl - DataFrame with columns including 'gross_option_trades' and 'gross_stock_trades'.
    """
    # (column, legend label / title prefix)
    panels = (
        ('gross_option_trades', "Gross Option Trades"),
        ('gross_stock_trades', "Gross Stock Trades"),
    )

    fig, axes = plt.subplots(2, 1, figsize=(20, 10))

    for panel, (column, label) in zip(axes, panels):
        series = cumulative_pl[column]

        # Raw series first, then the smoothed versions on top of it.
        panel.plot(series, label=label)
        for window in (5, 30, 100):
            panel.plot(series.rolling(window).mean(), label=f"{window}-day MA")
        panel.set_title(f"{label} with Rolling Averages", fontsize=18)
        panel.legend(fontsize=14)

        # adfuller returns a tuple whose first two entries are the statistic and p-value.
        adf_stat, p_value = adfuller(series.dropna(), maxlag=1)[:2]
        panel.text(0.01, -0.2, f'ADF Test Statistic: {adf_stat:.2f}\nP-Value: {p_value:.2f}',
                   transform=panel.transAxes, fontsize=14)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_position_values_and_test_stationarity(cumulative_pl):
    """
    Plots 'option_pos_value' and 'stock_pos_value' in two stacked panels,
    each overlaid with its 5-, 30- and 100-row rolling means.

    For each series an Augmented Dickey-Fuller test (maxlag=1) is run on the
    non-missing values, and the test statistic and p-value are written as text
    just below the corresponding panel.

    Inputs:
        cumulative_pl - DataFrame with columns including 'option_pos_value' and 'stock_pos_value'.
    """
    # (column, legend label / title prefix)
    panels = (
        ('option_pos_value', "Option Position Value"),
        ('stock_pos_value', "Stock Position Value"),
    )

    fig, axes = plt.subplots(2, 1, figsize=(20, 10))

    for panel, (column, label) in zip(axes, panels):
        series = cumulative_pl[column]

        # Raw series first, then the smoothed versions on top of it.
        panel.plot(series, label=label)
        for window in (5, 30, 100):
            panel.plot(series.rolling(window).mean(), label=f"{window}-day MA")
        panel.set_title(f"{label} with Rolling Averages", fontsize=18)
        panel.legend(fontsize=14)

        # adfuller returns a tuple whose first two entries are the statistic and p-value.
        adf_stat, p_value = adfuller(series.dropna(), maxlag=1)[:2]
        panel.text(0.01, -0.2, f'ADF Test Statistic: {adf_stat:.2f}\nP-Value: {p_value:.2f}',
                   transform=panel.transAxes, fontsize=14)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_pl_values_and_test_stationarity(cumulative_pl):
    """
    Plots 'option_PL', 'stock_PL' and 'net_PL' in three stacked panels,
    each overlaid with its 5-, 30- and 100-row rolling means.

    For each series an Augmented Dickey-Fuller test (maxlag=1) is run on the
    non-missing values, and the test statistic and p-value are written as text
    just below the corresponding panel.

    Inputs:
        cumulative_pl - DataFrame with columns including 'option_PL', 'stock_PL', and 'net_PL'.
    """
    # (column, legend label / title prefix)
    panels = (
        ('option_PL', "Option P&L"),
        ('stock_PL', "Stock P&L"),
        ('net_PL', "Net P&L"),
    )

    fig, axes = plt.subplots(3, 1, figsize=(20, 15))

    for panel, (column, label) in zip(axes, panels):
        series = cumulative_pl[column]

        # Raw series first, then the smoothed versions on top of it.
        panel.plot(series, label=label)
        for window in (5, 30, 100):
            panel.plot(series.rolling(window).mean(), label=f"{window}-day MA")
        panel.set_title(f"{label} with Rolling Averages", fontsize=18)
        panel.legend(fontsize=14)

        # adfuller returns a tuple whose first two entries are the statistic and p-value.
        adf_stat, p_value = adfuller(series.dropna(), maxlag=1)[:2]
        panel.text(0.01, -0.2, f'ADF Test Statistic: {adf_stat:.2f}\nP-Value: {p_value:.2f}',
                   transform=panel.transAxes, fontsize=14)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_correlations(cumulative_pl):
    """
    Draws three stacked scatter plots, each pairing an option-side column with
    its stock-side counterpart and showing their correlation in the legend:
    1. Gross option trades vs. gross stock trades
    2. Option position value vs. stock position value
    3. Option P&L vs. Stock P&L

    Inputs:
        cumulative_pl - DataFrame with the six columns listed above.
    """
    # (x column, y column, marker colour, x label, y label, panel title)
    pairs = (
        ('gross_option_trades', 'gross_stock_trades', 'blue',
         'Gross Option Trades', 'Gross Stock Trades',
         'Correlation Between Gross Option and Stock Trades'),
        ('option_pos_value', 'stock_pos_value', 'red',
         'Option Position Value', 'Stock Position Value',
         'Correlation Between Option and Stock Position Values'),
        ('option_PL', 'stock_PL', 'green',
         'Option P&L', 'Stock P&L',
         'Correlation Between Option and Stock P&L'),
    )

    fig, axes = plt.subplots(3, 1, figsize=(8, 15))

    for panel, (x_col, y_col, colour, x_label, y_label, title) in zip(axes, pairs):
        coefficient = cumulative_pl[x_col].corr(cumulative_pl[y_col])
        panel.scatter(cumulative_pl[x_col], cumulative_pl[y_col],
                      label=f'Correlation: {coefficient:.2f}', color=colour, alpha=0.7)
        panel.set_xlabel(x_label)
        panel.set_ylabel(y_label)
        panel.set_title(title)
        panel.legend()
        panel.grid(True)

    plt.tight_layout()
    plt.show()

#### Load fff data as a df named fff_data with date set as index

In [None]:
def plot_correlation_matrices(cumulative_pl, fff_data):
    """
    Merges cumulative_pl with fff_data on their indices (left join, so every
    row of cumulative_pl is kept and dates without factor data get NaN
    factors), then draws three annotated correlation heatmaps between
    selected financial metrics and the Fama-French factors
    'Mkt-RF', 'SMB' and 'HML'.

    Parameters:
        cumulative_pl (DataFrame): Financial metrics data. Must contain
            gross_option_trades, option_pos_value, option_PL,
            gross_stock_trades, stock_pos_value, stock_PL,
            gross_trades_value and net_PL.
        fff_data (DataFrame): Fama-French factors data with columns
            'Mkt-RF', 'SMB' and 'HML'.

    Returns:
        DataFrame: the merged frame, so it can be passed to the regression
        and plotting functions that expect `combined_df`.
    """
    combined_df = cumulative_pl.merge(fff_data, left_index=True, right_index=True, how='left')

    factors = ['Mkt-RF', 'SMB', 'HML']
    # (metric columns, heatmap title); the column selection happens up front so
    # a missing column fails before any figure is created.
    views = [
        (combined_df[['gross_option_trades', 'option_pos_value', 'option_PL'] + factors],
         'Correlation Matrix: Options Data vs Fama-French Factors'),
        (combined_df[['gross_stock_trades', 'stock_pos_value', 'stock_PL'] + factors],
         'Correlation Matrix: Stocks Data vs Fama-French Factors'),
        (combined_df[['gross_trades_value', 'net_PL'] + factors],
         'Correlation Matrix: Gross Trades & Net P&L vs Fama-French Factors'),
    ]

    fig, axes = plt.subplots(3, 1, figsize=(10, 24))

    for panel, (subset, title) in zip(axes, views):
        sns.heatmap(subset.corr(), annot=True, fmt=".2f", cmap='coolwarm',
                    square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=panel)
        panel.set_title(title)

    plt.tight_layout()
    plt.show()

    # Previously nothing was returned, so the merged frame the later functions
    # rely on was not available to the caller.
    return combined_df

#### Use the combined output df from plot_correlation_matrices function here onwards!

In [None]:
def run_regression_option_PL(combined_df):
    """
    Performs linear regression using the Fama-French three factors as independent variables
    and option_PL from the combined_df as the dependent variable, then prints
    the R-squared and the full statsmodels summary.

    Rows in which option_PL or any factor is missing are excluded from the fit.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes option_PL and the
            Fama-French factors 'Mkt-RF', 'SMB' and 'HML'.
    """
    X = combined_df[['Mkt-RF', 'SMB', 'HML']]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    y = combined_df['option_PL']

    # combined_df is built with a left merge, so dates without factor data carry
    # NaN in the factor columns; drop those rows instead of feeding NaNs to OLS.
    model = sm.OLS(y, X, missing='drop').fit()

    print(f'R-squared of the regression of option_PL on the Fama-French factors: {round(model.rsquared, 6)}.\n')
    print(model.summary())

In [None]:
def run_regression_stock_PL(combined_df):
    """
    Performs linear regression using the Fama-French three factors as independent variables
    and stock_PL from the combined_df as the dependent variable, then prints
    the R-squared and the full statsmodels summary.

    Rows in which stock_PL or any factor is missing are excluded from the fit.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes stock_PL and the
            Fama-French factors 'Mkt-RF', 'SMB' and 'HML'.
    """
    X = combined_df[['Mkt-RF', 'SMB', 'HML']]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    y = combined_df['stock_PL']

    # combined_df is built with a left merge, so dates without factor data carry
    # NaN in the factor columns; drop those rows instead of feeding NaNs to OLS.
    model = sm.OLS(y, X, missing='drop').fit()

    print(f'R-squared of the regression of stock_PL on the Fama-French factors: {round(model.rsquared, 6)}.\n')
    print(model.summary())

In [None]:
def run_regression_net_PL(combined_df):
    """
    Performs linear regression using the Fama-French three factors as independent variables
    and net_PL from the combined_df as the dependent variable, then prints
    the R-squared and the full statsmodels summary.

    Rows in which net_PL or any factor is missing are excluded from the fit.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes net_PL and the
            Fama-French factors 'Mkt-RF', 'SMB' and 'HML'.
    """
    X = combined_df[['Mkt-RF', 'SMB', 'HML']]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    y = combined_df['net_PL']

    # combined_df is built with a left merge, so dates without factor data carry
    # NaN in the factor columns; drop those rows instead of feeding NaNs to OLS.
    model = sm.OLS(y, X, missing='drop').fit()

    print(f'R-squared of the regression of net_PL on the Fama-French factors: {round(model.rsquared, 6)}.\n')
    print(model.summary())

In [5]:
def plot_rolling_volatility(combined_df, window_size=30):
    """
    Calculates and plots the rolling standard deviation of the period-over-period
    percentage change of net_PL.

    The input DataFrame is not modified; the intermediate series are kept local.

    NOTE(review): the "returns" here are pct_change() of the net_PL column, not
    P&L divided by capital - confirm this is the intended volatility measure.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes the net_PL column.
        window_size (int): The window size for calculating rolling volatility (default is 30 days).

    Raises:
        ValueError: If combined_df has no 'net_PL' column.
    """
    if 'net_PL' not in combined_df.columns:
        raise ValueError("DataFrame must contain a 'net_PL' column")

    daily_returns = combined_df['net_PL'].pct_change()
    # A zero P&L on the previous row makes pct_change divide by zero and emit
    # +/-inf; treat those as missing so they only blank the windows that
    # actually contain them.
    daily_returns = daily_returns.replace([np.inf, -np.inf], np.nan)

    rolling_volatility = daily_returns.rolling(window=window_size).std()

    plt.figure(figsize=(14, 7))
    plt.plot(rolling_volatility.index, rolling_volatility, label=f'{window_size}-Day Rolling Volatility')
    plt.title(f'Volatility Clustering: {window_size}-Day Rolling Volatility of Returns')
    plt.xlabel('Date')
    plt.ylabel('Rolling Volatility')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
def plot_pnl_distribution(combined_df):
    """
    Creates histograms and density plots for the distribution of daily and monthly P&L
    from the trading strategy, based on the net_PL column of the combined_df DataFrame.

    net_PL is used directly as the per-row (daily) P&L, which is how the rest of
    this file treats it (it is cumulatively summed to obtain cumulative P&L).
    Monthly P&L is the sum of net_PL within each calendar month that has data.
    The input DataFrame is not modified.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes the net_PL column.
            Its index must be a DatetimeIndex so rows can be grouped by month.

    Raises:
        ValueError: If combined_df has no 'net_PL' column.
    """
    if 'net_PL' not in combined_df.columns:
        raise ValueError("DataFrame must contain a 'net_PL' column")

    daily_pnl = combined_df['net_PL'].dropna()

    # Keep the monthly figures in their own Series: writing a month-indexed
    # series back into the daily-indexed frame would align on the index and
    # discard every month whose last calendar day is not a row in the frame.
    monthly_pnl = daily_pnl.groupby(daily_pnl.index.to_period('M')).sum()

    fig, axes = plt.subplots(2, 2, figsize=(14, 10), constrained_layout=True)
    fig.suptitle('Profit and Loss Distribution')

    # One row of panels per horizon: histogram on the left, density on the right.
    for row, (horizon, values) in enumerate((('Daily', daily_pnl), ('Monthly', monthly_pnl))):
        sns.histplot(values, bins=50, kde=True, ax=axes[row, 0])
        axes[row, 0].set_title(f'{horizon} P&L Distribution')
        axes[row, 0].set_xlabel(f'{horizon} P&L')
        axes[row, 0].set_ylabel('Frequency')

        sns.kdeplot(values, ax=axes[row, 1], fill=True)
        axes[row, 1].set_title(f'{horizon} P&L Density')
        axes[row, 1].set_xlabel(f'{horizon} P&L')
        axes[row, 1].set_ylabel('Density')

    plt.show()

In [None]:
def plot_trade_activity_comparison(combined_df, frequency='D'):
    """
    Draws a grouped bar chart of 'gross_option_trades' and 'gross_stock_trades'
    summed over each resampling period.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes 'gross_option_trades' and 'gross_stock_trades' columns.
        frequency (str): Frequency for resampling data. 'D' for daily, 'W' for weekly.
    """
    trade_columns = ['gross_option_trades', 'gross_stock_trades']
    period_totals = combined_df[trade_columns].resample(frequency).sum()

    # DataFrame.plot creates the figure and returns the axes it drew on.
    chart = period_totals.plot(kind='bar', figsize=(14, 7), width=0.8)
    chart.set_title(f'Comparison of Trading Activity (Frequency: {frequency})')
    chart.set_xlabel('Date')
    chart.set_ylabel('Volume of Trades')
    chart.legend(['Gross Option Trades', 'Gross Stock Trades'])
    chart.grid(axis='y', linestyle='--')

    plt.xticks(rotation=45)

    plt.show()

In [None]:
def plot_pl_boxplots(combined_df):
    """
    Draws side-by-side box plots of 'option_PL', 'stock_PL' and 'net_PL',
    showing each column's spread, median and outliers.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes 'option_PL', 'stock_PL', and 'net_PL' columns.
    """
    plt.figure(figsize=(10, 6))

    # Reshape to long form: one row per (column name, value) pair.
    long_form = combined_df[['option_PL', 'stock_PL', 'net_PL']].melt(
        var_name='Type', value_name='P&L'
    )

    chart = sns.boxplot(x='Type', y='P&L', data=long_form)

    chart.set_title('Distribution of P&L for Options, Stocks, and Net')
    chart.set_xlabel('P&L Type')
    chart.set_ylabel('Profit and Loss')
    chart.grid(axis='y', linestyle='--')

    plt.show()

In [None]:
def plot_cash_flow_timeline(combined_df):
    """
    Plots the 'start_cash' and 'end_cash' columns against the index on a single
    chart so the two can be compared over time.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes 'start_cash' and 'end_cash' columns.

    Raises:
        ValueError: If either 'start_cash' or 'end_cash' is missing.
    """
    has_both = 'start_cash' in combined_df.columns and 'end_cash' in combined_df.columns
    if not has_both:
        raise ValueError("DataFrame must contain both 'start_cash' and 'end_cash' columns")

    plt.figure(figsize=(14, 7))

    # (column, legend label, marker)
    for column, label, marker in (('start_cash', 'Start Cash', 'o'), ('end_cash', 'End Cash', 'x')):
        plt.plot(combined_df.index, combined_df[column], label=label, marker=marker)

    plt.title('Cash Flow Timeline: Start vs End Cash')
    plt.xlabel('Date')
    plt.ylabel('Cash Amount')
    plt.legend()
    plt.grid(True)

    plt.show()

In [None]:
def calculate_profit_factor_analysis(combined_df):
    """
    Computes and prints the profit factor (sum of positive P&L divided by the
    absolute sum of negative P&L) separately for options and for stocks.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes 'option_PL' and 'stock_PL' columns.

    Returns:
        tuple: (option_profit_factor, stock_profit_factor). Each entry is a
        number, or the string 'Infinity' when that column's negative values
        sum to zero.

    Raises:
        ValueError: If either 'option_PL' or 'stock_PL' is missing.
    """
    has_both = 'option_PL' in combined_df.columns and 'stock_PL' in combined_df.columns
    if not has_both:
        raise ValueError("DataFrame must contain both 'option_PL' and 'stock_PL' columns")

    def profit_factor(column):
        # Gains over absolute losses for one P&L column.
        pnl = combined_df[column]
        total_gains = pnl[pnl > 0].sum()
        total_losses = pnl[pnl < 0].sum()
        if total_losses == 0:
            return 'Infinity'  # No losses to divide by
        return total_gains / abs(total_losses)

    option_profit_factor = profit_factor('option_PL')
    stock_profit_factor = profit_factor('stock_PL')

    print(f"Option Profit Factor: {option_profit_factor}")
    print(f"Stock Profit Factor: {stock_profit_factor}")

    return option_profit_factor, stock_profit_factor

In [None]:
def plot_pareto_chart(combined_df):
    """
    Draws a Pareto-style chart of net_PL by gross_trades_value: bars show the
    net_PL summed for each distinct gross_trades_value, and a line on a second
    y-axis shows the running share of total net_PL in percent, with the
    groups ranked from the largest summed net_PL to the smallest.

    Parameters:
        combined_df (DataFrame): A DataFrame that includes 'net_PL' and 'gross_trades_value' columns.

    Raises:
        ValueError: If either 'net_PL' or 'gross_trades_value' is missing.
    """
    has_both = 'net_PL' in combined_df.columns and 'gross_trades_value' in combined_df.columns
    if not has_both:
        raise ValueError("DataFrame must contain 'net_PL' and 'gross_trades_value' columns")

    # Total net_PL per distinct trade value, ranked from largest to smallest.
    pl_by_value = combined_df.groupby('gross_trades_value')['net_PL'].sum()
    value_impact = pl_by_value.sort_values(ascending=False).reset_index()
    running_total = value_impact['net_PL'].cumsum()
    value_impact['cumulative_percentage'] = running_total / value_impact['net_PL'].sum() * 100

    trade_values = value_impact['gross_trades_value']

    # Bars on the primary axis
    fig, ax = plt.subplots()
    ax.bar(trade_values, value_impact['net_PL'], color='C0')

    # Cumulative share on a secondary axis that shares the x-axis
    ax2 = ax.twinx()
    ax2.plot(trade_values, value_impact['cumulative_percentage'], color='C1', marker="D", ms=7)
    ax2.yaxis.set_major_formatter(PercentFormatter())

    # Colour each y-axis' ticks to match the series it measures.
    ax.tick_params(axis='y', colors='C0')
    ax2.tick_params(axis='y', colors='C1')

    plt.title('Pareto Chart: Significant Gross Trade Values Contributing to Net PL')
    ax.set_xlabel('Gross Trades Value')
    ax.set_ylabel('Net PL')
    ax2.set_ylabel('Cumulative Percentage')

    # Rotate X-axis labels for better readability
    plt.xticks(rotation=45)

    plt.show()