In [11]:
######### loading the packages
import os

import ipywidgets as widgets
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Currently unused imports, kept commented out for reference:
# import re
# import numpy as np
# from adjustText import adjust_text
# from scipy.interpolate import interp1d
# import textwrap
# import shutil
# import ast
# import matplotlib
# from collections import Counter
# import matplotlib.patches as mpatches
# import time
# from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.chrome.options import Options
# from pandas.tseries.offsets import DateOffset
# import csv
# import base64
# from PIL import Image
# import pdfkit
# import fitz
# import pdfplumber
# from collections import defaultdict
# from IPython.display import display

In [6]:
######### Set a global directory path
# Project root that every other path in this notebook is derived from.
global_directory = "/Users/ruofeiguo/Desktop/PhD 2.0/Research/CEO_Irene" #### Change this to your local directory!!!

# Input CSVs live under <root>/data, generated figures under <root>/graph.
data_directory, graph_directory = (
    os.path.join(global_directory, subfolder) for subfolder in ('data', 'graph')
)

# Create both folders up front; exist_ok makes re-running this cell harmless.
os.makedirs(data_directory, exist_ok=True)
os.makedirs(graph_directory, exist_ok=True)

# Make the project root the working directory for the rest of the notebook.
os.chdir(global_directory)

In [12]:
######### Create input widgets
# Integer inputs, pre-filled with an example CIK / participant id pair.
CIK_widget = widgets.IntText(description="CIK:", value=55067)
participant_widget = widgets.IntText(description="Participant ID:", value=58579)

# Button the user clicks to (re)generate the timeline.
run_button = widgets.Button(description="Generate Timeline")

# Output widget that receives the displayed DataFrame and the plot.
output_area = widgets.Output()

def plot_compensation_timeline(CIK, participantid):
    """
    Display and plot the vesting timeline for one CIK / participant pair.

    Reads ``GpbaAbs_vest.csv`` and ``GpbaRel_vest.csv`` from ``data_directory``,
    keeps the rows whose ``CIK`` and ``participantid`` columns equal the given
    values, tags them ``'abs'`` / ``'rel'``, and displays the combined table.
    It then draws one horizontal bar per row (``startDate`` to ``endDate``)
    with a black marker at ``grantDate``, grouped by ``metric``. The figure is
    saved as a JPEG under ``graph_directory/timeline`` and shown.

    Parameters
    ----------
    CIK : int
        Value compared for equality against the ``CIK`` column.
    participantid : int
        Value compared for equality against the ``participantid`` column.

    Returns
    -------
    None
        A message is printed and nothing is plotted when no row matches, or
        when none of the matching rows has a metric.

    Notes
    -----
    Rows without a ``metric`` value still appear in the displayed table but
    are left out of the plot, because they cannot be assigned to a labelled
    group on the y-axis.
    """

    # --- Read CSVs ---
    df_abs = pd.read_csv(os.path.join(data_directory, 'GpbaAbs_vest.csv'))
    df_rel = pd.read_csv(os.path.join(data_directory, 'GpbaRel_vest.csv'))

    # --- Filter the DataFrames ---
    df_abs_filtered = df_abs[(df_abs['CIK'] == CIK) & (df_abs['participantid'] == participantid)]
    df_rel_filtered = df_rel[(df_rel['CIK'] == CIK) & (df_rel['participantid'] == participantid)]

    if df_abs_filtered.empty and df_rel_filtered.empty:
        print(f"No data found for CIK={CIK}, participant={participantid}.")
        return  # nothing else to plot

    # --- Select columns and label abs/rel ---
    columns_to_display = [
        'CIK', 'companyName', 'metric', 'fiscalYear',
        'grantDate', 'vestLow', 'vestHigh', 'startDate', 'endDate', 'startYear'
    ]
    # .assign returns a new frame, so the filtered slices are never mutated.
    df_abs_filtered = df_abs_filtered[columns_to_display].assign(absRel='abs')
    df_rel_filtered = df_rel_filtered[columns_to_display].assign(absRel='rel')

    df_combined = pd.concat([df_abs_filtered, df_rel_filtered], ignore_index=True)
    df_combined = df_combined.sort_values(by=['metric', 'grantDate'], ascending=[True, True])

    # Display the combined DataFrame (in notebook)
    display(df_combined)

    # --- Convert date columns to datetime (unparseable values become NaT) ---
    for date_column in ('grantDate', 'startDate', 'endDate'):
        df_combined[date_column] = pd.to_datetime(df_combined[date_column], errors='coerce')

    # --- Drop rows that cannot be placed on the y-axis ---
    # value_counts() below ignores NaN, so keeping such rows would make the
    # list of y positions shorter than the frame and the assignment would fail.
    missing_metric = df_combined['metric'].isna()
    if missing_metric.any():
        print(f"Skipping {int(missing_metric.sum())} row(s) without a metric.")
        df_combined = df_combined[~missing_metric].copy()
    if df_combined.empty:
        print(f"No plottable rows for CIK={CIK}, participant={participantid}.")
        return

    # --- Sort metrics by frequency (least frequent first) ---
    metric_counts = df_combined['metric'].value_counts(ascending=True)
    sorted_metrics = metric_counts.index.tolist()

    df_combined['metric'] = pd.Categorical(
        df_combined['metric'],
        categories=sorted_metrics,
        ordered=True
    )
    df_combined = df_combined.sort_values(by=['metric', 'grantDate'])

    # --- Set up color map, positions, etc. ---
    palette = sns.color_palette("colorblind", 10)
    color_map = {'abs': palette[0], 'rel': palette[1]}
    line_labels = {
        'abs': 'absolute, performance period',
        'rel': 'relative, performance period',
    }

    metric_positions = {}  # metric -> y of its first bar
    metric_labels = {}     # metric -> y of its tick label
    y_positions = []       # one y per row, in the frame's sorted order

    current_y = 0
    for metric, row_count in metric_counts.items():
        row_count = int(row_count)
        metric_positions[metric] = current_y
        y_positions.extend(range(current_y, current_y + row_count))
        # Bars occupy current_y .. current_y + row_count - 1; label their midpoint.
        metric_labels[metric] = current_y + (row_count - 1) / 2
        current_y += row_count + 1  # +1 leaves a blank row between metrics

    # The frame is sorted by the categorical metric, i.e. in sorted_metrics
    # order, so the positional assignment lines up row by row.
    df_combined['y_pos'] = y_positions

    # --- Plot ---
    fig, ax = plt.subplots(figsize=(12, 6))

    # Only the first artist of each kind carries a real label, so the legend
    # gets one entry per kind; '_nolegend_' hides the rest.
    labelled = set()

    def _label_once(label):
        """Return `label` the first time it is requested, '_nolegend_' afterwards."""
        if label in labelled:
            return '_nolegend_'
        labelled.add(label)
        return label

    for row in df_combined.itertuples(index=False):
        # performance period bar
        ax.plot([row.startDate, row.endDate], [row.y_pos, row.y_pos],
                color=color_map[row.absRel], linewidth=4,
                label=_label_once(line_labels[row.absRel]))
        # grant date marker
        ax.scatter(row.grantDate, row.y_pos, color='black', marker='o',
                   label=_label_once('Grant Date'))

    # horizontal dashed lines to separate metrics (drawn in the blank row)
    for pos in metric_positions.values():
        ax.axhline(y=pos - 1, color='gray', linestyle='dashed', alpha=0.3)

    ax.set_xlabel("Time")

    # Title (use the first non-null companyName)
    company_names = df_combined['companyName'].dropna()
    if company_names.empty:
        title = f"Compensation Timeline (CIK={CIK}, participant={participantid})"
    else:
        title = f"Compensation Timeline for {company_names.iloc[0]}"
    ax.set_title(title)

    # y-ticks
    ax.set_yticks(list(metric_labels.values()))
    ax.set_yticklabels(list(metric_labels.keys()))

    # Format x-axis as Year-Month
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.tick_params(axis='x', labelrotation=45)

    # Legend ordering: keep handles and labels paired, and list only the kinds
    # that were actually drawn (a participant may have only abs or only rel rows).
    handles, labels = ax.get_legend_handles_labels()
    handle_by_label = dict(zip(labels, handles))
    desired_order = ['absolute, performance period', 'relative, performance period', 'Grant Date']
    present_labels = [lbl for lbl in desired_order if lbl in handle_by_label]
    ax.legend([handle_by_label[lbl] for lbl in present_labels], present_labels, loc='upper left')

    ax.grid(axis='x', linestyle='--', alpha=0.6)
    fig.tight_layout()

    # Save the figure as timeline/<title>.jpg, with spaces and slashes in the
    # title replaced so it is usable as a file name.
    safe_name = title.replace(" ", "_").replace("/", "_")
    os.makedirs(os.path.join(graph_directory, "timeline"), exist_ok=True)
    plot_path = os.path.join(graph_directory, f"timeline/{safe_name}.jpg")
    fig.savefig(plot_path)

    print(f"Plot saved to: {plot_path}")
    plt.show()


def on_run_button_click(b):
    """
    Click handler: wipe the previous results and redraw the timeline.

    `b` is the argument supplied by the button's click callback; it is not used.
    The widget values are read at click time, and everything printed or
    displayed while plotting is captured by `output_area`.
    """
    with output_area:
        # Remove whatever the previous click rendered before drawing again.
        output_area.clear_output()
        plot_compensation_timeline(CIK_widget.value, participant_widget.value)

# Hook the click handler up to the button.
run_button.on_click(on_run_button_click)

# Layout: both inputs side by side, then the button, then the output area.
# As the last expression of the cell, the VBox is what the notebook renders.
widgets.VBox(children=[
    widgets.HBox(children=[CIK_widget, participant_widget]),
    run_button,
    output_area,
])


VBox(children=(HBox(children=(IntText(value=55067, description='CIK:'), IntText(value=58579, description='Part…