In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.subplots as sp
import pandas as pd

In [None]:
def export_descriptive_stats_to_excel(df, excel_file_name):
    """Export descriptive statistics of ``df`` to an Excel workbook.

    Statistics are computed with ``df.describe(include='all')`` and the
    statistics of every column are written to their own worksheet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are summarised.
    excel_file_name : str or path-like
        Destination ``.xlsx`` file; written with the ``openpyxl`` engine.

    Notes
    -----
    Worksheet names are derived from the column names: characters that
    Excel does not accept in a sheet title are replaced by ``_`` and the
    name is cut to Excel's 31-character limit. Two columns whose names
    become identical after this clean-up would share one sheet.
    """
    # Calculate descriptive statistics for all columns
    stats_df = df.describe(include='all')

    # Characters that Excel rejects inside a worksheet title
    invalid_chars = '[]:*?/\\'

    # The context manager saves and closes the workbook on exit (also when a
    # write fails); the explicit ``writer.save()`` no longer exists in
    # pandas >= 2.0.
    with pd.ExcelWriter(excel_file_name, engine='openpyxl') as writer:
        # Write each column's statistics to a separate sheet
        for column in df.columns:
            sheet_name = ''.join(
                '_' if ch in invalid_chars else ch for ch in str(column)
            )[:31]
            stats_df[column].to_excel(writer, sheet_name=sheet_name)

    print(f"Descriptive statistics exported to {excel_file_name}")

In [None]:
# Step 1: Data Loading
df = pd.read_csv("../Data/FinalDF/FinalDF.csv", encoding="utf-8", sep="~")
# Keep only the rows dated on or after 2019-01-01.
# NOTE(review): 'Date' is not parsed here, so this is presumably a string
# comparison that relies on ISO "YYYY-MM-DD" formatting -- confirm.
# .copy() makes the filtered frame independent of the full frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
df = df[df['Date'] >= '2019-01-01'].copy()

In [None]:
# Calculate overall (net) sentiment and signal columns for clearer plotting.
# Each entry is (new column, column to add, column to subtract).
net_score_specs = [
    ('PWD Tickers Overall Sentiment', 'PWD Tickers Sentiment Positive', 'PWD Tickers Sentiment Negative'),
    ('PWD Ceos Overall Sentiment', 'PWD Ceos Sentiment Positive', 'PWD Ceos Sentiment Negative'),
    ('PWD Tickers Overall Signal', 'PWD Tickers Signal Bullish', 'PWD Tickers Signal Bearish'),
    ('PWD Ceos Overall Signal', 'PWD Ceos Signal Bullish', 'PWD Ceos Signal Bearish'),
]

for overall_col, plus_col, minus_col in net_score_specs:
    df[overall_col] = df[plus_col] - df[minus_col]

In [None]:
# Columns used for plotting: the identifying columns plus the price change
# are shared, followed by the ticker- or CEO-specific overall scores.
shared_cols = ['Company', 'Date', 'Price Change']

ticker_cols = shared_cols + ['PWD Tickers Overall Sentiment', 'PWD Tickers Overall Signal']
ceo_cols = shared_cols + ['PWD Ceos Overall Sentiment', 'PWD Ceos Overall Signal']

# Concatenation of both lists (the shared columns therefore appear twice)
plot_cols = ticker_cols + ceo_cols

# One frame per view, restricted to that view's columns
df_tickers = df.loc[:, ticker_cols]
df_ceos = df.loc[:, ceo_cols]

In [None]:
def plot_columns_per_group(dataset, columns_to_plot, mode):
    """Plot the selected columns over time, one figure per company.

    For every group of ``dataset`` sharing the same 'Company' value a Plotly
    figure is built with one stacked line subplot per plotted column, saved
    as a PNG under ``../EDA/`` and then displayed.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain 'Company', 'Date' and every plotted column.
    columns_to_plot : list of str
        Candidate columns; 'Date' and 'Company' are ignored because they are
        used as the x axis and the grouping key respectively.
    mode : str
        Label inserted into the output file name
        (``../EDA/{company}_{mode}_analysis_plot.png``).

    Notes
    -----
    The image is written with ``fig.write_image`` so that the Plotly figure
    itself is exported; ``plt.savefig`` would only save matplotlib's current
    (empty) figure. Static export relies on Plotly's image-export backend
    (the ``kaleido`` package) being installed.
    """
    # The plotted columns do not depend on the company, so compute them once
    subplot_cols = [col for col in columns_to_plot if col not in ('Date', 'Company')]
    num_subplots = len(subplot_cols)

    # Group the DataFrame by the 'Company' column and draw one figure each
    for company, group_data in dataset.groupby('Company'):
        fig = sp.make_subplots(
            rows=num_subplots,
            cols=1,
            shared_xaxes=True,
            subplot_titles=subplot_cols,
        )

        for row, col in enumerate(subplot_cols, start=1):
            # Drop the source prefix so the trace name is just the measure
            trace_name = col.replace('PWD Tickers ', '').replace('PWD Ceos ', '')
            trace = go.Scatter(
                x=group_data['Date'],
                y=group_data[col],
                mode='lines',
                name=trace_name,
            )
            fig.add_trace(trace, row=row, col=1)

        # Axis titles: x on the bottom subplot only, y on the top subplot only
        fig.update_xaxes(title_text='Date', row=num_subplots, col=1)
        fig.update_yaxes(title_text='Value', row=1, col=1)

        # Update figure layout
        fig.update_layout(title=f'Analysis for {company}', showlegend=False)

        # Save the Plotly figure to the EDA folder
        plot_filename = f"../EDA/{company}_{mode}_analysis_plot.png"
        fig.write_image(plot_filename)

        # Show the plot
        fig.show()


In [None]:
# Per-company plots of price change against the CEO sentiment/signal scores
plot_columns_per_group(dataset=df_ceos, columns_to_plot=ceo_cols, mode="ceos")

In [None]:
# Per-company plots of price change against the ticker sentiment/signal scores
plot_columns_per_group(dataset=df_tickers, columns_to_plot=ticker_cols, mode="tickers")

In [None]:
# Columns summarised in the per-company descriptive statistics
descriptive_df = df.loc[:, [
    'Company',
    'Date',
    'Price Change',
    'PWD Tickers Overall Sentiment',
    'PWD Tickers Sentiment Positive',
    'PWD Tickers Sentiment Negative',
    'PWD Ceos Overall Sentiment',
    'PWD Ceos Sentiment Positive',
    'PWD Ceos Sentiment Negative',
]]

# Distinct company names found in the 'Company' column
unique_companies = descriptive_df['Company'].unique()

# Map every company name to the rows of descriptive_df that belong to it
company_dataframes = {}
for company in unique_companies:
    company_df = descriptive_df.loc[descriptive_df['Company'] == company]
    company_dataframes[company] = company_df

In [None]:
# Write one descriptive-statistics workbook per company into the EDA folder
for company, company_frame in company_dataframes.items():
    filename = f"../EDA/{company}_eda_stats.xlsx"
    export_descriptive_stats_to_excel(company_frame, filename)

In [None]:
# Plot the ACF and PACF of 'Price Change' for every company and save each
# pair of plots as a single PNG in the EDA folder.
max_lags = 40

for company, company_frame in company_dataframes.items():
    # Missing values would propagate into the correlation estimates, so they
    # are removed first.
    # NOTE(review): dropping interior NaNs shifts later observations closer
    # together -- confirm this is acceptable for the series at hand.
    price_change = company_frame['Price Change'].dropna()

    # statsmodels only computes partial autocorrelations for lags below half
    # the sample size, so cap the requested number of lags for short series.
    lags = min(max_lags, len(price_change) // 2 - 1)
    if lags < 1:
        print(f"Skipping ACF/PACF for {company}: only {len(price_change)} usable observations")
        continue

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    plot_acf(price_change, lags=lags, ax=ax1, title=f'Price Change ACF - {company}')
    plot_pacf(price_change, lags=lags, ax=ax2, title=f'Price Change PACF - {company}')

    # Save and close this specific figure rather than pyplot's "current" one
    plot_filename = f"../EDA/Price Change_ACF_PCAF_{company}.png"
    fig.savefig(plot_filename)
    plt.close(fig)