In [41]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats


In [42]:
# Load the bank-sector OHLCV CSV that every later cell works from.
df = pd.read_csv(filepath_or_buffer='./Data/BankSector_ohlcv.csv')


In [43]:
# Column names, dtypes and non-null counts (info() prints directly).
df.info()

# A bare expression in the middle of a cell is evaluated and thrown away;
# only the last expression is rendered. display() makes the summary visible.
display(df.describe())

# Last expression of the cell: rendered as the cell's output.
df.shape


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176092 entries, 0 to 176091
Data columns (total 14 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   ticker       176092 non-null  object 
 1   date         176092 non-null  object 
 2   close        176092 non-null  float64
 3   high         176092 non-null  float64
 4   low          176092 non-null  float64
 5   open         176092 non-null  float64
 6   volume       176092 non-null  int64  
 7   adjClose     176092 non-null  float64
 8   adjHigh      176092 non-null  float64
 9   adjLow       176092 non-null  float64
 10  adjOpen      176092 non-null  float64
 11  adjVolume    176092 non-null  int64  
 12  divCash      176092 non-null  float64
 13  splitFactor  176092 non-null  float64
dtypes: float64(10), int64(2), object(2)
memory usage: 18.8+ MB


(176092, 14)

In [44]:
def anova_test(data, group_column, return_column):
    """Run a one-way ANOVA on ``return_column`` across the levels of ``group_column``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    group_column : str
        Column whose distinct values define the samples being compared.
    return_column : str
        Numeric column whose per-group values are passed to the test.

    Returns
    -------
    tuple
        ``(f_value, p_value)`` as returned by ``scipy.stats.f_oneway``.
        Both are ``nan`` when the grouping yields fewer than two samples,
        because the test is undefined in that case.
    """
    samples = [values for _, values in data.groupby(group_column)[return_column]]

    # f_oneway raises on fewer than two samples (empty frame, or every row in
    # one bucket). Report "no result" instead so callers can still format it.
    if len(samples) < 2:
        return float('nan'), float('nan')

    f_value, p_value = stats.f_oneway(*samples)
    return f_value, p_value

def plot_time_effect(avg_returns, f_value, p_value, x_labels, title_prefix, effect_type, output_dir, output_prefix):
    """Save a bar chart of average returns per calendar bucket with an ANOVA caption.

    Parameters
    ----------
    avg_returns : pandas.Series
        Average return per bucket, already expressed in percent. One bar is
        drawn per entry, in index order.
    f_value, p_value : float
        ANOVA statistics shown in the caption below the chart.
    x_labels : sequence of str
        Tick labels matched to the bars by position. They are used only when
        there is exactly one label per bar; otherwise the index values of
        ``avg_returns`` are shown so that no bar is mislabelled.
    title_prefix : str
        Text placed at the start of the chart title.
    effect_type : str
        ``'weekday'`` selects "Day" wording; any other value selects "Month"
        wording. ``'month'`` additionally rotates the tick labels 45 degrees.
    output_dir : str
        Directory the PNG is written to. It is not created here.
    output_prefix : str
        Prefix of the output file name.

    Returns
    -------
    None
        The chart is written to
        ``<output_dir>/<output_prefix><effect_type>_effect.png``.
    """
    period = 'Day' if effect_type == 'weekday' else 'Month'
    n_bars = len(avg_returns)

    # Labels are positional. If the caller's list does not line up with the
    # bars (a bucket is missing or an unexpected one is present), fall back to
    # the index values rather than attaching names to the wrong bars.
    if len(x_labels) == n_bars:
        tick_labels = list(x_labels)
    else:
        tick_labels = [str(key) for key in avg_returns.index]

    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        colors = plt.cm.viridis(np.linspace(0, 1, n_bars))
        avg_returns.plot(kind='bar', ax=ax, color=colors, edgecolor='black', width=0.7)

        ax.set_title(f'{title_prefix} Average {period} Return by {effect_type.capitalize()}', fontsize=16, fontweight='bold')
        ax.set_xlabel(effect_type.capitalize(), fontsize=12)
        ax.set_ylabel(f'Average {period} Return (%)', fontsize=12)
        ax.set_xticks(range(n_bars))
        ax.set_xticklabels(tick_labels, rotation=45 if effect_type == 'month' else 0, fontsize=10)
        ax.tick_params(axis='y', labelsize=10)

        # Value label just outside each bar: above positive bars, below negative ones.
        for i, v in enumerate(avg_returns):
            if np.isnan(v):
                continue  # nothing drawn for this bucket, so nothing to annotate
            if v >= 0:
                va = 'bottom'
                y = v + 0.01
            else:
                va = 'top'
                y = v - 0.01
            ax.text(i, y, f'{v:.2f}%', ha='center', va=va, fontsize=9, rotation=0)

        ax.grid(axis='y', linestyle='--', alpha=0.7)
        ax.set_facecolor('#f0f0f0')
        # Leave room at the bottom of the figure for the ANOVA caption.
        fig.subplots_adjust(bottom=0.2)

        fig.text(0.5, 0.02, f'ANOVA Results: F-value = {f_value:.4f}, p-value = {p_value:.4f}',
                 ha='center', fontsize=10, bbox=dict(boxstyle='round', facecolor='white', edgecolor='gray', alpha=0.8))

        # Pad the y-range so the value labels are not clipped at the edges.
        y_min, y_max = ax.get_ylim()
        ax.set_ylim(y_min - 0.025, y_max + 0.025)

        fig.savefig(os.path.join(output_dir, f'{output_prefix}{effect_type}_effect.png'), bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # this function is called in a loop and open figures accumulate.
        plt.close(fig)

def analyze_weekday_effect(df, output_dir='./', ticker=None):
    """Chart average daily return by day of the week and test it with a one-way ANOVA.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history with at least ``ticker``, ``date`` and ``adjClose``
        columns. ``date`` must be parseable by ``pandas.to_datetime``.
        The frame is not modified.
    output_dir : str, default './'
        Base output directory. The sector chart is written directly into it;
        a single-ticker chart goes into its ``weekday_effect`` subdirectory.
        The target directory is created if missing.
    ticker : str, optional
        Restrict the analysis to this ticker. When omitted, all tickers are
        pooled and the chart is labelled "Sector".

    Raises
    ------
    ValueError
        If no daily returns can be computed for the selection (for example
        an unknown ticker or fewer than two price rows).
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Returns are computed per ticker, so filtering first yields the same
    # numbers without computing returns for the rest of the sector.
    prices = df[df['ticker'] == ticker] if ticker else df
    prices = prices.copy()
    prices['date'] = pd.to_datetime(prices['date'])

    # pct_change compares each row with the previous one, so rows must be in
    # chronological order within every ticker.
    prices = prices.sort_values(['ticker', 'date'])
    prices['day_return'] = prices.groupby('ticker')['adjClose'].pct_change()
    prices = prices.dropna(subset=['day_return'])
    if prices.empty:
        scope = f'ticker {ticker!r}' if ticker else 'the supplied data'
        raise ValueError(f'No daily returns could be computed for {scope}.')
    prices['weekday'] = prices['date'].dt.dayofweek

    title_prefix = f'{ticker}' if ticker else 'Sector'
    output_prefix = f'{ticker}_' if ticker else 'sector_'
    if ticker:
        output_dir = os.path.join(output_dir, 'weekday_effect')

    os.makedirs(output_dir, exist_ok=True)

    avg_returns = prices.groupby('weekday')['day_return'].mean() * 100
    f_value, p_value = anova_test(prices, 'weekday', 'day_return')

    # Name only the weekdays that actually occur, in index order, so the
    # labels stay aligned with the bars even if a day is absent or the data
    # contains weekend rows.
    x_labels = [weekday_names[day] for day in avg_returns.index]
    plot_time_effect(avg_returns, f_value, p_value, x_labels, title_prefix, 'weekday', output_dir, output_prefix)

def analyze_month_effect(df, output_dir='./', ticker=None):
    """Chart average monthly return by calendar month and test it with a one-way ANOVA.

    Monthly returns are the percentage change between consecutive
    business-month-end values of ``adjClose`` for each ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history with at least ``ticker``, ``date`` and ``adjClose``
        columns. ``date`` must be parseable by ``pandas.to_datetime``.
        The frame is not modified.
    output_dir : str, default './'
        Base output directory. The sector chart is written directly into it;
        a single-ticker chart goes into its ``month_effect`` subdirectory.
        The target directory is created if missing.
    ticker : str, optional
        Restrict the analysis to this ticker. When omitted, all tickers are
        pooled and the chart is labelled "Sector".

    Raises
    ------
    ValueError
        If no monthly returns can be computed for the selection (for example
        an unknown ticker or less than two months of data).
    """
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    scope = f'ticker {ticker!r}' if ticker else 'the supplied data'

    # Returns are computed per ticker, so filtering first yields the same
    # numbers without resampling the rest of the sector. Only the columns the
    # analysis uses are carried forward.
    prices = df[df['ticker'] == ticker] if ticker else df
    prices = prices[['ticker', 'date', 'adjClose']].copy()
    if prices.empty:
        raise ValueError(f'No price rows found for {scope}.')
    prices['date'] = pd.to_datetime(prices['date'])

    # .last() picks the final row of each month in row order, so the rows
    # must be chronological for it to be the month-end price.
    prices = prices.sort_values(['ticker', 'date']).set_index('date')

    # Selecting the column before resampling keeps 'ticker' in the index only,
    # so the result does not depend on whether pandas includes the grouping
    # column in the resampled frame.
    df_monthly = (
        prices.groupby('ticker')['adjClose']
        .resample('BME')
        .last()
        .reset_index()
    )
    df_monthly['month_return'] = df_monthly.groupby('ticker')['adjClose'].pct_change()
    df_monthly = df_monthly.dropna(subset=['month_return'])
    if df_monthly.empty:
        raise ValueError(f'No monthly returns could be computed for {scope}.')
    df_monthly['month'] = df_monthly['date'].dt.month

    title_prefix = f'{ticker}' if ticker else 'Sector'
    output_prefix = f'{ticker}_' if ticker else 'sector_'
    if ticker:
        output_dir = os.path.join(output_dir, 'month_effect')

    os.makedirs(output_dir, exist_ok=True)

    avg_returns = df_monthly.groupby('month')['month_return'].mean() * 100
    f_value, p_value = anova_test(df_monthly, 'month', 'month_return')

    # Name only the months that actually occur (month numbers are 1-based),
    # so labels stay aligned with the bars for short price histories.
    x_labels = [month_names[month - 1] for month in avg_returns.index]
    plot_time_effect(avg_returns, f_value, p_value, x_labels, title_prefix, 'month', output_dir, output_prefix)


In [45]:
# Root folder for every chart produced by this cell.
output_dir = './Banks/stats/'

# Sector-wide charts first: weekday effect, then month effect.
for run_effect in (analyze_weekday_effect, analyze_month_effect):
    run_effect(df, output_dir)

# Then the same pair of charts for each individual bank, in the same order.
bulge_brackets = ['JPM', 'BAC', 'C', 'GS', 'MS', 'DB', 'UBS', 'BCS']
for ticker in bulge_brackets:
    for run_effect in (analyze_weekday_effect, analyze_month_effect):
        run_effect(df, output_dir, ticker=ticker)

print(f"\nSector and individual bank weekday and month effect plots have been saved in the '{output_dir}' folder.")


Sector and individual bank weekday and month effect plots have been saved in the './Banks/stats/' folder.
