In [None]:
# Flag delayed rows first, so every grouping below is built from a frame that
# already contains the column it aggregates. A row is delayed when 'Target'
# equals 0; any other value (including a missing one) maps to 0.
df['Delay'] = (df['Target'] == 0).astype(int)

# Per-product grouping of the full frame, iterated as (product, rows) pairs.
grouped_by_product = df.groupby('Product Type')

# The mean of the 0/1 flag is the share of delayed rows for each
# (product type, person) pair; reset_index() flattens the keys into columns.
trader_delay_rates = df.groupby(['Product Type', 'Trader Name'])['Delay'].mean().reset_index()
mo_owner_delay_rates = df.groupby(['Product Type', 'Mo Owner Name'])['Delay'].mean().reset_index()


In [None]:
# For each product type, show every trader's mean delay rate, highest first.
for product, group in grouped_by_product:
    print(f"Product Type: {product}")
    rates_by_trader = group.groupby('Trader Name')['Delay'].mean()
    ranked_trader_rates = rates_by_trader.sort_values(ascending=False)
    print(ranked_trader_rates)


In [None]:
# For each product type, show every Mo Owner's mean delay rate, highest first.
for product, group in grouped_by_product:
    print(f"Product Type: {product}")
    rates_by_owner = group.groupby('Mo Owner Name')['Delay'].mean()
    ranked_owner_rates = rates_by_owner.sort_values(ascending=False)
    print(ranked_owner_rates)


In [None]:
# Mean of the 'Delay' flag for every (product type, trading book) pair,
# returned as a flat table with the two keys as ordinary columns.
book_delay_rates = (
    df.groupby(['Product Type', 'Trading Book'])['Delay']
    .mean()
    .reset_index()
)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Draw one grouped bar chart per role: delay rate on the y-axis, one bar
# cluster per person, coloured by product type. Each chart gets its own figure.
for rates, name_column, chart_title in (
    (trader_delay_rates, 'Trader Name', 'Trader Delay Rates by Product Type'),
    (mo_owner_delay_rates, 'Mo Owner Name', 'Mo Owner Delay Rates by Product Type'),
):
    fig, ax = plt.subplots()
    sns.barplot(data=rates, x=name_column, y='Delay', hue='Product Type', ax=ax)
    ax.set_title(chart_title)
    plt.show()


In [None]:
from scipy.stats import f_oneway

# One-way ANOVA per product type: tests whether the mean of the 'Delay' flag
# differs across traders within that product type.
for product, group in grouped_by_product:
    # One sample of delay flags per trader, built in a single groupby pass.
    # Rows with a missing trader name are left out (groupby drops NaN keys).
    trader_groups = [delays for _, delays in group.groupby('Trader Name')['Delay']]

    # f_oneway raises TypeError with fewer than two samples, so a product type
    # handled by a single trader is reported and skipped instead of aborting
    # the whole loop.
    if len(trader_groups) < 2:
        print(f"Product Type: {product} - ANOVA skipped: only {len(trader_groups)} trader group(s)")
        continue

    f_stat, p_val = f_oneway(*trader_groups)
    print(f"Product Type: {product} - ANOVA F-statistic: {f_stat}, p-value: {p_val}")


In [None]:
# For each product type, print a table of mean delay per Mo Owner with the
# label-encoded owner ids translated back to their original names.
for product, group in grouped_by_product:
    print(f"Product Type: {product}")

    # Average the delay flag per owner, then rank from highest to lowest.
    owner_means = group.groupby('Mo Owner Name')['Delay'].mean()
    mean_delays = owner_means.sort_values(ascending=False)

    # The index holds encoded owner ids; map them back through the fitted encoder.
    owner_encoder = label_encoders['Mo Owner Name']
    original_mo_owner_names = owner_encoder.inverse_transform(mean_delays.index)

    # Two-column frame (decoded name, mean delay) in ranked order.
    result_df = pd.DataFrame(
        {'Mo Owner Name': original_mo_owner_names, 'Mean Delay': mean_delays.values}
    )

    print(result_df)


In [None]:
import pandas as pd

# Build one table of mean delay per Mo Owner for every product type, stack the
# tables, and write the result to 'trader_behavior_analysis.xlsx'.
all_results = []

for product, group in grouped_by_product:
    owner_groups = group.groupby('Mo Owner Name')

    # Mean delay per Mo Owner, highest first; this ordering defines the rows.
    mean_delays = owner_groups['Delay'].mean().sort_values(ascending=False)

    # Inverse transform the encoded Mo Owner Names
    original_mo_owner_names = label_encoders['Mo Owner Name'].inverse_transform(mean_delays.index)

    # Distinct traders seen with each owner, joined into one string per owner
    # and re-ordered to match mean_delays. Using the product-wide unique()
    # array here would give one entry per trader rather than per owner, which
    # makes the DataFrame constructor fail whenever the two counts differ.
    # NOTE(review): trader values are written as stored in df; if 'Trader Name'
    # is label-encoded as well, decode it with its encoder before joining.
    traders_by_owner = (
        owner_groups['Trader Name']
        .agg(lambda names: ', '.join(map(str, names.dropna().unique())))
        .reindex(mean_delays.index)
    )

    # One row per Mo Owner; the scalar product value is broadcast to all rows.
    result_df = pd.DataFrame({
        'Product Type': product,
        'Mo Owner Name': original_mo_owner_names,
        'Trader': traders_by_owner.values,
        'Mean Delay': mean_delays.values
    })

    all_results.append(result_df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns when there were no product groups at all.
if all_results:
    final_result_df = pd.concat(all_results, ignore_index=True)
else:
    final_result_df = pd.DataFrame(columns=['Product Type', 'Mo Owner Name', 'Trader', 'Mean Delay'])

# Save the final DataFrame to an Excel file
final_result_df.to_excel('trader_behavior_analysis.xlsx', index=False)

print("All results have been saved to 'trader_behavior_analysis.xlsx'")
