In [None]:
# Create the flag before any grouping so every grouped object below is built
# from a frame that already carries it: Delay is 1 where Target == 0, else 0.
df['Delay'] = (df['Target'] == 0).astype(int)

# Per-product grouping that the following cells iterate over.
grouped_by_product = df.groupby('Product Type')

# Mean of the Delay flag (share of rows with Delay == 1) for each
# (Product Type, Trader Name) and (Product Type, Mo Owner Name) pair.
trader_delay_rates = df.groupby(['Product Type', 'Trader Name'])['Delay'].mean().reset_index()
mo_owner_delay_rates = df.groupby(['Product Type', 'Mo Owner Name'])['Delay'].mean().reset_index()


In [None]:
# For each product type, list traders from highest to lowest mean Delay.
for product_type, product_rows in grouped_by_product:
    print(f"Product Type: {product_type}")
    trader_means = product_rows.groupby('Trader Name')['Delay'].mean()
    print(trader_means.sort_values(ascending=False))


In [None]:
# For each product type, list Mo owners from highest to lowest mean Delay.
for product_type, product_rows in grouped_by_product:
    print(f"Product Type: {product_type}")
    owner_means = product_rows.groupby('Mo Owner Name')['Delay'].mean()
    print(owner_means.sort_values(ascending=False))


In [None]:
# Mean of the Delay flag for every (Product Type, Trading Book) pair, as a flat table.
book_delay_rates = (
    df.groupby(['Product Type', 'Trading Book'])['Delay']
    .mean()
    .reset_index()
)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One grouped bar chart per rate table: names on the x-axis, mean Delay on the
# y-axis, bars coloured by product type. Each chart is shown before the next is drawn.
for rates, name_column, chart_title in (
    (trader_delay_rates, 'Trader Name', 'Trader Delay Rates by Product Type'),
    (mo_owner_delay_rates, 'Mo Owner Name', 'Mo Owner Delay Rates by Product Type'),
):
    sns.barplot(data=rates, x=name_column, y='Delay', hue='Product Type')
    plt.title(chart_title)
    plt.show()


In [None]:
from scipy.stats import f_oneway

# One-way ANOVA on the Delay flag across traders, run separately for each product type.
for product, group in grouped_by_product:
    # groupby yields one Delay series per trader and leaves out rows whose
    # trader name is missing, so no empty sample is handed to the test.
    trader_groups = [delays for _, delays in group.groupby('Trader Name')['Delay']]

    # f_oneway needs at least two samples; report and move on rather than
    # letting one single-trader product abort the whole loop.
    if len(trader_groups) < 2:
        print(f"Product Type: {product} - ANOVA skipped: fewer than two traders")
        continue

    f_stat, p_val = f_oneway(*trader_groups)
    print(f"Product Type: {product} - ANOVA F-statistic: {f_stat}, p-value: {p_val}")


In [None]:
# Per product type: mean Delay for each Mo owner, highest first, printed with
# the owner labels mapped back through the fitted encoder.
for product_type, product_rows in grouped_by_product:
    print(f"Product Type: {product_type}")

    owner_means = (
        product_rows.groupby('Mo Owner Name')['Delay']
        .mean()
        .sort_values(ascending=False)
    )

    # The index holds the encoded owner labels; decode them for display.
    owner_encoder = label_encoders['Mo Owner Name']
    decoded_owner_names = owner_encoder.inverse_transform(owner_means.index)

    result_df = pd.DataFrame(
        {'Mo Owner Name': decoded_owner_names, 'Mean Delay': owner_means.values}
    )

    print(result_df)


In [None]:
import pandas as pd

# Build one table per product type (one row per Mo owner), stack them, and
# write the combined table to an Excel workbook.
all_results = []

for product, group in grouped_by_product:
    by_owner = group.groupby('Mo Owner Name')

    # Mean Delay per Mo owner, highest first.
    mean_delays = by_owner['Delay'].mean().sort_values(ascending=False)

    # Traders seen with each Mo owner in this product, joined into one string so
    # the column always has exactly one entry per owner row. The values are taken
    # from 'Trader Name' as stored in df (not decoded here).
    traders_per_owner = by_owner['Trader Name'].agg(
        lambda names: ', '.join(map(str, names.dropna().unique()))
    )

    # Inverse transform the encoded Mo Owner Names
    original_mo_owner_names = label_encoders['Mo Owner Name'].inverse_transform(mean_delays.index)

    result_df = pd.DataFrame({
        'Product Type': product,  # scalar, broadcast to every row
        'Mo Owner Name': original_mo_owner_names,
        # Reindex so each trader list lines up with the sorted owner order.
        'Trader': traders_per_owner.reindex(mean_delays.index).values,
        'Mean Delay': mean_delays.values
    })

    all_results.append(result_df)

# Concatenate all the results into a single DataFrame
final_result_df = pd.concat(all_results, ignore_index=True)

# Save the final DataFrame to an Excel file
final_result_df.to_excel('trader_behavior_analysis.xlsx', index=False)

print("All results have been saved to 'trader_behavior_analysis.xlsx'")


In [None]:
import pandas as pd

# Per-Mo-owner summary: distinct trading books, delayed rows, total rows, and delay percentage.
# Reads df columns 'Mo Owner Name', 'Trading Book' and 'Delay'.

# Number of distinct trading books per Mo owner
total_books = df.groupby('Mo Owner Name')['Trading Book'].nunique().reset_index(name='Total Number of Books')

# Delayed rows per Mo owner. The Delay flag is 1 for a delayed row
# (it is derived as Target == 0), so the filter must select Delay == 1.
delay_count = df[df['Delay'] == 1].groupby('Mo Owner Name').size().reset_index(name='Delay Count')

# Total rows per Mo owner
total_count = df.groupby('Mo Owner Name').size().reset_index(name='Total Count')

# Left-merge onto total_books so owners with no delayed rows are kept
result_df = pd.merge(total_books, delay_count, on='Mo Owner Name', how='left')
result_df = pd.merge(result_df, total_count, on='Mo Owner Name', how='left')

# Owners absent from delay_count come out of the merge as NaN; they have zero delays
result_df['Delay Count'] = result_df['Delay Count'].fillna(0)

# Share of each owner's rows that are delayed, in percent
result_df['Delay %'] = (result_df['Delay Count'] / result_df['Total Count']) * 100

# Display the result DataFrame
print(result_df)


F-Statistic:
What it measures: The F-statistic is a ratio that compares the variance between the groups (in this case, how much the average delay differs from one trader to another) to the variance within the groups (how much delays vary among each individual trader's own orders).
Interpretation:
A higher F-statistic indicates that the variance between the traders' delays is much larger than the variance within each trader's delays, suggesting that at least one trader is significantly different from the others in terms of delays.
A lower F-statistic suggests that the differences between traders are not substantial compared to the variance within traders, meaning their performance may be similar.
p-Value:
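What it measures: The p-value is the probability of observing differences between traders' delays at least as large as those in the data if all traders actually had the same underlying delay rate (that is, if the differences were due to chance alone).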