In [1]:
# notebook_2.ipynb
# Advanced Analysis - Web3 Trading Team Data Science Assignment

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# --- 1. Load merged dataset ---
# The path is relative, so it is resolved against the current working directory.
MERGED_DATA_PATH = 'csv_files/merged_trader_sentiment.csv'
merged_df = pd.read_csv(MERGED_DATA_PATH)

# Sanity check: list every column name, then preview the first five rows.
print('\n== merged_df columns ==\n', list(merged_df.columns))
print(merged_df.head(n=5))

# --- 2. Ensure outputs directory exists
# exist_ok=True makes this a no-op when the folder is already present,
# so the cell can be re-run safely.
os.makedirs('outputs', exist_ok=True)

# --- 3. Leverage Analysis ---
# Box plot of the 'leverage' column grouped by the 'Classification' label.
# The figure is written to outputs/leverage_dist_fear_greed.png and shown.
#
# Both columns are checked by name before 'Classification' is indexed:
# merged_df['Classification'] raises KeyError when the column is absent,
# which would skip the fallback message in the else branch.
if (
    'leverage' in merged_df.columns
    and 'Classification' in merged_df.columns
    and merged_df['Classification'].notna().any()
):
    # Explicit figure/axes so the plot cannot land on a leftover figure.
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(x='Classification', y='leverage', data=merged_df, ax=ax)
    ax.set_title('Leverage Distribution by Sentiment')
    fig.savefig('outputs/leverage_dist_fear_greed.png')
    plt.show()
else:
    print("No leverage or classification data for leverage analysis plot.")

# --- 4. Trade Size Analysis ---
# Pick the first size column that exists in merged_df, in the priority order
# listed below; size_col stays None when none of them is present.
# NOTE(review): 'Size Tokens' is preferred over 'Size USD' by this order.
# Token quantities of different coins may not be directly comparable --
# confirm which column the analysis is meant to use.
size_col = next(
    (
        candidate
        for candidate in ['Size', 'size', 'Size Tokens', 'Size USD']
        if candidate in merged_df.columns
    ),
    None,
)

# Box plot of the chosen size column grouped by 'Classification'.
# 'Classification' is checked by name first: indexing a missing column
# raises KeyError and would bypass the fallback message below.
if (
    size_col
    and 'Classification' in merged_df.columns
    and merged_df['Classification'].notna().any()
):
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(x='Classification', y=size_col, data=merged_df, ax=ax)
    ax.set_title(f'Trade Size ({size_col}) by Sentiment')
    # The output file name embeds the column name verbatim (spaces included).
    fig.savefig(f'outputs/{size_col}_dist_fear_greed.png')
    plt.show()
else:
    print("No trade size or classification data found for size analysis plot.")

# --- 5. High-Leverage Frequency Analysis ---
# For each 'Classification' label, compute the fraction of trades whose
# leverage exceeds high_lev_threshold, print the table and save a bar chart
# to outputs/high_lev_fraction_fear_greed.png.
#
# 'Classification' is checked by name before it is indexed: a missing column
# raises KeyError and would bypass the fallback message in the else branch.
if (
    'leverage' in merged_df.columns
    and 'Classification' in merged_df.columns
    and merged_df['Classification'].notna().any()
):
    high_lev_threshold = 10  # Set threshold as per business context
    # Boolean flag stored on merged_df; a missing leverage value compares
    # as False here.
    merged_df['high_leverage'] = merged_df['leverage'] > high_lev_threshold
    # Only rows with a known leverage go into the fraction. Otherwise missing
    # values would count as "not high leverage" and understate the result.
    known_leverage = merged_df[merged_df['leverage'].notna()]
    risk_table = known_leverage.groupby('Classification')['high_leverage'].mean()
    print("\nFraction of high leverage trades during Fear/Greed:")
    print(risk_table)
    # Series.plot reuses the current axes when no ax is given, so draw on a
    # fresh figure to avoid painting over an earlier plot.
    fig, ax = plt.subplots()
    risk_table.plot(
        kind='bar',
        title='Fraction of High-Leverage Trades by Sentiment',
        ax=ax,
    )
    ax.set_ylabel('Fraction')
    # Keep the rotated category labels inside the saved image.
    fig.tight_layout()
    fig.savefig('outputs/high_lev_fraction_fear_greed.png')
    plt.show()
else:
    print("Cannot calculate high-leverage fraction without leverage/Classification data.")

# --- 6. Closed PnL Analysis ---
# Box plot of the 'Closed PnL' column grouped by the 'Classification' label.
# The figure is written to outputs/pnl_by_classification.png and shown.
#
# Both columns are checked by name before 'Classification' is indexed:
# merged_df['Classification'] raises KeyError when the column is absent,
# which would skip the fallback message in the else branch.
if (
    'Closed PnL' in merged_df.columns
    and 'Classification' in merged_df.columns
    and merged_df['Classification'].notna().any()
):
    # Explicit figure/axes so the plot cannot land on a leftover figure.
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(x='Classification', y='Closed PnL', data=merged_df, ax=ax)
    ax.set_title('PnL by Sentiment')
    fig.savefig('outputs/pnl_by_classification.png')
    plt.show()
else:
    print("No PnL or classification data for PnL analysis plot.")

# --- 7. Correlation Matrix ---
# Pairwise correlation between the PnL, leverage and size columns that are
# present in merged_df.
# A candidate is kept only if it exists AND has a numeric dtype: a column
# parsed as text cannot be correlated, and the fallback message below is
# about numeric columns specifically.
num_cols = [
    col
    for col in ['Closed PnL', 'leverage', 'Size', 'size', 'Size Tokens', 'Size USD']
    if col in merged_df.columns and pd.api.types.is_numeric_dtype(merged_df[col])
]
# A correlation needs at least two columns to compare.
if len(num_cols) > 1:
    print('Correlation Matrix:')
    print(merged_df[num_cols].corr())
else:
    print("Not enough numeric columns for correlation analysis.")

# --- 8. Save advanced analysis data if desired ---
# merged_df.to_csv('csv_files/advanced_analysis_results.csv', index=False)

# --- END OF notebook_2.ipynb ---



== merged_df columns ==
 ['Account', 'Coin', 'Execution Price', 'Size Tokens', 'Size USD', 'Side', 'Timestamp IST', 'Start Position', 'Direction', 'Closed PnL', 'Transaction Hash', 'Order ID', 'Crossed', 'Fee', 'Trade ID', 'Timestamp', 'date_only', 'Classification']
                                      Account  Coin  Execution Price  \
0  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9769   
1  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9800   
2  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9855   
3  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9874   
4  0xae5eacaf9c6b9111fd53034a602c192a04e082ed  @107           7.9894   

   Size Tokens  Size USD Side        Timestamp IST  Start Position Direction  \
0       986.87   7872.16  BUY  2024-02-12 22:50:00        0.000000       Buy   
1        16.00    127.68  BUY  2024-02-12 22:50:00      986.524596       Buy   
2       144.09   1150.63  BUY  2024-02-12 22:50:00     1002