# Data Science Assignment - Web3 Trading Team

## Analysis of Trader Behavior vs Market Sentiment

This notebook analyzes the relationship between trading behavior and market sentiment using:
1. **Bitcoin Market Sentiment Dataset** - Fear/Greed Index
2. **Historical Trader Data from Hyperliquid** - Trading transactions

### Objective
Analyze how trading behavior (profitability, risk, volume, leverage) aligns or diverges from overall market sentiment (fear vs greed). Identify hidden trends or signals that could influence smarter trading strategies.

## 1. Setup and Imports

In [None]:
# Install required packages
!pip install pandas numpy matplotlib seaborn plotly scikit-learn

In [None]:
# Import libraries
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from scipy import stats
# Silence every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Plot appearance: seaborn-flavoured matplotlib style sheet plus the "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# DataFrame rendering: a value of None lifts the limit on each of these options.
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

## 2. Data Loading and Initial Exploration

In [None]:
# Load the datasets
# Note: Upload the CSV files to Colab first


def _load_and_preview(csv_path, shape_label):
    """Read `csv_path` into a DataFrame, print its shape after `shape_label`,
    show its first rows, and return the frame."""
    frame = pd.read_csv(csv_path)
    print(shape_label, frame.shape)
    print("\nFirst few rows:")
    display(frame.head())
    return frame


# Fear/greed index data
sentiment_df = _load_and_preview('fear_greed_index.csv', "Fear/Greed Index Data Shape:")

# Historical trading data
trading_df = _load_and_preview('historical_data.csv', "\nTrading Data Shape:")

In [None]:
# Basic information about the datasets (column names, dtypes, non-null counts).
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called on its own; wrapping it in print() would append a stray "None" line
# after each report.
print("=== FEAR/GREED INDEX DATA INFO ===")
sentiment_df.info()
print("\n=== TRADING DATA INFO ===")
trading_df.info()

In [None]:
# Report the number of null entries per column for each dataset.
missing_reports = (
    ("=== MISSING VALUES IN FEAR/GREED DATA ===", sentiment_df),
    ("\n=== MISSING VALUES IN TRADING DATA ===", trading_df),
)
for heading, frame in missing_reports:
    print(heading)
    print(frame.isna().sum())

## 3. Data Preprocessing

In [None]:
# Convert timestamp columns to pandas datetimes.
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df['timestamp'] = pd.to_datetime(sentiment_df['timestamp'], unit='s')

# Trading data: the IST column is a day-first string such as "02-12-2024 22:50".
trading_df['Timestamp IST'] = pd.to_datetime(trading_df['Timestamp IST'], format='%d-%m-%Y %H:%M')

# The second timestamp column is an epoch value whose unit is not known up
# front: try seconds first, then milliseconds, and fall back to the IST column
# if neither interpretation parses. Only parse-related failures are caught
# (a missing column, a wrong dtype, or an out-of-range value -- pandas'
# OutOfBoundsDatetime is a ValueError subclass), so interrupts and unrelated
# bugs are no longer silently swallowed.
_EPOCH_PARSE_ERRORS = (KeyError, TypeError, ValueError, OverflowError)

timestamp_source = "'Timestamp IST' fallback"
for epoch_unit in ('s', 'ms'):
    try:
        trading_df['Timestamp'] = pd.to_datetime(trading_df['Timestamp'], unit=epoch_unit)
    except _EPOCH_PARSE_ERRORS:
        continue
    timestamp_source = f"epoch unit '{epoch_unit}'"
    break
else:
    # Neither unit worked: reuse the already-parsed IST timestamp.
    trading_df['Timestamp'] = trading_df['Timestamp IST']

print(f"'Timestamp' column derived from: {timestamp_source}")

print("Date ranges:")
print(f"Sentiment data: {sentiment_df['date'].min()} to {sentiment_df['date'].max()}")
print(f"Trading data: {trading_df['Timestamp IST'].min()} to {trading_df['Timestamp IST'].max()}")

In [None]:
# Clean the trade-level data and roll it up to one row per calendar day.

# Coerce the numeric columns that are present; unparseable entries become NaN.
numeric_columns = ['Execution Price', 'Size Tokens', 'Size USD', 'Start Position', 'Closed PnL', 'Fee']
for col in (name for name in numeric_columns if name in trading_df.columns):
    trading_df[col] = pd.to_numeric(trading_df[col], errors='coerce')

# Per-trade helper columns: notional value and 0/1 flags for the trade side.
trading_df['Trade_Value'] = trading_df['Size USD']
trading_df['Is_Buy'] = trading_df['Side'].eq('BUY').astype(int)
trading_df['Is_Sell'] = trading_df['Side'].eq('SELL').astype(int)

# Daily aggregates keyed by the IST calendar date. Named aggregation yields the
# final column names directly, in the same order as before.
trade_day = trading_df['Timestamp IST'].dt.date
daily_trading = (
    trading_df
    .groupby(trade_day)
    .agg(
        Total_Volume=('Trade_Value', 'sum'),
        Avg_Trade_Size=('Trade_Value', 'mean'),
        Trade_Count=('Trade_Value', 'count'),
        Total_PnL=('Closed PnL', 'sum'),
        Avg_PnL=('Closed PnL', 'mean'),
        Buy_Count=('Is_Buy', 'sum'),
        Sell_Count=('Is_Sell', 'sum'),
        Total_Fees=('Fee', 'sum'),
    )
    .round(2)
    .reset_index()
)

# Datetime version of the grouping key, used later as the merge key.
daily_trading['Date'] = pd.to_datetime(daily_trading['Timestamp IST'])

print("Daily trading metrics shape:", daily_trading.shape)
display(daily_trading.head())

## 4. Exploratory Data Analysis

In [None]:
# Analyze sentiment distribution: share of records per sentiment class (pie)
# and the spread of the raw index values (histogram).

# savefig() does not create missing directories; without this the cell raises
# FileNotFoundError on a fresh environment.
os.makedirs('outputs', exist_ok=True)

plt.figure(figsize=(12, 6))

# Left panel: class frequencies
plt.subplot(1, 2, 1)
sentiment_counts = sentiment_df['classification'].value_counts()
plt.pie(sentiment_counts.values, labels=sentiment_counts.index, autopct='%1.1f%%')
plt.title('Distribution of Market Sentiment')

# Right panel: index value histogram
plt.subplot(1, 2, 2)
plt.hist(sentiment_df['value'], bins=30, alpha=0.7, color='skyblue')
plt.xlabel('Fear/Greed Index Value')
plt.ylabel('Frequency')
plt.title('Distribution of Fear/Greed Index Values')

plt.tight_layout()
plt.savefig('outputs/sentiment_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Analyze trading behavior: distributions of daily volume and PnL, plus two
# scatter plots relating buy/sell counts and volume/PnL.

# savefig() does not create missing directories; without this the cell raises
# FileNotFoundError on a fresh environment.
os.makedirs('outputs', exist_ok=True)

plt.figure(figsize=(15, 10))

# Top-left: daily traded volume
plt.subplot(2, 2, 1)
plt.hist(daily_trading['Total_Volume'], bins=30, alpha=0.7, color='green')
plt.xlabel('Daily Trading Volume (USD)')
plt.ylabel('Frequency')
plt.title('Distribution of Daily Trading Volume')

# Top-right: daily closed PnL
plt.subplot(2, 2, 2)
plt.hist(daily_trading['Total_PnL'], bins=30, alpha=0.7, color='orange')
plt.xlabel('Daily PnL (USD)')
plt.ylabel('Frequency')
plt.title('Distribution of Daily PnL')

# Bottom-left: number of buys vs number of sells per day
plt.subplot(2, 2, 3)
plt.scatter(daily_trading['Buy_Count'], daily_trading['Sell_Count'], alpha=0.6)
plt.xlabel('Buy Count')
plt.ylabel('Sell Count')
plt.title('Buy vs Sell Count Relationship')

# Bottom-right: daily volume vs daily PnL
plt.subplot(2, 2, 4)
plt.scatter(daily_trading['Total_Volume'], daily_trading['Total_PnL'], alpha=0.6)
plt.xlabel('Total Volume')
plt.ylabel('Total PnL')
plt.title('Volume vs PnL Relationship')

plt.tight_layout()
plt.savefig('outputs/trading_behavior_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

## 5. Sentiment Analysis Over Time

In [None]:
# Plot sentiment over time, one point per record, coloured by sentiment class.

# savefig() does not create missing directories; without this the cell raises
# FileNotFoundError on a fresh environment.
os.makedirs('outputs', exist_ok=True)

plt.figure(figsize=(15, 8))

# Colour used for each sentiment class
sentiment_colors = {
    'Extreme Fear': 'darkred',
    'Fear': 'red',
    'Neutral': 'yellow',
    'Greed': 'green',
    'Extreme Greed': 'darkgreen'
}

# A label that is missing or not in the mapping would map to NaN, which is not
# a valid colour for scatter(); such points are drawn in gray instead.
point_colors = sentiment_df['classification'].map(sentiment_colors).fillna('gray')

plt.scatter(sentiment_df['date'], sentiment_df['value'],
            c=point_colors,
            alpha=0.6, s=20)
plt.xlabel('Date')
plt.ylabel('Fear/Greed Index Value')
plt.title('Bitcoin Market Sentiment Over Time')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

# Legend built from proxy markers, one per sentiment class (scatter with
# per-point colours does not produce legend entries by itself).
legend_elements = [plt.Line2D([0], [0], marker='o', color='w',
                              markerfacecolor=color, markersize=8, label=label)
                   for label, color in sentiment_colors.items()]
plt.legend(handles=legend_elements, loc='upper right')

plt.tight_layout()
plt.savefig('outputs/sentiment_over_time.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Merge Sentiment with Trading Data

In [None]:
# Merge sentiment data with daily trading data


def _dominant_classification(labels):
    """Return the most frequent label in `labels`, or 'Neutral' if there is none.

    The mode is computed once per group (ties resolve to the first value that
    Series.mode() returns).
    """
    modes = labels.mode()
    return modes.iloc[0] if len(modes) > 0 else 'Neutral'


# Collapse the sentiment data to one row per calendar date: mean index value
# and the most common classification for that date.
sentiment_daily = sentiment_df.groupby(sentiment_df['date'].dt.date).agg({
    'value': 'mean',
    'classification': _dominant_classification
}).reset_index()
sentiment_daily['Date'] = pd.to_datetime(sentiment_daily['date'])

# Inner join: only dates present in BOTH datasets are kept.
merged_data = pd.merge(daily_trading, sentiment_daily, on='Date', how='inner')

print("Merged dataset shape:", merged_data.shape)
print("\nSample of merged data:")
display(merged_data.head())

# Save merged data. to_csv() does not create missing directories, so make sure
# the target folder exists first.
os.makedirs('csv_files', exist_ok=True)
merged_data.to_csv('csv_files/merged_sentiment_trading.csv', index=False)

## 7. Trading Behavior by Sentiment

In [None]:
# Summary table of the daily trading metrics for each sentiment class.
# Maps each metric to the statistic(s) computed over the days in a class.
metrics_by_sentiment = {
    'Total_Volume': ['mean', 'std', 'count'],
    'Total_PnL': ['mean', 'std'],
    'Trade_Count': ['mean', 'std'],
    'Buy_Count': 'mean',
    'Sell_Count': 'mean',
    'Total_Fees': 'mean'
}

grouped_by_sentiment = merged_data.groupby('classification')
sentiment_analysis = grouped_by_sentiment.agg(metrics_by_sentiment).round(2)

print("Trading Behavior by Market Sentiment:")
display(sentiment_analysis)

In [None]:
# Visualize trading behavior by sentiment: four bar charts of per-class daily
# averages, each sorted from highest to lowest.

# savefig() does not create missing directories; without this the cell raises
# FileNotFoundError on a fresh environment.
os.makedirs('outputs', exist_ok=True)

# Share of a day's BUY/SELL-tagged trades that are buys (the "Buy Ratio").
merged_data['Buy_Sell_Ratio'] = merged_data['Buy_Count'] / (merged_data['Buy_Count'] + merged_data['Sell_Count'])


def _mean_by_sentiment(metric):
    """Mean of `metric` per sentiment class in merged_data, sorted descending."""
    return merged_data.groupby('classification')[metric].mean().sort_values(ascending=False)


sentiment_volume = _mean_by_sentiment('Total_Volume')
sentiment_pnl = _mean_by_sentiment('Total_PnL')
sentiment_trades = _mean_by_sentiment('Trade_Count')
sentiment_ratio = _mean_by_sentiment('Buy_Sell_Ratio')

# (series, panel title, y-axis label) for each panel, in row-major order.
panels = [
    (sentiment_volume, 'Average Daily Trading Volume by Sentiment', 'Volume (USD)'),
    (sentiment_pnl, 'Average Daily PnL by Sentiment', 'PnL (USD)'),
    (sentiment_trades, 'Average Daily Trade Count by Sentiment', 'Number of Trades'),
    (sentiment_ratio, 'Buy/Sell Ratio by Sentiment', 'Buy Ratio'),
]

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

for ax, (series, title, ylabel) in zip(axes.flat, panels):
    # Colour every bar by the sentiment it represents, using the mapping from
    # the sentiment-over-time plot. A fixed colour list applied after sorting
    # would colour bars by rank, so the same sentiment could get a different
    # colour in every panel.
    bar_colors = [sentiment_colors.get(label, 'gray') for label in series.index]
    ax.bar(series.index, series.values, color=bar_colors)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig('outputs/trading_by_sentiment.png', dpi=300, bbox_inches='tight')
plt.show()

## 8. Correlation Analysis

In [None]:
# Calculate pairwise (Pearson, the DataFrame.corr default) correlations between
# the sentiment index value and the daily trading metrics, and draw a heatmap.

# savefig() does not create missing directories; without this the cell raises
# FileNotFoundError on a fresh environment.
os.makedirs('outputs', exist_ok=True)

correlation_data = merged_data[['value', 'Total_Volume', 'Total_PnL', 'Trade_Count', 'Buy_Count', 'Sell_Count', 'Total_Fees']]
correlation_matrix = correlation_data.corr()

plt.figure(figsize=(10, 8))
# center=0 puts the neutral colour of the diverging map at zero correlation.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
            square=True, linewidths=0.5)
plt.title('Correlation Matrix: Sentiment vs Trading Metrics')
plt.tight_layout()
plt.savefig('outputs/correlation_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

## 9. Statistical Testing

In [None]:
# Perform statistical tests on the merged daily data (uses scipy.stats, which
# is imported as `stats` in the notebook's import cell).
print("=== STATISTICAL ANALYSIS ===\n")

# 1. ANOVA test for PnL across different sentiments
# Build one sample of daily PnL per sentiment class, then keep only the classes
# that actually occur in the merged data: an empty sample makes the test return
# NaN, which would otherwise be reported as "No" significant difference.
sentiment_order = ['Extreme Fear', 'Fear', 'Neutral', 'Greed', 'Extreme Greed']
sentiment_groups = [merged_data.loc[merged_data['classification'] == sent, 'Total_PnL']
                    for sent in sentiment_order]
sentiment_groups = [group for group in sentiment_groups if len(group) > 0]

print(f"ANOVA test for PnL across sentiments:")
if len(sentiment_groups) >= 2:
    f_stat, p_value = stats.f_oneway(*sentiment_groups)
    print(f"F-statistic: {f_stat:.4f}")
    print(f"P-value: {p_value:.4f}")
    print(f"Significant difference: {'Yes' if p_value < 0.05 else 'No'}\n")
else:
    # A one-way ANOVA needs at least two groups to compare.
    f_stat = p_value = float('nan')
    print("Skipped: fewer than two sentiment classes have trading data\n")

# 2. Correlation test between sentiment and volume
corr_coef, p_value = stats.pearsonr(merged_data['value'], merged_data['Total_Volume'])
print(f"Correlation between sentiment and volume:")
print(f"Correlation coefficient: {corr_coef:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Significant correlation: {'Yes' if p_value < 0.05 else 'No'}\n")

# 3. Correlation test between sentiment and PnL
corr_coef, p_value = stats.pearsonr(merged_data['value'], merged_data['Total_PnL'])
print(f"Correlation between sentiment and PnL:")
print(f"Correlation coefficient: {corr_coef:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Significant correlation: {'Yes' if p_value < 0.05 else 'No'}")

## 10. Key Insights and Recommendations

In [None]:
# Generate summary statistics: which sentiment class ranks highest on each
# daily metric. Every grouped aggregate is computed once and reused.
print("=== KEY INSIGHTS ===\n")

pnl_by_sentiment = merged_data.groupby('classification')['Total_PnL']
pnl_mean = pnl_by_sentiment.mean()
pnl_std = pnl_by_sentiment.std()
volume_mean = merged_data.groupby('classification')['Total_Volume'].mean()

# Best performing sentiment (highest mean daily PnL)
best_sentiment = pnl_mean.idxmax()
best_pnl = pnl_mean.max()
print(f"1. Best performing sentiment: {best_sentiment} (Avg PnL: ${best_pnl:.2f})")

# Highest volume sentiment (highest mean daily volume)
highest_volume_sentiment = volume_mean.idxmax()
highest_volume = volume_mean.max()
print(f"2. Highest trading volume: {highest_volume_sentiment} (Avg Volume: ${highest_volume:.2f})")

# Most volatile sentiment (largest standard deviation of daily PnL)
most_volatile = pnl_std.idxmax()
volatility = pnl_std.max()
print(f"3. Most volatile sentiment: {most_volatile} (Std Dev: ${volatility:.2f})")

# Risk-adjusted returns: mean daily PnL divided by its standard deviation.
# Classes whose std is zero or undefined (e.g. a single day) score 0, exactly
# as before; the ratio is now computed vectorised instead of through
# groupby.apply with a Python lambda.
risk_adjusted = (pnl_mean / pnl_std).where(pnl_std > 0, 0).sort_values(ascending=False)
best_risk_adjusted = risk_adjusted.idxmax()
print(f"4. Best risk-adjusted returns: {best_risk_adjusted} (Sharpe-like ratio: {risk_adjusted.max():.3f})")

# Trading frequency by sentiment (mean number of trades per day)
trading_frequency = merged_data.groupby('classification')['Trade_Count'].mean().sort_values(ascending=False)
most_active = trading_frequency.idxmax()
print(f"5. Most active trading sentiment: {most_active} (Avg trades: {trading_frequency.max():.1f})")

print("\n=== RECOMMENDATIONS ===")
print("1. Consider sentiment-based position sizing")
print("2. Implement sentiment-aware risk management")
print("3. Monitor sentiment transitions for entry/exit signals")
print("4. Develop sentiment-based trading strategies")
print("5. Use sentiment data for portfolio diversification")

## 11. Save Results

In [None]:
# Save all results: the merged daily dataset and a small key/value summary.
print("Saving analysis results...")

# to_csv() does not create missing directories, so make sure the target folder
# exists before writing.
os.makedirs('csv_files', exist_ok=True)

# Save processed data
merged_data.to_csv('csv_files/final_analysis_data.csv', index=False)

# Save summary statistics. The four "*_Sentiment" winners are the values
# computed in the key-insights cell, which must have been run first.
summary_stats = {
    'Total_Days_Analyzed': len(merged_data),
    'Date_Range': f"{merged_data['Date'].min()} to {merged_data['Date'].max()}",
    'Total_Trading_Volume': merged_data['Total_Volume'].sum(),
    'Total_PnL': merged_data['Total_PnL'].sum(),
    'Total_Trades': merged_data['Trade_Count'].sum(),
    'Sentiment_Categories': merged_data['classification'].nunique(),
    'Best_Performing_Sentiment': best_sentiment,
    'Highest_Volume_Sentiment': highest_volume_sentiment,
    'Most_Volatile_Sentiment': most_volatile,
    'Best_Risk_Adjusted_Sentiment': best_risk_adjusted
}

# One row per metric, in the insertion order of the dict above.
summary_df = pd.DataFrame(list(summary_stats.items()), columns=['Metric', 'Value'])
summary_df.to_csv('csv_files/analysis_summary.csv', index=False)

print("Analysis complete! Check the outputs/ directory for visualizations.")
print("\nFiles saved:")
print("- csv_files/final_analysis_data.csv")
print("- csv_files/analysis_summary.csv")
print("- outputs/ (various visualization files)")