# Sentiment Analysis: Fear vs Greed Trading Patterns

This notebook analyzes how Bitcoin market sentiment affects trader behavior and performance.

In [None]:
import json
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from scipy import stats

# Plot appearance: matplotlib's dark-grid seaborn theme plus the notebook palette
plt.style.use('seaborn-v0_8-darkgrid')
colors = [
    '#FF6B6B',
    '#4ECDC4',
    '#45B7D1',
    '#96CEB4',
    '#DDA0DD',
]

In [None]:
# Read the merged table and convert both timestamp columns to datetimes
df = pd.read_csv('../data/processed/merged_data.csv')
for ts_col in ('time', 'date'):
    df[ts_col] = pd.to_datetime(df[ts_col])

# Sanity summary: row count, covered period, and rows per sentiment label
first_ts, last_ts = df['time'].min(), df['time'].max()
print(f"Total records: {len(df):,}")
print(f"Date range: {first_ts} to {last_ts}")
print(f"\nSentiment distribution:")
print(df['Classification'].value_counts())

## 1. Sentiment Overview and Timeline

In [None]:
# Daily trade counts per sentiment label: one row per date, one column per label.
# Date/label combinations with no trades come out of the reshape as NaN and are set to 0.
trades_per_day = df.groupby(['date', 'Classification']).size()
sentiment_timeline = trades_per_day.unstack('Classification').fillna(0)

# Stacked area chart of the daily counts
fig, ax = plt.subplots(figsize=(15, 6))
sentiment_timeline.plot.area(
    stacked=True,
    ax=ax,
    alpha=0.7,
    color=['#FF6B6B', '#4ECDC4'],
)
ax.set_title('Trading Activity by Market Sentiment Over Time', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Number of Trades')
ax.legend(title='Market Sentiment')
fig.tight_layout()
plt.show()

## 2. Trading Behavior Analysis by Sentiment

In [None]:
# Summary statistics per sentiment label, rounded to three decimals
pattern_spec = {
    'size': ['mean', 'median', 'std'],
    'leverage': ['mean', 'median', 'max'],
    'closedPnL': ['mean', 'median', 'sum', 'std'],
    'is_profitable': ['mean', 'count'],
}
sentiment_patterns = df.groupby('Classification').agg(pattern_spec).round(3)

# Flatten the (column, statistic) header pairs into names like "size_mean"
sentiment_patterns.columns = [
    '_'.join(pair).strip() for pair in sentiment_patterns.columns.to_flat_index()
]

print("Trading Patterns by Market Sentiment:")
sentiment_patterns

In [None]:
# Visualize key metrics comparison: three annotated bar panels plus a PnL histogram


def _annotated_bar(ax, series, title, ylabel, label_fmt):
    """Draw one bar per entry of `series` on `ax` and print each value above its bar.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    series : pandas Series; the index supplies the bar labels, the values the heights.
    title, ylabel : text for the panel title and the y axis.
    label_fmt : ``str.format`` template applied to each bar height, e.g. ``'{:.2f}'``.

    Returns
    -------
    The bar container returned by ``ax.bar``.

    Bar colours come from the notebook-level ``colors`` palette, one per bar, so the
    panel still gets distinct colours when there are more than two sentiment labels.
    """
    bars = ax.bar(series.index, series.values, color=colors[:len(series)])
    ax.set_title(title, fontsize=14)
    ax.set_ylabel(ylabel)
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                label_fmt.format(height), ha='center', va='bottom')
    return bars


# x-range shown in the PnL histogram; also used to place the bins
PNL_WINDOW = (-500, 500)

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Average position size
sentiment_data = df.groupby('Classification')['size'].mean()
_annotated_bar(axes[0, 0], sentiment_data,
               'Average Position Size by Sentiment', 'Position Size', '{:.2f}')

# Win rate (share of profitable trades, in percent)
win_rate = df.groupby('Classification')['is_profitable'].mean() * 100
_annotated_bar(axes[0, 1], win_rate,
               'Win Rate by Sentiment', 'Win Rate (%)', '{:.1f}%')

# Average leverage
avg_leverage = df.groupby('Classification')['leverage'].mean()
_annotated_bar(axes[1, 0], avg_leverage,
               'Average Leverage by Sentiment', 'Leverage', '{:.2f}x')

# PnL distribution
ax4 = axes[1, 1]
for sentiment in df['Classification'].dropna().unique():
    pnl = df.loc[df['Classification'] == sentiment, 'closedPnL'].dropna()
    # The 50 bins are laid over the displayed window rather than the full data range,
    # so every sentiment shares the same bin edges and outliers cannot stretch the
    # bins past the visible area. The density is normalised over the window only.
    ax4.hist(pnl, bins=50, range=PNL_WINDOW, alpha=0.6, label=sentiment, density=True)
ax4.set_title('PnL Distribution by Sentiment', fontsize=14)
ax4.set_xlabel('Closed PnL')
ax4.set_ylabel('Density')
ax4.legend()
ax4.set_xlim(*PNL_WINDOW)

plt.tight_layout()

# Create the output directory first so savefig does not fail on a fresh checkout
figure_path = Path('../results/figures/sentiment_comparison.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

## 3. Statistical Analysis: Fear vs Greed Performance

In [None]:
# Prepare data for statistical tests: closed PnL of trades labelled exactly
# 'Fear' or 'Greed', with missing values removed
fear_data = df[df['Classification'] == 'Fear']['closedPnL'].dropna()
greed_data = df[df['Classification'] == 'Greed']['closedPnL'].dropna()

print(f"Fear trades: {len(fear_data):,}")
print(f"Greed trades: {len(greed_data):,}")

# Perform statistical tests
# 1. Normality test on each group
_, fear_normal_p = stats.normaltest(fear_data)
_, greed_normal_p = stats.normaltest(greed_data)

print(f"\nNormality test p-values:")
print(f"Fear: {fear_normal_p:.4f} ({'Normal' if fear_normal_p > 0.05 else 'Not normal'})")
print(f"Greed: {greed_normal_p:.4f} ({'Normal' if greed_normal_p > 0.05 else 'Not normal'})")

# 2. T-test when both groups pass the normality check, otherwise Mann-Whitney U
if fear_normal_p > 0.05 and greed_normal_p > 0.05:
    stat, p_value = stats.ttest_ind(fear_data, greed_data)
    test_name = "T-test"
else:
    # The alternative is spelled out because the default has changed between SciPy
    # releases; the interpretation printed below assumes a two-sided p-value.
    stat, p_value = stats.mannwhitneyu(fear_data, greed_data, alternative='two-sided')
    test_name = "Mann-Whitney U test"

# 3. Effect size (Cohen's d)
# The pooled variance weights each group's variance by its degrees of freedom.
# A plain average of the two variances is only correct for equal group sizes.
n_fear, n_greed = len(fear_data), len(greed_data)
pooled_variance = (
    (n_fear - 1) * fear_data.var() + (n_greed - 1) * greed_data.var()
) / (n_fear + n_greed - 2)
cohens_d = (fear_data.mean() - greed_data.mean()) / np.sqrt(pooled_variance)

print(f"\n{test_name} Results:")
print(f"Statistic: {stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Cohen's d: {cohens_d:.4f}")
print(f"\nInterpretation: {'Statistically significant difference' if p_value < 0.05 else 'No significant difference'}")

In [None]:
# Create comprehensive comparison table


def _pnl_summary(pnl):
    """Return the summary values for one PnL series, in the order of the 'Metric' column.

    Order: count, mean, median, standard deviation, minimum, maximum,
    25th percentile, 75th percentile, and percentage of strictly positive values.
    """
    return [
        len(pnl),
        pnl.mean(),
        pnl.median(),
        pnl.std(),
        pnl.min(),
        pnl.max(),
        pnl.quantile(0.25),
        pnl.quantile(0.75),
        (pnl > 0).mean() * 100,
    ]


comparison_stats = pd.DataFrame({
    'Metric': ['Count', 'Mean PnL', 'Median PnL', 'Std Dev', 'Min PnL', 'Max PnL',
               '25th Percentile', '75th Percentile', 'Positive Trades %'],
    'Fear': _pnl_summary(fear_data),
    'Greed': _pnl_summary(greed_data),
})

comparison_stats['Difference'] = comparison_stats['Greed'] - comparison_stats['Fear']

# Relative change of Greed versus Fear.
# - The baseline is the magnitude of the Fear value, so the sign of '% Change'
#   always matches the sign of 'Difference', even when the Fear value is negative.
# - A zero baseline is replaced by NaN, so the cell reads NaN instead of +/-inf.
fear_baseline = comparison_stats['Fear'].abs().replace(0, np.nan)
comparison_stats['% Change'] = (comparison_stats['Difference'] / fear_baseline * 100).round(2)

print("Comprehensive Fear vs Greed Comparison:")
comparison_stats

## 4. Time-based Sentiment Analysis

In [None]:
# Mean PnL, position size and win rate for every (hour, sentiment) pair
hourly_columns = ['closedPnL', 'size', 'is_profitable']
hourly_sentiment = (
    df.groupby(['hour', 'Classification'])[hourly_columns]
    .mean()
    .reset_index()
)

# Interactive line chart: one line per sentiment label, average PnL across the day
axis_labels = {'closedPnL': 'Average Closed PnL', 'hour': 'Hour of Day'}
fig = px.line(
    hourly_sentiment,
    x='hour',
    y='closedPnL',
    color='Classification',
    title='Average PnL by Hour of Day and Market Sentiment',
    labels=axis_labels,
)
fig.update_layout(hovermode='x unified')
fig.show()

In [None]:
# Per-symbol, per-sentiment performance: average/total PnL, trade count and win rate
symbol_sentiment = (
    df.groupby(['symbol', 'Classification'])
    .agg(
        avg_pnl=('closedPnL', 'mean'),
        total_pnl=('closedPnL', 'sum'),
        trade_count=('closedPnL', 'count'),
        win_rate=('is_profitable', 'mean'),
    )
    .reset_index()
)

# Restrict to the ten symbols with the most rows in df
symbol_counts = df['symbol'].value_counts()
top_symbols = symbol_counts.head(10).index
in_top_ten = symbol_sentiment['symbol'].isin(top_symbols)
symbol_sentiment_top = symbol_sentiment[in_top_ten]

# Symbols as rows, sentiment labels as columns, average PnL as cell values
pivot_pnl = symbol_sentiment_top.pivot(
    index='symbol', columns='Classification', values='avg_pnl'
)

# Diverging colour map centred on zero so gains and losses are told apart at a glance
plt.figure(figsize=(10, 8))
heat_ax = sns.heatmap(
    pivot_pnl,
    annot=True,
    fmt='.2f',
    cmap='RdYlGn',
    center=0,
    cbar_kws={'label': 'Average PnL'},
)
heat_ax.set_title('Average PnL by Symbol and Market Sentiment')
plt.tight_layout()
plt.show()

## 5. Risk Analysis by Sentiment

In [None]:
# Risk metrics per sentiment label, built with named aggregation so every output
# column is labelled where it is computed
risk_metrics = df.groupby('Classification').agg(
    avg_loss=('closedPnL', lambda pnl: pnl[pnl < 0].mean()),                   # mean of losing trades
    avg_win=('closedPnL', lambda pnl: pnl[pnl > 0].mean()),                    # mean of winning trades
    loss_rate=('closedPnL', lambda pnl: len(pnl[pnl < 0]) / len(pnl) * 100),   # % of trades with PnL < 0
    volatility=('closedPnL', lambda pnl: pnl.std()),                           # PnL standard deviation
    total_losses=('closedPnL', lambda pnl: pnl[pnl < 0].sum()),                # sum of negative PnL
    total_wins=('closedPnL', lambda pnl: pnl[pnl > 0].sum()),                  # sum of positive PnL
    avg_leverage=('leverage', 'mean'),
    leverage_std=('leverage', 'std'),
    max_leverage=('leverage', 'max'),
)

# Derived ratios; absolute values are taken because the loss figures are negative
risk_metrics['profit_factor'] = (risk_metrics['total_wins'] / risk_metrics['total_losses']).abs()
risk_metrics['risk_reward_ratio'] = (risk_metrics['avg_win'] / risk_metrics['avg_loss']).abs()

# Mean PnL divided by PnL standard deviation, per sentiment label
pnl_by_sentiment = df.groupby('Classification')['closedPnL']
risk_metrics['sharpe_ratio'] = pnl_by_sentiment.mean() / pnl_by_sentiment.std()

print("Risk Metrics by Market Sentiment:")
risk_metrics.round(3)

In [None]:
# Visualize risk metrics: a 2x2 grid of bar charts, one metric per panel.
# Each spec is (risk_metrics column, panel title, y-axis label, reference line or None).
panel_specs = [
    ('risk_reward_ratio', 'Risk-Reward Ratio by Sentiment', 'Ratio', 1),
    ('profit_factor', 'Profit Factor by Sentiment', 'Factor', 1),
    ('volatility', 'PnL Volatility by Sentiment', 'Standard Deviation', None),
    ('sharpe_ratio', 'Sharpe Ratio by Sentiment', 'Ratio', 0),
]

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# axes.flat walks the grid row by row, matching the order of panel_specs
for ax, (metric, panel_title, y_label, reference) in zip(axes.flat, panel_specs):
    risk_metrics[metric].plot(kind='bar', ax=ax, color=['#FF6B6B', '#4ECDC4'])
    ax.set_title(panel_title, fontsize=14)
    ax.set_ylabel(y_label)
    if reference is not None:
        # Dashed horizontal line marking the reference level for this metric
        ax.axhline(y=reference, color='black', linestyle='--', alpha=0.5)
    # Keep the sentiment labels horizontal
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0)

plt.tight_layout()
plt.savefig('../results/figures/risk_metrics_sentiment.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Key Insights and Recommendations

In [None]:
# Generate key insights and export them as JSON.
#
# Every value is converted to a built-in bool/float before it goes into the dict.
# `p_value < 0.05` on a NumPy scalar yields numpy.bool_, which json.dump rejects with
# "TypeError: Object of type bool_ is not JSON serializable".


def _sentiment_mean(label, column):
    """Return the mean of `column` over rows whose Classification equals `label`, as a float."""
    return float(df.loc[df['Classification'] == label, column].mean())


insights = {
    'sentiment_impact': {
        'fear_avg_pnl': float(fear_data.mean()),
        'greed_avg_pnl': float(greed_data.mean()),
        'difference': float(greed_data.mean() - fear_data.mean()),
        'statistical_significance': bool(p_value < 0.05),
        'effect_size': float(cohens_d)
    },
    'risk_comparison': {
        'fear_volatility': float(risk_metrics.loc['Fear', 'volatility']),
        'greed_volatility': float(risk_metrics.loc['Greed', 'volatility']),
        'fear_sharpe': float(risk_metrics.loc['Fear', 'sharpe_ratio']),
        'greed_sharpe': float(risk_metrics.loc['Greed', 'sharpe_ratio'])
    },
    'trading_behavior': {
        'fear_avg_leverage': _sentiment_mean('Fear', 'leverage'),
        'greed_avg_leverage': _sentiment_mean('Greed', 'leverage'),
        'fear_avg_size': _sentiment_mean('Fear', 'size'),
        'greed_avg_size': _sentiment_mean('Greed', 'size')
    }
}

impact = insights['sentiment_impact']
risk = insights['risk_comparison']
behavior = insights['trading_behavior']

print("\n=== KEY INSIGHTS ===")
print(f"\n1. SENTIMENT IMPACT:")
print(f"   - Fear periods: ${impact['fear_avg_pnl']:.2f} avg PnL")
print(f"   - Greed periods: ${impact['greed_avg_pnl']:.2f} avg PnL")
print(f"   - Difference: ${impact['difference']:.2f}")
print(f"   - Statistically significant: {impact['statistical_significance']}")

print(f"\n2. RISK PROFILE:")
print(f"   - Fear volatility: {risk['fear_volatility']:.2f}")
print(f"   - Greed volatility: {risk['greed_volatility']:.2f}")
print(f"   - Better risk-adjusted returns in: {'Fear' if risk['fear_sharpe'] > risk['greed_sharpe'] else 'Greed'}")

print(f"\n3. TRADING BEHAVIOR:")
print(f"   - Leverage usage: {'Higher' if behavior['greed_avg_leverage'] > behavior['fear_avg_leverage'] else 'Lower'} during greed periods")
print(f"   - Position sizes: {'Larger' if behavior['greed_avg_size'] > behavior['fear_avg_size'] else 'Smaller'} during greed periods")

# Save insights; the report directory is created first so the write does not fail
# on a fresh checkout
report_path = Path('../results/reports/sentiment_insights.json')
report_path.parent.mkdir(parents=True, exist_ok=True)
with open(report_path, 'w') as f:
    json.dump(insights, f, indent=4)