In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import warnings

# Import ReportLab modules
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.units import inch

warnings.filterwarnings('ignore')


In [14]:
class TraderSentimentAnalyzer:
    def __init__(self):
        self.fear_greed_data = None
        self.trader_data = None
        self.merged_data = None
        self.trader_metrics = None
        self.sentiment_performance = None
        self.correlations = None
        self.daily_performance = None
        self.cluster_analysis = None
        self.timing_analysis = None
        self.strategies = None
        self.insights = {}

    def load_data(self, fear_greed_path, trader_data_path):
        print("Loading datasets...")
        self.fear_greed_data = pd.read_csv(fear_greed_path)
        self.fear_greed_data['date'] = pd.to_datetime(self.fear_greed_data['date'])
        self.trader_data = pd.read_csv(trader_data_path)
        if 'Timestamp IST' in self.trader_data.columns:
            self.trader_data['date'] = pd.to_datetime(self.trader_data['Timestamp IST'], format='%d-%m-%Y %H:%M')
            self.trader_data['date'] = self.trader_data['date'].dt.date
        numeric_columns = ['Execution Price', 'Size Tokens', 'Size USD', 'Closed PnL', 'Fee']
        for col in numeric_columns:
            if col in self.trader_data.columns:
                self.trader_data[col] = pd.to_numeric(self.trader_data[col], errors='coerce')
        print(f"Fear & Greed data: {len(self.fear_greed_data)} records")
        print(f"Trader data: {len(self.trader_data)} records")

    def merge_datasets(self):
        self.fear_greed_data['date'] = self.fear_greed_data['date'].dt.date
        self.merged_data = pd.merge(
            self.trader_data,
            self.fear_greed_data[['date', 'value', 'classification']],
            on='date',
            how='inner'
        )
        self.merged_data.rename(columns={
            'value': 'sentiment_score',
            'classification': 'sentiment_category'
        }, inplace=True)
        print(f"Merged data: {len(self.merged_data)} records")

    def calculate_trader_metrics(self):
        print("Calculating trader performance metrics...")
        trader_metrics = []
        for account in self.merged_data['Account'].unique():
            trader_data = self.merged_data[self.merged_data['Account'] == account]
            total_trades = len(trader_data)
            total_volume = trader_data['Size USD'].sum()
            total_pnl = trader_data['Closed PnL'].sum()
            total_fees = trader_data['Fee'].sum()
            net_pnl = total_pnl - total_fees
            winning_trades = len(trader_data[trader_data['Closed PnL'] > 0])
            win_rate = winning_trades / total_trades if total_trades > 0 else 0
            avg_trade_size = total_volume / total_trades if total_trades > 0 else 0
            avg_pnl = total_pnl / total_trades if total_trades > 0 else 0
            pnl_std = trader_data['Closed PnL'].std() if len(trader_data) > 1 else 0
            sharpe_ratio = avg_pnl / pnl_std if pnl_std != 0 else 0
            sentiment_exposure = trader_data['sentiment_score'].mean()
            trader_metrics.append({
                'account': account,
                'total_trades': total_trades,
                'total_volume': total_volume,
                'total_pnl': total_pnl,
                'net_pnl': net_pnl,
                'win_rate': win_rate,
                'avg_trade_size': avg_trade_size,
                'avg_pnl': avg_pnl,
                'pnl_volatility': pnl_std,
                'sharpe_ratio': sharpe_ratio,
                'avg_sentiment_exposure': sentiment_exposure
            })
        self.trader_metrics = pd.DataFrame(trader_metrics)
        return self.trader_metrics

    def analyze_sentiment_performance(self):
        sentiment_analysis = {}
        for sentiment in self.merged_data['sentiment_category'].unique():
            sentiment_data = self.merged_data[self.merged_data['sentiment_category'] == sentiment]
            sentiment_analysis[sentiment] = {
                'total_trades': len(sentiment_data),
                'total_volume': sentiment_data['Size USD'].sum(),
                'total_pnl': sentiment_data['Closed PnL'].sum(),
                'avg_pnl_per_trade': sentiment_data['Closed PnL'].mean(),
                'win_rate': len(sentiment_data[sentiment_data['Closed PnL'] > 0]) / len(sentiment_data) if len(sentiment_data) > 0 else 0,
                'avg_trade_size': sentiment_data['Size USD'].mean(),
                'pnl_volatility': sentiment_data['Closed PnL'].std(),
                'unique_traders': sentiment_data['Account'].nunique(),
                'avg_sentiment_score': sentiment_data['sentiment_score'].mean()
            }
        self.sentiment_performance = sentiment_analysis
        return sentiment_analysis

    def find_correlation_patterns(self):
        daily_performance = self.merged_data.groupby('date').agg({
            'Closed PnL': ['sum', 'mean', 'count'],
            'Size USD': 'sum',
            'sentiment_score': 'first',
            'sentiment_category': 'first'
        }).reset_index()
        daily_performance.columns = ['date', 'daily_pnl_sum', 'daily_pnl_avg', 'trade_count',
                                     'daily_volume', 'sentiment_score', 'sentiment_category']
        correlations = {
            'pnl_sentiment_corr': daily_performance['daily_pnl_sum'].corr(daily_performance['sentiment_score']),
            'volume_sentiment_corr': daily_performance['daily_volume'].corr(daily_performance['sentiment_score']),
            'trades_sentiment_corr': daily_performance['trade_count'].corr(daily_performance['sentiment_score'])
        }
        self.correlations = correlations
        self.daily_performance = daily_performance
        return correlations

    def cluster_traders_by_behavior(self):

        features = ['total_trades', 'avg_trade_size', 'win_rate', 'pnl_volatility',
                    'avg_sentiment_exposure', 'sharpe_ratio']
        clustering_data = self.trader_metrics[features].fillna(0)
        scaler = StandardScaler()
        scaled_features = scaler.fit_transform(clustering_data)
        silhouette_scores = []
        for n_clusters in range(2, 8):
            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            cluster_labels = kmeans.fit_predict(scaled_features)
            score = silhouette_score(scaled_features, cluster_labels)
            silhouette_scores.append(score)
        optimal_clusters = silhouette_scores.index(max(silhouette_scores)) + 2
        kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)
        self.trader_metrics['cluster'] = kmeans.fit_predict(scaled_features)
        cluster_analysis = {}
        for cluster in range(optimal_clusters):
            cluster_data = self.trader_metrics[self.trader_metrics['cluster'] == cluster]
            cluster_analysis[f'Cluster_{cluster}'] = {
                'size': len(cluster_data),
                'avg_pnl': cluster_data['net_pnl'].mean(),
                'avg_win_rate': cluster_data['win_rate'].mean(),
                'avg_sentiment_exposure': cluster_data['avg_sentiment_exposure'].mean(),
                'avg_sharpe': cluster_data['sharpe_ratio'].mean(),
                'avg_trade_size': cluster_data['avg_trade_size'].mean()
            }
        self.cluster_analysis = cluster_analysis
        return cluster_analysis

    def identify_market_timing_opportunities(self):

        timing_analysis = {}
        self.daily_performance['sentiment_shift'] = self.daily_performance['sentiment_score'].diff()
        self.daily_performance['sentiment_momentum'] = self.daily_performance['sentiment_score'].rolling(3).mean()
        extreme_fear = self.daily_performance[self.daily_performance['sentiment_score'] <= 20]
        extreme_greed = self.daily_performance[self.daily_performance['sentiment_score'] >= 80]
        timing_analysis['extreme_fear_performance'] = {
            'avg_daily_pnl': extreme_fear['daily_pnl_sum'].mean(),
            'win_rate': len(extreme_fear[extreme_fear['daily_pnl_sum'] > 0]) / len(extreme_fear) if len(extreme_fear) > 0 else 0,
            'days_count': len(extreme_fear)
        }
        timing_analysis['extreme_greed_performance'] = {
            'avg_daily_pnl': extreme_greed['daily_pnl_sum'].mean(),
            'win_rate': len(extreme_greed[extreme_greed['daily_pnl_sum'] > 0]) / len(extreme_greed) if len(extreme_greed) > 0 else 0,
            'days_count': len(extreme_greed)
        }
        strong_momentum_up = self.daily_performance[self.daily_performance['sentiment_shift'] > 10]
        strong_momentum_down = self.daily_performance[self.daily_performance['sentiment_shift'] < -10]
        timing_analysis['momentum_up_performance'] = {
            'avg_daily_pnl': strong_momentum_up['daily_pnl_sum'].mean(),
            'next_day_performance': strong_momentum_up['daily_pnl_sum'].shift(-1).mean()
        }
        timing_analysis['momentum_down_performance'] = {
            'avg_daily_pnl': strong_momentum_down['daily_pnl_sum'].mean(),
            'next_day_performance': strong_momentum_down['daily_pnl_sum'].shift(-1).mean()
        }
        self.timing_analysis = timing_analysis
        return timing_analysis

    def generate_trading_strategies(self):

        strategies = {}
        fear_performance = self.sentiment_performance.get('Fear', {}).get('avg_pnl_per_trade', 0)
        extreme_fear_performance = self.sentiment_performance.get('Extreme Fear', {}).get('avg_pnl_per_trade', 0)
        if extreme_fear_performance > fear_performance:
            strategies['contrarian_extreme_fear'] = {
                'signal': 'BUY during Extreme Fear periods',
                'rationale': f'Extreme Fear shows {extreme_fear_performance:.2f} avg PnL vs {fear_performance:.2f} during regular Fear',
                'confidence': 'High' if abs(extreme_fear_performance - fear_performance) > 50 else 'Medium'
            }
        if self.correlations['pnl_sentiment_corr'] > 0.3:
            strategies['momentum_following'] = {
                'signal': 'Follow sentiment direction',
                'rationale': f'Strong positive correlation ({self.correlations["pnl_sentiment_corr"]:.3f}) between sentiment and PnL',
                'confidence': 'High'
            }
        elif self.correlations['pnl_sentiment_corr'] < -0.3:
            strategies['contrarian_sentiment'] = {
                'signal': 'Trade against sentiment',
                'rationale': f'Strong negative correlation ({self.correlations["pnl_sentiment_corr"]:.3f}) between sentiment and PnL',
                'confidence': 'High'
            }
        if self.correlations['volume_sentiment_corr'] > 0.5:
            strategies['volume_sentiment_sync'] = {
                'signal': 'Increase position size during high sentiment periods',
                'rationale': f'Volume strongly correlates with sentiment ({self.correlations["volume_sentiment_corr"]:.3f})',
                'confidence': 'Medium'
            }
        best_cluster = max(self.cluster_analysis.items(), key=lambda x: x[1]['avg_pnl'])
        strategies['cluster_mimicking'] = {
            'signal': f'Mimic behavior of {best_cluster[0]} traders',
            'rationale': f'Best performing cluster: {best_cluster[1]["avg_pnl"]:.2f} avg PnL, {best_cluster[1]["avg_win_rate"]:.3f} win rate',
            'confidence': 'Medium'
        }
        self.strategies = strategies
        return strategies

    def plot_sentiment_performance(self):
        if self.sentiment_performance:
            sentiments = list(self.sentiment_performance.keys())
            avg_pnls = [self.sentiment_performance[s]['avg_pnl_per_trade'] for s in sentiments]
            plt.figure(figsize=(10, 6))
            sns.barplot(x=sentiments, y=avg_pnls, palette='viridis')
            plt.title('Average PnL per Trade by Market Sentiment Category')
            plt.xlabel('Sentiment Category')
            plt.ylabel('Average PnL per Trade (USD)')
            plt.grid(axis='y', linestyle='--')
            plot_file = "sentiment_performance.png"
            plt.savefig(plot_file, bbox_inches='tight')
            plt.close()
            return plot_file

    def plot_sentiment_correlation(self):
        if self.daily_performance is not None:
            fig, axes = plt.subplots(1, 3, figsize=(18, 6))
            sns.scatterplot(x='sentiment_score', y='daily_pnl_sum', data=self.daily_performance, ax=axes[0])
            axes[0].set_title('Daily PnL vs. Sentiment Score')
            axes[0].set_xlabel('Sentiment Score')
            axes[0].set_ylabel('Total Daily PnL (USD)')
            sns.scatterplot(x='sentiment_score', y='daily_volume', data=self.daily_performance, ax=axes[1])
            axes[1].set_title('Daily Volume vs. Sentiment Score')
            axes[1].set_xlabel('Sentiment Score')
            axes[1].set_ylabel('Total Daily Volume (USD)')
            sns.scatterplot(x='sentiment_score', y='trade_count', data=self.daily_performance, ax=axes[2])
            axes[2].set_title('Daily Trade Count vs. Sentiment Score')
            axes[2].set_xlabel('Sentiment Score')
            axes[2].set_ylabel('Daily Trade Count')
            plt.suptitle('Correlation Analysis of Trading Metrics and Market Sentiment')
            plt.tight_layout(rect=[0, 0, 1, 0.96])
            plot_file = "correlation_plots.png"
            plt.savefig(plot_file, bbox_inches='tight')
            plt.close()
            return plot_file

    def plot_trader_clusters(self):
        if 'cluster' in self.trader_metrics.columns:
            sns.pairplot(self.trader_metrics, vars=['net_pnl', 'win_rate', 'avg_trade_size', 'avg_sentiment_exposure'], hue='cluster', palette='tab10')
            plt.suptitle('Trader Clusters Based on Performance and Behavior Metrics', y=1.02)
            plot_file = "trader_clusters.png"
            plt.savefig(plot_file, bbox_inches='tight')
            plt.close()
            return plot_file

    def generate_pdf_report(self, filename="sentiment_analysis_report.pdf"):
        """Write the analysis results to a PDF document.

        The report contains, in order: a dataset overview, the per-sentiment
        breakdown with its bar chart, the correlation figures with their
        scatter plots, the trader clusters with their pair plot, and the
        recommended strategies.

        Requires ``merged_data``, ``sentiment_performance``, ``correlations``,
        ``cluster_analysis`` and ``strategies`` to be populated.

        Args:
            filename: Output path of the PDF.

        Side effects:
            Calls the three ``plot_*`` methods (each saves a PNG under a fixed
            relative file name), builds the PDF at ``filename`` and prints a
            confirmation line.
        """
        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        styles.add(ParagraphStyle(name='CustomHeading1', fontSize=18, leading=22, alignment=TA_CENTER, fontName='Helvetica-Bold'))
        styles.add(ParagraphStyle(name='CustomHeading2', fontSize=14, leading=18, fontName='Helvetica-Bold'))

        story = []

        def add_text(markup, style_name='Normal'):
            # Append one paragraph rendered with the named stylesheet entry.
            story.append(Paragraph(markup, styles[style_name]))

        def add_gap(inches):
            # Append vertical whitespace of the given height in inches.
            story.append(Spacer(1, inches * inch))

        def add_figure(image_path, width_in, height_in):
            # Embed a saved plot followed by a gap; skipped when no plot was produced.
            if image_path:
                story.append(Image(image_path, width=width_in * inch, height=height_in * inch))
                add_gap(0.2)

        merged = self.merged_data

        # --- Title and dataset overview ---
        add_text("Trader Performance vs. Market Sentiment Analysis Report", 'CustomHeading1')
        add_gap(0.2)
        add_text("<b>📊 DATASET OVERVIEW</b>", 'CustomHeading2')
        add_text(f"Total merged records: {len(merged):,}")
        add_text(f"Unique traders: {merged['Account'].nunique():,}")
        add_text(f"Date range: {merged['date'].min()} to {merged['date'].max()}")
        add_text(f"Total trading volume: ${merged['Size USD'].sum():,.2f}")
        add_text(f"Total PnL: ${merged['Closed PnL'].sum():,.2f}")
        add_gap(0.2)

        # --- Per-sentiment breakdown ---
        add_text("<b>📈 SENTIMENT PERFORMANCE BREAKDOWN</b>", 'CustomHeading2')
        add_gap(0.1)
        for sentiment, metrics in self.sentiment_performance.items():
            add_text(f"<b>{sentiment.upper()}:</b>")
            add_text(f"  • Total trades: {metrics['total_trades']:,}")
            add_text(f"  • Avg PnL per trade: ${metrics['avg_pnl_per_trade']:.2f}")
            add_text(f"  • Win rate: {metrics['win_rate']:.1%}")
            add_text(f"  • Unique traders: {metrics['unique_traders']:,}")
            add_gap(0.1)
        add_figure(self.plot_sentiment_performance(), 4, 2.5)
        story.append(PageBreak())

        # --- Correlations ---
        corr = self.correlations
        add_text("<b>🔗 CORRELATION INSIGHTS</b>", 'CustomHeading2')
        add_text(f"PnL-Sentiment correlation: {corr['pnl_sentiment_corr']:.3f}")
        add_text(f"Volume-Sentiment correlation: {corr['volume_sentiment_corr']:.3f}")
        add_text(f"Trade Count-Sentiment correlation: {corr['trades_sentiment_corr']:.3f}")
        add_gap(0.1)
        add_figure(self.plot_sentiment_correlation(), 7, 3)
        story.append(PageBreak())

        # --- Trader clusters ---
        add_text("<b>👥 TRADER BEHAVIOR CLUSTERS</b>", 'CustomHeading2')
        add_gap(0.1)
        for cluster_name, cluster_data in self.cluster_analysis.items():
            add_text(f"<b>{cluster_name}</b> ({cluster_data['size']} traders):")
            add_text(f"  • Avg Net PnL: ${cluster_data['avg_pnl']:.2f}")
            add_text(f"  • Avg Win Rate: {cluster_data['avg_win_rate']:.1%}")
            add_text(f"  • Avg Sentiment Exposure: {cluster_data['avg_sentiment_exposure']:.1f}")
            add_text(f"  • Avg Sharpe Ratio: {cluster_data['avg_sharpe']:.3f}")
            add_gap(0.1)
        add_figure(self.plot_trader_clusters(), 6, 6)

        # --- Strategies ---
        add_text("<b>🎯 RECOMMENDED TRADING STRATEGIES</b>", 'CustomHeading2')
        add_gap(0.1)
        for strategy_name, strategy_data in self.strategies.items():
            add_text(f"<b>{strategy_name.upper().replace('_', ' ')}</b> ({strategy_data['confidence']} confidence):")
            add_text(f"  • Signal: {strategy_data['signal']}")
            add_text(f"  • Rationale: {strategy_data['rationale']}")
            add_gap(0.1)

        doc.build(story)
        print(f"PDF report saved to {filename}")

    def generate_written_report(self):
        print("\n" + "="*80)
        print("Trader Sentiment & Performance Analysis: Our Key Findings")
        print("="*80)



        # Key Insight 1: Sentiment vs. Performance
        print("\n Sentiment's Impact on Performance")
        print("First,  checked how different market moods affected trading. It turns out, some moods are way more profitable than others!")
        best_sentiment = max(self.sentiment_performance.items(), key=lambda x: x[1]['avg_pnl_per_trade'])
        print(f"Data suggests that trading during **{best_sentiment[0]}** periods was the most effective, with an average PnL of **${best_sentiment[1]['avg_pnl_per_trade']:.2f}** per trade. This is definitely something to keep in mind. 💡")

        # Key Insight 2: Correlations
        print("\n Finding the Patterns")
        print("We ran some correlations to see if sentiment and trading activity moved together. Here's the rundown:")
        pnl_corr = self.correlations['pnl_sentiment_corr']
        vol_corr = self.correlations['volume_sentiment_corr']
        trade_count_corr = self.correlations['trades_sentiment_corr']

        if abs(pnl_corr) > 0.3:
            print(f"There's a pretty strong **{'' if pnl_corr > 0 else 'negative'} link** between sentiment and PnL ({pnl_corr:.3f}). This means when sentiment goes {'up' if pnl_corr > 0 else 'down'}, overall PnL tends to {'increase' if pnl_corr > 0 else 'decrease'}. 📉")

        if abs(vol_corr) > 0.3:
            print(f"Interestingly, trading volume has a **{'' if vol_corr > 0 else 'negative'} correlation** with sentiment ({vol_corr:.3f}). It seems traders {'get more active' if vol_corr > 0 else 'pull back'} as sentiment {'rises' if vol_corr > 0 else 'falls'}. 🚀")


        print("\n Trader Profiles Discovered!")
        print("Used a  technique called clustering to group traders by their behavior. This helped us spot different types of trading styles.")
        best_cluster = max(self.cluster_analysis.items(), key=lambda x: x[1]['avg_pnl'])
        print(f"The standout group was **{best_cluster[0]}**! These traders had a fantastic average net PnL of **${best_cluster[1]['avg_pnl']:.2f}** and an impressive win rate of **{best_cluster[1]['avg_win_rate']:.1%}**. Their style is definitely worth studying. 🏆")


        print("\n My Trading Strategy Recommendations")
        print("Based on all this, here are a few ideas for potential trading strategies:")
        for strategy_name, strategy_data in self.strategies.items():
            print(f"\n- **{strategy_name.upper().replace('_', ' ')}** ({strategy_data['confidence']}):")
            print(f"  - **Signal:** {strategy_data['signal']}")
            print(f"  - **Rationale:** {strategy_data['rationale']}")

        print("\nThat's the gist of our analysis! Hope these insights are useful for your trading journey. Good luck out there! 🤞")
        print("="*80)

    def run_complete_analysis(self, fear_greed_path, trader_data_path):
        try:
            self.load_data(fear_greed_path, trader_data_path)
            self.merge_datasets()
            self.calculate_trader_metrics()
            self.analyze_sentiment_performance()
            self.find_correlation_patterns()
            self.cluster_traders_by_behavior()
            self.identify_market_timing_opportunities()
            self.generate_trading_strategies()

            self.generate_written_report()
            self.generate_pdf_report()

            return {
                'trader_metrics': self.trader_metrics,
                'sentiment_performance': self.sentiment_performance,
                'correlations': self.correlations,
                'timing_analysis': self.timing_analysis,
                'strategies': self.strategies,
                'cluster_analysis': self.cluster_analysis
            }
        except Exception as e:
            print(f"Error during analysis: {str(e)}")
            return None

# Driver: run the whole pipeline on the two input files.
# NOTE(review): the hard-coded paths look like a Google Colab Drive mount and
# carry no file extension — confirm they exist and are CSV files in the
# environment where this notebook is run.
analyzer = TraderSentimentAnalyzer()
fear_greed_path = "/content/drive/MyDrive/Hackthon/feardata"
trader_data_path = "/content/drive/MyDrive/Hackthon/historical"
# `results` is the dict returned by run_complete_analysis, or None if a stage raised.
results = analyzer.run_complete_analysis(fear_greed_path, trader_data_path)

Loading datasets...
Fear & Greed data: 2644 records
Trader data: 211224 records
Merged data: 211218 records
Calculating trader performance metrics...

Trader Sentiment & Performance Analysis: Our Key Findings

 Sentiment's Impact on Performance
First,  checked how different market moods affected trading. It turns out, some moods are way more profitable than others!
Data suggests that trading during **Extreme Greed** periods was the most effective, with an average PnL of **$67.89** per trade. This is definitely something to keep in mind. 💡

 Finding the Patterns
We ran some correlations to see if sentiment and trading activity moved together. Here's the rundown:

 Trader Profiles Discovered!
Used a  technique called clustering to group traders by their behavior. This helped us spot different types of trading styles.
The standout group was **Cluster_2**! These traders had a fantastic average net PnL of **$1010314.32** and an impressive win rate of **35.3%**. Their style is definitely wor