# Cryptocurrency Sentiment Analysis & Price Prediction

**Quantitative Finance Approach to Cryptocurrency Trading**

This notebook demonstrates a comprehensive cryptocurrency sentiment analysis system that combines:
- Real-time market data from CoinMarketCap API
- Social media sentiment from Reddit (Twitter integration is listed under Next Steps)
- Advanced sentiment analysis using NLTK's VADER
- Machine learning models for price prediction

---

In [None]:
# =============================================================================
# Import Libraries and Modules
# =============================================================================

# Core data science libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Custom modules (refactored from original notebook code)
from crypto_sentiment_analyzer import (
    CryptoMarketDataFetcher,
    RedditSentimentFetcher, 
    SentimentAnalyzer,
    CryptoPricePredictionModel
)
from config import config

# Plotting defaults shared by every figure in this notebook.
# The style sheet is applied first so the explicit figure size below wins.
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette("husl")

# Confirm that the setup cell ran to completion
for status_line in (
    "📚 Libraries imported successfully",
    "🔧 Environment configured for cryptocurrency analysis",
):
    print(status_line)

## 1. Market Data Collection

Fetch real-time cryptocurrency market data from the CoinMarketCap API.

In [None]:
# =============================================================================
# Cryptocurrency Market Data Collection
# =============================================================================
import os

# Resolve the CoinMarketCap API key.
# The COINMARKETCAP_API_KEY environment variable takes precedence so that real
# credentials never have to be written into the notebook. An unset or empty
# variable falls back to the demo key so the notebook still runs out of the box.
# NOTE(review): the fallback key is committed to source control - treat it as
# public and remove/rotate it before any production use.
api_key = (
    os.environ.get('COINMARKETCAP_API_KEY')
    or 'c936ec13-bf84-4335-b1df-aa936ab8d2b5'  # Demo key - replace with your own
)
market_fetcher = CryptoMarketDataFetcher(api_key)

# Fetch top cryptocurrencies.
# Any failure in this cell (fetching or displaying) leaves `crypto_data` as an
# empty DataFrame, which the later cells check via `crypto_data.empty` before
# using market data.
try:
    crypto_data = market_fetcher.get_crypto_data(start_rank=1, limit=10)
    print(f"✅ Successfully fetched data for {len(crypto_data)} cryptocurrencies")

    # Display key market metrics
    display_columns = ['name', 'symbol', 'price', 'percent_change_24h',
                       'percent_change_7d', 'market_cap', 'volume_24h']

    print("\n📊 Current Market Data:")
    print(crypto_data[display_columns].round(4))

except Exception as error:
    print(f"❌ Error fetching market data: {error}")
    crypto_data = pd.DataFrame()  # Empty DataFrame for error handling

## 2. Social Media Sentiment Collection

Collect sentiment data from Reddit cryptocurrency discussions

In [None]:
# =============================================================================
# Reddit Sentiment Data Collection
# =============================================================================

# Fetch Reddit posts from cryptocurrency subreddit
reddit_fetcher = RedditSentimentFetcher()

# On any failure `reddit_posts` becomes an empty frame with the two columns
# this notebook reads ('created_utc', 'text'), so later cells can rely on
# `reddit_posts.empty` instead of re-checking for errors.
try:
    reddit_posts = reddit_fetcher.get_reddit_posts(
        subreddit='cryptocurrency',
        post_limit=200
    )

    if not reddit_posts.empty:
        print(f"✅ Successfully fetched {len(reddit_posts)} Reddit posts")
        print(f"📅 Date range: {reddit_posts['created_utc'].min()} to {reddit_posts['created_utc'].max()}")

        # Show sample posts.
        # Number the samples by position (1, 2, 3) rather than by index label:
        # the label is only 0-based when the frame carries a default index, and
        # `label + 1` fails outright for non-integer labels.
        print("\n📝 Sample Reddit Posts:")
        for position, (_, post) in enumerate(reddit_posts.head(3).iterrows(), start=1):
            print(f"{position}. {post['text'][:150]}...")
            print(f"   Posted: {post['created_utc']}\n")
    else:
        print("⚠️  No Reddit posts retrieved - API may be down or rate limited")

except Exception as error:
    print(f"❌ Error fetching Reddit data: {error}")
    reddit_posts = pd.DataFrame(columns=['created_utc', 'text'])

## 3. Sentiment Analysis

Analyze the sentiment of social media posts using the VADER sentiment analyzer.

In [None]:
# =============================================================================
# VADER Sentiment Analysis
# =============================================================================

# Initialize sentiment analyzer
sentiment_analyzer = SentimentAnalyzer()

if reddit_posts.empty:
    # Nothing to score: expose an empty frame so later cells can test `.empty`
    print("⚠️  No posts available for sentiment analysis")
    sentiment_scores = pd.DataFrame()
else:
    print("🧠 Analyzing sentiment of Reddit posts...")

    # Score every post text in a single batch call
    post_texts = reddit_posts['text'].tolist()
    sentiment_scores = sentiment_analyzer.batch_sentiment_analysis(post_texts)

    # Place the scores next to the posts they belong to (column-wise join)
    reddit_with_sentiment = pd.concat(
        [reddit_posts.reset_index(drop=True), sentiment_scores],
        axis=1,
    )

    # Summary statistics over the compound score; +/-0.1 separates
    # positive / negative posts from neutral ones
    compound = sentiment_scores['compound']
    sentiment_summary = {
        'Total Posts': len(sentiment_scores),
        'Average Sentiment': compound.mean(),
        'Sentiment Std Dev': compound.std(),
        'Positive Posts': (compound > 0.1).sum(),
        'Negative Posts': (compound < -0.1).sum(),
        'Neutral Posts': compound.between(-0.1, 0.1).sum(),
    }

    print("\n📈 Sentiment Analysis Results:")
    for metric, value in sentiment_summary.items():
        # Floats get four decimals; counts are printed as-is
        rendered = f"{value:.4f}" if isinstance(value, float) else f"{value}"
        print(f"{metric}: {rendered}")

    # Rows holding the highest and lowest compound scores
    most_positive = reddit_with_sentiment.loc[compound.idxmax()]
    most_negative = reddit_with_sentiment.loc[compound.idxmin()]

    print(f"\n😊 Most Positive Post (Score: {most_positive['compound']:.3f}):")
    print(f"{most_positive['text'][:200]}...\n")

    print(f"😞 Most Negative Post (Score: {most_negative['compound']:.3f}):")
    print(f"{most_negative['text'][:200]}...")

## 4. Data Visualization

Visualize market data and sentiment analysis results

In [None]:
# =============================================================================
# Data Visualization
# =============================================================================

# Build a 2x2 dashboard; each panel is drawn only when its data is available
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Cryptocurrency Market & Sentiment Analysis Dashboard', fontsize=16, fontweight='bold')

market_cap_ax, performance_ax = axes[0, 0], axes[0, 1]
histogram_ax, trend_ax = axes[1, 0], axes[1, 1]

if not crypto_data.empty:
    # Panel 1: market capitalization of the first eight rows, in billions
    top_cryptos = crypto_data.head(8)
    market_cap_ax.bar(top_cryptos['symbol'], top_cryptos['market_cap'] / 1e9)
    market_cap_ax.set_title('Market Capitalization (Billions USD)')
    market_cap_ax.set_ylabel('Market Cap (B USD)')
    market_cap_ax.tick_params(axis='x', rotation=45)

    # Panel 2: 24h vs 7d change, marker area scaled by 24h volume
    performance_ax.scatter(
        crypto_data['percent_change_24h'],
        crypto_data['percent_change_7d'],
        s=crypto_data['volume_24h'] / 1e8,
        alpha=0.6,
    )
    performance_ax.set_title('Price Performance: 24h vs 7d Changes')
    performance_ax.set_xlabel('24h Change (%)')
    performance_ax.set_ylabel('7d Change (%)')
    performance_ax.grid(True, alpha=0.3)

if not sentiment_scores.empty:
    # Panel 3: histogram of compound scores with the mean marked in red
    compound_scores = sentiment_scores['compound']
    mean_compound = compound_scores.mean()
    histogram_ax.hist(compound_scores, bins=30, alpha=0.7, edgecolor='black')
    histogram_ax.axvline(
        mean_compound,
        color='red',
        linestyle='--',
        label=f'Mean: {mean_compound:.3f}',
    )
    histogram_ax.set_title('Sentiment Score Distribution')
    histogram_ax.set_xlabel('Sentiment Score (Compound)')
    histogram_ax.set_ylabel('Frequency')
    histogram_ax.legend()

if not (reddit_posts.empty or sentiment_scores.empty):
    # Panel 4: mean compound score per calendar date.
    # Adds a 'date' column to reddit_with_sentiment as a side effect.
    reddit_with_sentiment['date'] = reddit_with_sentiment['created_utc'].dt.date
    daily_sentiment = reddit_with_sentiment.groupby('date')['compound'].mean()

    trend_ax.plot(daily_sentiment.index, daily_sentiment.values, marker='o')
    trend_ax.set_title('Average Daily Sentiment Trend')
    trend_ax.set_xlabel('Date')
    trend_ax.set_ylabel('Average Sentiment')
    trend_ax.tick_params(axis='x', rotation=45)
    trend_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("📊 Dashboard visualization completed")

## 5. Price Prediction Model

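Build a machine learning model that incorporates sentiment data for price prediction. Note that the training targets in this demo are synthetic (the current price plus random noise), so the reported metrics are illustrative only.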

In [None]:
# =============================================================================
# Machine Learning Price Prediction
# =============================================================================

# Initialize prediction model
prediction_model = CryptoPricePredictionModel()

if crypto_data.empty or sentiment_scores.empty:
    # Both market data and sentiment scores are required to build features
    print("⚠️  Insufficient data for prediction model")
else:
    print("🤖 Building machine learning prediction model...")

    try:
        # Feature matrix built from market data plus sentiment scores
        features = prediction_model.prepare_features(crypto_data, sentiment_scores)

        # Demonstration targets only: current prices perturbed by Gaussian
        # noise (sigma = 2%). Real use needs historical, time-shifted targets.
        noise = np.random.normal(0, 0.02, len(crypto_data))
        target_prices = crypto_data['price'] * (1 + noise)

        # Fit the model and report whatever metrics it returns
        performance_metrics = prediction_model.train_model(features, target_prices)

        print("\n📈 Model Performance Metrics:")
        for metric, value in performance_metrics.items():
            label = metric.replace('_', ' ').title()
            if isinstance(value, float):
                print(f"{label}: {value:.4f}")
            else:
                print(f"{label}: {value}")

        # Predict on the same features and line the results up with the
        # matching number of leading market rows
        predictions = prediction_model.predict_price(features)
        row_count = len(predictions)
        current_prices = crypto_data['price'].head(row_count)

        prediction_comparison = pd.DataFrame({
            'Cryptocurrency': crypto_data['name'].head(row_count),
            'Current_Price': current_prices,
            'Predicted_Price': predictions,
            'Price_Change_Prediction': (predictions - current_prices) / current_prices * 100,
        })

        print("\n🔮 Price Predictions:")
        print(prediction_comparison.round(4))

    except Exception as model_error:
        print(f"❌ Error in prediction model: {model_error}")

## 6. Analysis Summary & Trading Insights

Combine market data and sentiment analysis for trading recommendations

In [None]:
# =============================================================================
# Trading Insights and Summary
# =============================================================================
# Prints a text summary of the market data and sentiment scores gathered by
# the earlier cells. Each section is skipped when its input frame is empty.

print("🎯 Cryptocurrency Sentiment Analysis Summary")
print("=" * 60)

# Market Overview
if not crypto_data.empty:
    total_market_cap = crypto_data['market_cap'].sum()
    avg_24h_change = crypto_data['percent_change_24h'].mean()

    print("\n📊 Market Overview:")
    print(f"   Total Market Cap (Top {len(crypto_data)}): ${total_market_cap/1e12:.2f}T")
    print(f"   Average 24h Change: {avg_24h_change:.2f}%")
    # The first row is reported as the market leader
    print(f"   Market Leader: {crypto_data.iloc[0]['name']} (${crypto_data.iloc[0]['price']:.2f})")

# Sentiment Overview
if not sentiment_scores.empty:
    avg_sentiment = sentiment_scores['compound'].mean()
    sentiment_volatility = sentiment_scores['compound'].std()

    print("\n🧠 Sentiment Analysis:")
    print(f"   Average Sentiment Score: {avg_sentiment:.4f}")
    print(f"   Sentiment Volatility: {sentiment_volatility:.4f}")

    # +/-0.1 on the mean compound score separates bullish / bearish from neutral
    if avg_sentiment > 0.1:
        sentiment_signal = "🟢 BULLISH"
    elif avg_sentiment < -0.1:
        sentiment_signal = "🔴 BEARISH"
    else:
        sentiment_signal = "🟡 NEUTRAL"

    print(f"   Overall Market Sentiment: {sentiment_signal}")

# Trading Recommendations
print("\n💡 Key Insights:")

if not crypto_data.empty:
    # Find best and worst performing coins over the last 24 hours
    best_performer = crypto_data.loc[crypto_data['percent_change_24h'].idxmax()]
    worst_performer = crypto_data.loc[crypto_data['percent_change_24h'].idxmin()]

    # The '+' format flag emits the correct sign itself. A literal '+' in front
    # of the number printed "+-1.23%" whenever every coin was down.
    print(f"   📈 Best 24h Performer: {best_performer['name']} ({best_performer['percent_change_24h']:+.2f}%)")
    print(f"   📉 Worst 24h Performer: {worst_performer['name']} ({worst_performer['percent_change_24h']:+.2f}%)")

if not sentiment_scores.empty and not crypto_data.empty:
    # Risk assessment based on the spread of sentiment scores.
    # `sentiment_volatility` is set above under the same non-empty condition.
    if pd.isna(sentiment_volatility):
        # A single scored post has no sample standard deviation (NaN); report
        # that explicitly instead of letting NaN fall through to "LOW".
        risk_level = "⚪ UNKNOWN"
    elif sentiment_volatility > 0.3:
        risk_level = "🔴 HIGH"
    elif sentiment_volatility > 0.2:
        risk_level = "🟡 MEDIUM"
    else:
        risk_level = "🟢 LOW"

    print(f"   ⚠️  Current Risk Level: {risk_level}")

print("\n⚠️  Disclaimer: This analysis is for educational purposes only.")
print("   Cryptocurrency trading involves significant risk. Always conduct")
print("   your own research and consider consulting with financial advisors.")

print(f"\n✅ Analysis completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## Next Steps for Advanced Analysis

1. **Enhanced Data Sources**: Integrate Twitter sentiment, news sentiment, and on-chain metrics
2. **Advanced Models**: Implement LSTM networks for time-series prediction
3. **Real-time Processing**: Set up automated data pipelines for live analysis
4. **Risk Management**: Develop portfolio optimization and risk assessment tools
5. **Backtesting**: Historical performance validation of trading strategies

---

*This notebook demonstrates a professional-grade cryptocurrency sentiment analysis system suitable for quantitative finance applications.*