# Solana X Sentiment Analysis for Fraud Detection
![Messari Logo](Messari.png) X ![Helius Logo](https://www.helius.dev/logo.svg) X ![Superteam Logo](https://earn.superteam.fun/assets/logo.svg)

## What This Project Does
This project checks X (Twitter) sentiment for Solana using Messari's Signal API. It looks for spikes in sentiment, tweet volume, and momentum to spot possible hype or scams. I made this for the Messari Crime Fighting AI Toolkit Hackathon to find suspicious activity on Solana.

It flags a day as suspicious if any of the following is true:
- Sentiment score is above the period average by more than half a standard deviation (mean + 0.5 standard deviations).
- Tweet volume is more than 1.2x the previous day's volume.
- Momentum score is above 0.3 (a big positive change in sentiment).

## What You Need to Run It
- Python 3.11 or higher
- Libraries: requests, pandas, numpy
  Install them by running this in your terminal or command prompt:

  ```
  pip install requests pandas numpy
  ```


## How It Works

1. Get a Messari API key.
2. In the code, change `API_KEY = "YOUR_API_KEY"` to your actual Messari API key.
3. Run each cell in the notebook by pressing `Shift + Enter`:
   - First cell: Imports the libraries.
   - Second cell: Defines the functions that fetch and analyze the data.
   - Third cell: Runs the analysis and saves the results.

## What You’ll Get
- `solana_sentiment_analysis.csv`: A file with the results (dates, sentiment scores, tweet volumes, momentum, and suspicious flags).
- `twitter_thread.txt`: A ready-to-post Twitter thread summarizing the findings for the hackathon.

In [61]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import json
import numpy as np

In [63]:
# Messari API key, sent as the "x-messari-api-key" header on every request
# made below. Replace the placeholder with your own key before running.
API_KEY = "YOUR_API_KEY"

# fetch the asset ID for Solana
def fetch_solana_asset_id():
    """Look up Solana's asset ID in Messari's sentiment asset list.

    Returns:
        The ``id`` of the first listed asset whose symbol is "sol" or whose
        name is "solana" (case-insensitive), or ``None`` when the request
        fails, the response cannot be used, or no asset matches. Every
        failure is reported with a printed message.
    """
    url = "https://api.messari.io/signal/v0/sentiment/assets"
    headers = {"x-messari-api-key": API_KEY}
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"Error fetching assets: {response.status_code}")
        return None

    try:
        # `or []` covers a "data" key that is missing or explicitly null.
        assets = response.json().get('data') or []
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print(f"Unexpected assets response: {e}")
        return None

    for asset in assets:
        if not isinstance(asset, dict):
            continue
        # `or ''` rather than a .get() default: a null symbol/name would
        # otherwise reach .lower() as None and raise AttributeError.
        symbol = (asset.get('symbol') or '').lower()
        name = (asset.get('name') or '').lower()
        if symbol == 'sol' or name == 'solana':
            return asset.get('id')

    print("Solana asset ID not found.")
    return None

# fetch sentiment time-series data for Solana
def fetch_solana_sentiment(asset_id):
    """Fetch the last 7 days of daily sentiment data for an asset.

    Args:
        asset_id: Messari asset ID used in the request URL. A falsy value
            short-circuits to ``None`` without making a request.

    Returns:
        The decoded JSON response (also printed in raw form), or ``None``
        when ``asset_id`` is falsy, the request fails, the status code is
        not 200, or the body is not valid JSON.
    """
    if not asset_id:
        return None

    # Imported locally because the file's top-level import only brings in
    # `datetime` and `timedelta`.
    from datetime import timezone

    # Aware UTC "now"; datetime.utcnow() is deprecated since Python 3.12.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=7)  # Last 7 days
    url = f"https://api.messari.io/signal/v0/sentiment/assets/{asset_id}/time-series/1d"
    headers = {"x-messari-api-key": API_KEY}
    params = {
        "start": start_date.strftime('%Y-%m-%d'),
        "end": end_date.strftime('%Y-%m-%d'),
        "fields": "sentimentScore,tweetVolume"
    }
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"Error fetching sentiment data: {response.status_code}")
        return None

    try:
        response_data = response.json()
    except ValueError as e:
        # Body was not valid JSON.
        print(f"Request failed: {e}")
        return None

    print("Raw API response:", response_data)
    return response_data

# Analyze sentiment for spikes
def analyze_sentiment_spikes():
    """Fetch Solana sentiment data and flag days with unusual activity.

    A day is flagged as suspicious when ANY of these holds:
      * its sentiment score is above the period mean + 0.5 standard
        deviations,
      * its tweet volume is more than 1.2x the previous day's (non-zero)
        volume,
      * its momentum score is above 0.3.

    Side effects:
        Writes the per-day results to ``solana_sentiment_analysis.csv``.

    Returns:
        ``(df, summary)`` where ``df`` has one row per parsed data point
        (columns: date, sentiment_score, momentum_score, tweet_volume,
        prev_tweet_volume, tweet_volume_spike, momentum_high, suspicious)
        and ``summary`` is a dict with ``total_days``, ``suspicious_days``,
        ``suspicious_percentage`` and ``tweet_volume_trend``.
        ``(None, None)`` when the asset ID or sentiment data could not be
        fetched, or when no data point could be parsed.
    """
    # Imported locally because the file's top-level import only brings in
    # `datetime` and `timedelta`.
    from datetime import timezone

    # Fetch Solana asset ID
    asset_id = fetch_solana_asset_id()
    if not asset_id:
        return None, None

    # Fetch sentiment data
    response_data = fetch_solana_sentiment(asset_id)
    if not response_data:
        return None, None

    # Extract points and schema. `or {}` / `or []` cover keys that are
    # present but null, which a .get() default would not.
    data = response_data.get('data') or {}
    points = data.get('points') or []
    schemas = (response_data.get('metadata') or {}).get('pointSchemas') or []

    if not points or not schemas:
        print("No valid sentiment data found in response.")
        return None, None

    # Map schema slugs to column indices; the fallbacks are the positions
    # used when a slug is absent from the schema list.
    schema_mapping = {schema.get('slug'): idx for idx, schema in enumerate(schemas)}
    timestamp_idx = schema_mapping.get('time', 0)
    sentiment_score_idx = schema_mapping.get('sentiment-score', 1)
    momentum_score_idx = schema_mapping.get('momentum-score', 3)
    tweet_volume_idx = schema_mapping.get('tweet-volume', 7)

    # Process data points, skipping any that are malformed
    results = []
    for point in points:
        try:
            timestamp = int(point[timestamp_idx])
            results.append({
                # Convert in UTC: a local-time conversion would shift the
                # date by a day in timezones behind UTC.
                'date': datetime.fromtimestamp(timestamp, tz=timezone.utc).date(),
                'sentiment_score': float(point[sentiment_score_idx]),
                'momentum_score': float(point[momentum_score_idx]),
                'tweet_volume': int(point[tweet_volume_idx])
            })
        except (IndexError, ValueError, TypeError, OverflowError, OSError) as e:
            print(f"Skipping invalid point: {point} (Error: {e})")
            continue

    # Create DataFrame
    df = pd.DataFrame(results)
    if df.empty:
        print("No valid sentiment data processed.")
        return None, None

    # The previous-day comparison below relies on chronological order.
    df = df.sort_values('date', kind='stable').reset_index(drop=True)

    # Flag suspicious days
    # Sentiment: > mean + 0.5 std dev
    sentiment_mean = df['sentiment_score'].mean()
    sentiment_std = df['sentiment_score'].std()
    sentiment_threshold = sentiment_mean + 0.5 * sentiment_std

    # Tweet volume: > 1.2x previous day's volume. The first row has no
    # predecessor (filled with 0) and is excluded by the `> 0` guard.
    df['prev_tweet_volume'] = df['tweet_volume'].shift(1).fillna(0)
    df['tweet_volume_spike'] = (df['tweet_volume'] > 1.2 * df['prev_tweet_volume']) & (df['prev_tweet_volume'] > 0)

    # Momentum: > 0.3 (significant positive shift)
    df['momentum_high'] = df['momentum_score'] > 0.3

    df['suspicious'] = (df['sentiment_score'] > sentiment_threshold) | (df['tweet_volume_spike']) | (df['momentum_high'])

    # Save results
    df.to_csv('solana_sentiment_analysis.csv', index=False)

    # Generate summary (plain Python numbers so it stays JSON-serializable)
    total_days = len(df)
    suspicious_days = int(df['suspicious'].sum())
    summary = {
        'total_days': total_days,
        'suspicious_days': suspicious_days,
        'suspicious_percentage': suspicious_days / total_days * 100,
        'tweet_volume_trend': 'increasing' if df['tweet_volume'].iloc[-1] > df['tweet_volume'].iloc[0] else 'stable/decreasing'
    }

    return df, summary

# Generate Twitter thread
def generate_twitter_thread(df, summary):
    """Compose the four-tweet thread that summarizes the analysis.

    Args:
        df: DataFrame with a boolean 'suspicious' column plus 'date',
            'sentiment_score', 'tweet_volume' and 'momentum_score'.
        summary: dict with 'total_days', 'suspicious_days',
            'suspicious_percentage' and 'tweet_volume_trend'.

    Returns:
        A list of four strings: intro, statistics, either up to three
        flagged days or a "no major spikes" note, and a closing tweet.
    """
    intro_tweet = "🔍 Investigating potential fraud on Solana via X sentiment using Messari's Signal API! Analyzed sentiment, momentum, and tweet volume to spot spikes that might signal hype or pump-and-dump schemes. #Solana #CryptoFraud #Messari"

    stats_tweet = f"📊 Analyzed {summary['total_days']} days of X sentiment for Solana. Found {summary['suspicious_days']} days ({summary['suspicious_percentage']:.1f}%) with unusual activity (sentiment > avg + 0.5 std dev, tweet volume > 1.2x prev day, or momentum > 0.3)."

    if summary['suspicious_days'] > 0:
        # Only the first three flagged days are listed.
        flagged_days = df[df['suspicious']].head(3)
        bullet_lines = []
        for _, day in flagged_days.iterrows():
            bullet_lines.append(
                f"- {day['date']} (Sentiment: {day['sentiment_score']:.1f}, Tweets: {day['tweet_volume']}, Momentum: {day['momentum_score']:.2f})"
            )
        spikes_tweet = "⚠️ Suspicious days:\n" + "\n".join(bullet_lines)
    else:
        spikes_tweet = "⚠️ No major spikes, but tweet volume is " + summary['tweet_volume_trend'] + "—watch for growing hype!"

    closing_tweet = "💡 Why it matters: Sudden X activity spikes can signal coordinated hype. Traders/builders, stay cautious! Code is open-sourced here: [Insert GitHub link] #REDACTEDHackathon @messaricrypto @heliuslabs @SuperteamEarn"

    return [intro_tweet, stats_tweet, spikes_tweet, closing_tweet]



In [65]:
# Run the analysis
if __name__ == "__main__":
    # analyze_sentiment_spikes() reports failure as (None, None), so the
    # tuple's contents must be checked; a bare None is tolerated as well.
    result = analyze_sentiment_spikes()
    df, summary = result if result is not None else (None, None)
    if df is None or summary is None:
        print("Analysis failed. Check the logs for details.")
    else:
        print("Analysis complete! Summary:")
        print(json.dumps(summary, indent=2))
        print("\nSample of results:")
        print(df[['date', 'sentiment_score', 'tweet_volume', 'momentum_score', 'suspicious']].head())

        # Write the thread as numbered tweets separated by blank lines.
        thread = generate_twitter_thread(df, summary)
        with open('twitter_thread.txt', 'w', encoding='utf-8') as f:
            f.writelines(
                f"Tweet {i}:\n{tweet}\n\n" for i, tweet in enumerate(thread, 1)
            )
        print("\nTwitter thread saved to twitter_thread.txt")

Raw API response: {'error': None, 'data': {'points': [[1745020800, 50.69561322063715, 119, -0.35985019141964614, 0.570897, 0.086971, 0.342132, 2955], [1745107200, 50.46397129555794, 109, -0.23164192507920944, 0.569629, 0.088185, 0.342186, 2937], [1745193600, 50.11308214101115, 185, -0.3508891545467918, 0.562222, 0.090556, 0.347222, 3600], [1745280000, 50.481686838583286, 192, 0.3686046975721382, 0.562763, 0.090325, 0.346912, 4517], [1745366400, 50.85769741307628, 208, 0.37601057449299446, 0.565835, 0.0947, 0.339465, 5491], [1745452800, 50.848688657094506, 232, -0.009008755981774641, 0.571561, 0.094898, 0.33354, 6449], [1745539200, 50.6973288263398, 257, -0.15135983075470705, 0.572389, 0.094636, 0.332975, 7439], [1745625600, 49.87047279437642, 167, -0.8268560319633806, 0.570921, 0.095609, 0.33347, 7311]]}, 'metadata': {'pointSchemas': [{'name': 'Timestamp', 'slug': 'time', 'description': 'Timestamp of the data point.', 'isTimestamp': True}, {'name': 'Sentiment Score', 'slug': 'sentiment