In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import plotly.express as px
import sys
import joblib
from pathlib import Path
from datetime import datetime

# Add parent directory to sys.path for config imports.
# Path() is the current working directory, so this makes the directory one
# level above the notebook's working directory importable.
parent_dir = str(Path().resolve().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from config.settings import SOLANA_PROTOCOLS, CACHE_CONFIG

# Set up visualization directory (created on first run, reused afterwards)
vis_dir = os.path.normpath('../data/processed/visualizations')
os.makedirs(vis_dir, exist_ok=True)

# Set plotting style.
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and removed in 3.8, where plt.style.use('seaborn') raises
# OSError. Pick whichever name this matplotlib ships, preferring the new one,
# and fall back to seaborn's own theme if neither is registered.
_seaborn_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    None,
)
if _seaborn_style is not None:
    plt.style.use(_seaborn_style)
else:
    sns.set_theme()
sns.set_palette('deep')

print("📊 Solana DeFi Tracker - Analysis")
print(f"Analyzing data for {len(SOLANA_PROTOCOLS)} protocols")
print(f"Data directory: {os.path.abspath('../data')}")
print(f"Visualization directory: {os.path.abspath(vis_dir)}")
print(f"Analysis timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

### Load Collected Data

In [None]:
print("\n🔍 Loading Data...")

PROCESSED_DIR = '../data/processed'
API_RESPONSES_DIR = '../data/api_responses'


def _latest_joblib_file(directory, prefix):
    """Return the newest `<prefix>*.joblib` file name in `directory`, or None.

    "Newest" means the lexicographically greatest file name, which is what the
    original `max(files)` selection did.
    NOTE(review): this equals the most recent file only if the timestamp part
    of the name sorts chronologically (e.g. YYYYMMDD_HHMMSS) - confirm against
    the code that writes these files.

    A missing directory is treated the same as "no matching file" so that the
    callers' fallback branches run instead of an unhandled FileNotFoundError.
    """
    try:
        names = os.listdir(directory)
    except (FileNotFoundError, NotADirectoryError):
        return None
    matches = [name for name in names if name.startswith(prefix) and name.endswith('.joblib')]
    return max(matches) if matches else None


# SECURITY NOTE: joblib.load unpickles arbitrary Python objects. Only load
# files that were produced by this project's own pipeline.

# Load latest consolidated metrics
latest_consolidated = _latest_joblib_file(PROCESSED_DIR, 'consolidated_metrics_')
if latest_consolidated:
    df_consolidated = joblib.load(f"{PROCESSED_DIR}/{latest_consolidated}")
    print(f"✅ Loaded consolidated metrics: {latest_consolidated} (Shape: {df_consolidated.shape})")
else:
    print("❌ No consolidated metrics file found")
    df_consolidated = pd.DataFrame()

# Locate the latest top 100 holders file per protocol.
# Only the file NAME is recorded here; nothing is read from disk, so the
# message says "Found" rather than "Loaded".
# NOTE(review): if one protocol key is a prefix of another followed by '_'
# (e.g. 'abc' and 'abc_v2'), the shorter key's prefix also matches the longer
# key's files - verify against the keys in SOLANA_PROTOCOLS.
top_100_files = {}
for protocol_key in SOLANA_PROTOCOLS.keys():
    latest_top_100 = _latest_joblib_file(PROCESSED_DIR, f'top_100_holders_{protocol_key}_')
    if latest_top_100:
        top_100_files[protocol_key] = latest_top_100
        print(f"✅ Found top 100 holders file for {protocol_key}: {top_100_files[protocol_key]}")
    else:
        print(f"⚠️ No top 100 holders file for {protocol_key}")

# Load latest Helius holders data for Gini coefficient
latest_helius = _latest_joblib_file(API_RESPONSES_DIR, 'helius_holders_')
if latest_helius:
    helius_data = joblib.load(f"{API_RESPONSES_DIR}/{latest_helius}")
    print(f"✅ Loaded Helius holders data: {latest_helius}")
else:
    print("❌ No Helius holders data found")
    helius_data = {}

#### Calculate Valuation Metrics

In [None]:
print("\n📈 Calculating Valuation Multiples...")

# Work on a copy so the consolidated frame stays untouched; with nothing
# loaded, fall back to a bare empty frame.
if df_consolidated.empty:
    df_analysis = pd.DataFrame()
else:
    df_analysis = df_consolidated.copy()

if df_analysis.empty:
    print("⚠️ No data available for valuation multiples")
else:
    # Each multiple is market_cap divided by the annualized (x365) daily figure:
    #   pf_ratio -> fees_24h     (price-to-fees)
    #   pr_ratio -> revenue_24h  (price-to-revenue)
    # NOTE: divisions by zero (inf) and missing inputs (NaN) are reported as 0,
    # so a 0 in these columns means "undefined", not "very cheap".
    for ratio_column, daily_column in (('pf_ratio', 'fees_24h'), ('pr_ratio', 'revenue_24h')):
        annualized = df_analysis[daily_column] * 365
        raw_ratio = df_analysis['market_cap'] / annualized
        df_analysis[ratio_column] = raw_ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

    print("✅ Calculated valuation multiples:")
    summary_columns = ['protocol', 'market_cap', 'fees_24h', 'revenue_24h', 'pf_ratio', 'pr_ratio']
    print(df_analysis[summary_columns].head())

#### Analyze Token Holder Distribution

In [None]:
print("\n🔍 Analyzing Token Holder Shares...")

def calculate_gini_coefficient(balances):
    """Calculate the Gini coefficient for a collection of token balances.

    Only strictly positive balances are considered. The value returned is

        sum_{i<j} |x_i - x_j| / (n * (n - 1) * mean(x))

    i.e. the Gini estimator normalised by n*(n-1), so the result lies in
    [0, 1]: 0 when all balances are equal, approaching 1 when one holder owns
    almost everything.

    The pairwise sum is evaluated through the sorted-rank identity
    sum_{i<j} |x_i - x_j| = sum_k (2k - n - 1) * x_(k)   (k = 1..n, ascending),
    which costs O(n log n) instead of the O(n^2) of an explicit double loop.

    Args:
        balances: list, tuple or numpy array of numeric balances (or None).

    Returns:
        The Gini coefficient as a float, or np.nan when fewer than two
        positive balances are available.
    """
    if balances is None:
        return np.nan

    # float64 keeps the weighted sum from overflowing on large raw integer amounts
    values = np.asarray(balances, dtype=float).ravel()
    values = values[values > 0]  # Exclude zero (and negative / NaN) balances

    n = values.size
    if n < 2:
        # Inequality is undefined for zero or one holder (would be 0/0)
        return np.nan

    values.sort()
    rank_weights = 2 * np.arange(1, n + 1) - n - 1
    pairwise_abs_diff_sum = np.dot(rank_weights, values)
    return pairwise_abs_diff_sum / (n * (n - 1) * values.mean())

# One summary record per configured protocol is collected here
holder_analysis = []

for protocol_key, protocol_info in SOLANA_PROTOCOLS.items():
    protocol_name = protocol_info['name']

    # Defaults used when a data source has nothing for this protocol
    metrics = dict(
        protocol=protocol_name,
        total_holders=0,
        top_100_share=0,
        gini_coefficient=np.nan,
        top_1000_share=np.nan,
    )

    # Holder count and top-100 share are read from the first matching row of
    # the consolidated metrics frame
    if not df_consolidated.empty:
        protocol_data = df_consolidated[df_consolidated['protocol'] == protocol_name]
        if not protocol_data.empty:
            metrics['total_holders'] = protocol_data['total_holders_helius'].iloc[0]
            metrics['top_100_share'] = protocol_data['top_100_holders_share'].iloc[0]

    # Gini coefficient and top-1000 share are derived from the raw Helius
    # token-account balances
    has_token_accounts = (
        protocol_key in helius_data
        and 'token_accounts' in helius_data[protocol_key]
    )
    if has_token_accounts:
        balances = []
        for account in helius_data[protocol_key]['token_accounts']:
            balances.append(account['amount'])

        total_tokens = sum(balances)
        if total_tokens > 0:
            metrics['gini_coefficient'] = calculate_gini_coefficient(balances)

            # The top-1000 share is only reported when at least 1000 balances exist
            if len(balances) >= 1000:
                balances_desc = sorted(balances, reverse=True)
                top_1000_balance = sum(balances_desc[:1000])
                metrics['top_1000_share'] = (top_1000_balance / total_tokens * 100)

    holder_analysis.append(metrics)

df_holder_analysis = pd.DataFrame(holder_analysis)
print("✅ Holder analysis completed:")
report_columns = ['protocol', 'total_holders', 'top_100_share', 'top_1000_share', 'gini_coefficient']
print(df_holder_analysis[report_columns])

#### Create Visuals

In [None]:
print("\n📊 Creating Visualizations...")

# Interactive bar chart for P/F and P/R ratios using Plotly
if not df_analysis.empty:
    # Melt to long form so Plotly can group the two ratio types per protocol
    df_melted = df_analysis.melt(id_vars=['protocol'], value_vars=['pf_ratio', 'pr_ratio'],
                                 var_name='Ratio Type', value_name='Ratio')

    fig = px.bar(df_melted, x='protocol', y='Ratio', color='Ratio Type', barmode='group',
                 title='Price-to-Fees and Price-to-Revenue Ratios',
                 labels={'protocol': 'Protocol', 'Ratio': 'Ratio Value'},
                 height=500)
    fig.update_layout(xaxis_title="Protocol", yaxis_title="Ratio", xaxis_tickangle=45)

    pf_ratio_path = os.path.join(vis_dir, f'pf_pr_ratios_{datetime.now().strftime("%Y%m%d_%H%M%S")}.html')
    fig.write_html(pf_ratio_path)
    print(f"💾 Saved P/F and P/R ratios bar chart: {pf_ratio_path}")
else:
    print("⚠️ No data for P/F and P/R ratios visualization")

# Interactive bar chart for Gini coefficients using Plotly
if not df_holder_analysis.empty:
    fig = px.bar(df_holder_analysis, x='protocol', y='gini_coefficient',
                 title='Gini Coefficient by Protocol',
                 labels={'protocol': 'Protocol', 'gini_coefficient': 'Gini Coefficient'},
                 height=500, color='protocol')
    fig.update_layout(xaxis_title="Protocol", yaxis_title="Gini Coefficient", xaxis_tickangle=45)

    gini_path = os.path.join(vis_dir, f'gini_coefficient_{datetime.now().strftime("%Y%m%d_%H%M%S")}.html')
    fig.write_html(gini_path)
    print(f"💾 Saved Gini coefficient bar chart: {gini_path}")
else:
    print("⚠️ No data for Gini coefficient visualization")

# Pie charts for top 100 holders share per protocol (static, matplotlib).
# When no consolidated metrics were loaded, df_consolidated is a column-less
# empty frame and indexing it by 'protocol' would raise KeyError; in that case
# every protocol takes the "no holder data" branch instead.
holder_columns_present = {'protocol', 'top_100_holders_share'}.issubset(df_consolidated.columns)

for protocol_key, protocol_info in SOLANA_PROTOCOLS.items():
    protocol_name = protocol_info['name']
    if holder_columns_present:
        protocol_data = df_consolidated[df_consolidated['protocol'] == protocol_name]
    else:
        protocol_data = pd.DataFrame()

    # NaN compares False against 0, so a missing share is skipped as well
    top_100_share = protocol_data['top_100_holders_share'].iloc[0] if not protocol_data.empty else None
    if top_100_share is not None and top_100_share > 0:
        # NOTE(review): the share is treated as a percentage (0-100) here - confirm
        # against the code that writes 'top_100_holders_share'.
        pie_fig, pie_ax = plt.subplots(figsize=(8, 8))
        labels = ['Top 100 Holders', 'Other Holders']
        sizes = [top_100_share, 100 - top_100_share]
        colors = ['royalblue', 'lightgray']
        pie_ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
        pie_ax.set_title(f'Token Holder Distribution: {protocol_name}')
        pie_ax.axis('equal')

        pie_path = os.path.join(vis_dir, f'holder_distribution_{protocol_key}_{datetime.now().strftime("%Y%m%d_%H%M%S")}.png')
        pie_fig.savefig(pie_path)
        # Close explicitly so figures do not accumulate across the loop
        plt.close(pie_fig)
        print(f"💾 Saved holder distribution pie chart for {protocol_name}: {pie_path}")
    else:
        print(f"⚠️ No holder data for {protocol_name}, skipping pie chart")

#### Save Analysis Results

In [None]:
print("\n💾 Saving Analysis Results...")
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

valuation_columns = ['protocol', 'pf_ratio', 'pr_ratio']
holder_columns = ['protocol', 'total_holders', 'top_100_share', 'top_1000_share', 'gini_coefficient']
result_columns = valuation_columns + holder_columns[1:]

# Either input can be a column-less empty frame (e.g. no consolidated metrics
# file was found), in which case selecting the columns would raise KeyError.
# Use whichever side is available and fill the other side's columns with NaN.
has_valuation = set(valuation_columns).issubset(df_analysis.columns)
has_holders = set(holder_columns).issubset(df_holder_analysis.columns)

if has_valuation and has_holders:
    # Merge valuation and holder analysis; the outer join keeps protocols that
    # appear on only one side
    df_results = df_analysis[valuation_columns].merge(
        df_holder_analysis[holder_columns],
        on='protocol',
        how='outer'
    )
elif has_holders:
    df_results = df_holder_analysis[holder_columns].assign(pf_ratio=np.nan, pr_ratio=np.nan)
    df_results = df_results[result_columns]
elif has_valuation:
    df_results = df_analysis[valuation_columns].assign(
        **{column: np.nan for column in holder_columns[1:]}
    )
else:
    df_results = pd.DataFrame(columns=result_columns)

if not df_results.empty:
    # Save as joblib and CSV, side by side under the same timestamped stem
    results_path = os.path.normpath(f'../data/processed/analysis_results_{timestamp}.joblib')
    csv_path = os.path.normpath(f'../data/processed/analysis_results_{timestamp}.csv')

    joblib.dump(df_results, results_path, compress='zlib')
    print(f"💾 Saved analysis results: {results_path}")

    df_results.to_csv(csv_path, index=False)
    print(f"💾 Saved analysis results CSV: {csv_path}")

    print("\nSample results:")
    print(df_results.head())
else:
    print("⚠️ No analysis results to save")