In [24]:
"""
Santiment API Client
Retrieve social volume, weighted sentiment, exchange flows, addresses, MVRV, and NPL data
"""

import requests
import pandas as pd
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Union
import logging
import pytz
from pydantic import BaseModel
from google.colab import userdata

API_KEY = userdata.get('SANTIMENT_API_KEY')

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SantimentData(BaseModel):
    """Data model for Santiment metrics"""
    slug: str
    metric: str
    datetime: datetime
    value: Union[float, int, None]
    metadata: Optional[Dict] = {}

class SantimentClient:
    """Client for Santiment API"""

    def __init__(self, api_key: str):
        """
        Initialize Santiment client

        Args:
            api_key: Santiment API key
        """
        self.api_key = api_key
        self.base_url = "https://api.santiment.net/graphql"
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Apikey {api_key}',
            'Content-Type': 'application/json'
        })

        # Available metrics - ONLY the ones you specified (corrected based on API errors)
        self.metrics = {
            'daily_active_addresses': 'daily_active_addresses',
            'circulation': 'circulation',
            'mvrv_usd': 'mvrv_usd_intraday',
            'whale_transaction_count': 'whale_transaction_count_1m_usd_to_inf',
            'transaction_volume_in_profit_or_loss': 'transaction_volume_in_profit',
            'mean_dollar_invested_age': 'mean_dollar_invested_age',
            'npl': 'network_profit_loss',
            'supply_on_exchanges': 'supply_on_exchanges',
            'exchange_flow_balance': 'exchange_balance',
            'social_volume': 'social_volume_total',
            'social_dominance': 'social_dominance_total',
            'weighted_sentiment': 'sentiment_balance_total',
            'market_cap_usd': 'marketcap_usd'
        }

    def query_graphql(self, query: str) -> Dict:
        """Execute GraphQL query"""
        try:
            response = self.session.post(
                self.base_url,
                json={'query': query},
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"GraphQL query failed: {e}")
            raise

    def get_metric_data(
        self,
        slug: str,
        metric: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """
        Get metric data for a specific asset

        Args:
            slug: Asset slug (e.g., 'bitcoin', 'ethereum')
            metric: Metric name from self.metrics
            from_date: Start date
            to_date: End date
            interval: Time interval ('1h', '1d', '7d')
        """

        # Convert metric name to Santiment metric
        santiment_metric = self.metrics.get(metric, metric)

        # Format dates
        from_str = from_date.strftime('%Y-%m-%dT%H:%M:%SZ')
        to_str = to_date.strftime('%Y-%m-%dT%H:%M:%SZ')

        query = f"""
        {{
            getMetric(metric: "{santiment_metric}") {{
                timeseriesData(
                    slug: "{slug}"
                    from: "{from_str}"
                    to: "{to_str}"
                    interval: "{interval}"
                ) {{
                    datetime
                    value
                }}
            }}
        }}
        """

        try:
            result = self.query_graphql(query)

            if 'errors' in result:
                logger.error(f"GraphQL errors: {result['errors']}")
                return []

            data_points = result.get('data', {}).get('getMetric', {}).get('timeseriesData', [])

            santiment_data = []
            for point in data_points:
                if point['value'] is not None:
                    santiment_data.append(SantimentData(
                        slug=slug,
                        metric=metric,
                        datetime=datetime.fromisoformat(point['datetime'].replace('Z', '+00:00')),
                        value=point['value'],
                        metadata={'santiment_metric': santiment_metric, 'interval': interval}
                    ))

            logger.info(f"Retrieved {len(santiment_data)} data points for {slug} {metric}")
            return santiment_data

        except Exception as e:
            logger.error(f"Error retrieving {metric} for {slug}: {e}")
            return []

    def get_social_volume(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get social volume data"""
        return self.get_metric_data(slug, 'social_volume', from_date, to_date, interval)

    def get_weighted_sentiment(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get weighted sentiment data"""
        return self.get_metric_data(slug, 'weighted_sentiment', from_date, to_date, interval)

    def get_exchange_flows(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> Dict[str, List[SantimentData]]:
        """Get exchange inflow and outflow data"""
        inflow = self.get_metric_data(slug, 'exchange_inflow', from_date, to_date, interval)
        outflow = self.get_metric_data(slug, 'exchange_outflow', from_date, to_date, interval)

        return {
            'inflow': inflow,
            'outflow': outflow
        }

    def get_address_metrics(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> Dict[str, List[SantimentData]]:
        """Get address creation and usage metrics"""
        active = self.get_metric_data(slug, 'active_addresses', from_date, to_date, interval)
        new = self.get_metric_data(slug, 'new_addresses', from_date, to_date, interval)

        return {
            'active_addresses': active,
            'new_addresses': new
        }

    def get_mvrv(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get MVRV (Market Value to Realized Value) data"""
        return self.get_metric_data(slug, 'mvrv_usd', from_date, to_date, interval)

    def get_npl(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Network Profit/Loss data"""
        return self.get_metric_data(slug, 'npl', from_date, to_date, interval)

    def get_daily_active_addresses(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Daily Active Addresses data"""
        return self.get_metric_data(slug, 'daily_active_addresses', from_date, to_date, interval)

    def get_circulation(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Circulation data"""
        return self.get_metric_data(slug, 'circulation', from_date, to_date, interval)

    def get_funding_rate(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Funding Rate data"""
        return self.get_metric_data(slug, 'funding_rate', from_date, to_date, interval)

    def get_whale_transaction_count(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Whale Transaction Count data"""
        return self.get_metric_data(slug, 'whale_transaction_count', from_date, to_date, interval)

    def get_supply_distribution(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Supply Distribution data"""
        return self.get_metric_data(slug, 'supply_distribution', from_date, to_date, interval)

    def get_transaction_volume_in_profit_or_loss(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Transaction Volume in Profit or Loss data"""
        return self.get_metric_data(slug, 'transaction_volume_in_profit_or_loss', from_date, to_date, interval)

    def get_mean_dollar_invested_age(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Mean Dollar Invested Age data"""
        return self.get_metric_data(slug, 'mean_dollar_invested_age', from_date, to_date, interval)

    def get_supply_on_exchanges(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Supply on Exchanges data"""
        return self.get_metric_data(slug, 'supply_on_exchanges', from_date, to_date, interval)

    def get_exchange_flow_balance(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Exchange Flow Balance data"""
        return self.get_metric_data(slug, 'exchange_flow_balance', from_date, to_date, interval)

    def get_social_dominance(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Social Dominance data"""
        return self.get_metric_data(slug, 'social_dominance', from_date, to_date, interval)

    def get_market_cap_usd(
        self,
        slug: str,
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> List[SantimentData]:
        """Get Market Cap USD data"""
        return self.get_metric_data(slug, 'market_cap_usd', from_date, to_date, interval)

    def get_all_metrics(
        self,
        slugs: List[str],
        from_date: datetime,
        to_date: datetime,
        interval: str = '1d'
    ) -> Dict[str, Dict[str, List[SantimentData]]]:
        """
        Get all available metrics for multiple assets using a single GraphQL query

        Args:
            slugs: List of asset slugs (e.g., ['bitcoin', 'ethereum'])
            from_date: Start date
            to_date: End date
            interval: Time interval ('1h', '1d', '7d')

        Returns:
            Dictionary with structure: {slug: {metric: [SantimentData]}}
        """
        logger.info(f"Fetching all metrics for {len(slugs)} assets from {from_date} to {to_date}")

        # Format dates
        from_str = from_date.strftime('%Y-%m-%dT%H:%M:%SZ')
        to_str = to_date.strftime('%Y-%m-%dT%H:%M:%SZ')

        # Build single GraphQL query for all slugs and all metrics
        query_parts = []

        for slug in slugs:
            for metric_key, santiment_metric in self.metrics.items():
                query_parts.append(f"""
                {slug}_{metric_key}: getMetric(metric: "{santiment_metric}") {{
                    timeseriesData(
                        slug: "{slug}"
                        from: "{from_str}"
                        to: "{to_str}"
                        interval: "{interval}"
                    ) {{
                        datetime
                        value
                    }}
                }}""")

        query = f"{{{''.join(query_parts)}}}"

        try:
            result = self.query_graphql(query)

            if 'errors' in result:
                logger.error(f"GraphQL errors: {result['errors']}")
                return {}

            # Parse results into structured format
            all_data = {}

            for slug in slugs:
                all_data[slug] = {}

                for metric_key in self.metrics.keys():
                    query_key = f"{slug}_{metric_key}"
                    data_points = result.get('data', {}).get(query_key, {}).get('timeseriesData', [])

                    santiment_data = []
                    for point in data_points:
                        if point['value'] is not None:
                            santiment_data.append(SantimentData(
                                slug=slug,
                                metric=metric_key,
                                datetime=datetime.fromisoformat(point['datetime'].replace('Z', '+00:00')),
                                value=point['value'],
                                metadata={'santiment_metric': self.metrics[metric_key], 'interval': interval}
                            ))

                    all_data[slug][metric_key] = santiment_data

            # Summary
            total_points = sum(
                len(data)
                for slug_data in all_data.values()
                for data in slug_data.values()
            )
            logger.info(f"Retrieved {total_points} total data points across {len(slugs)} assets and {len(self.metrics)} metrics")

            return all_data

        except Exception as e:
            logger.error(f"Error retrieving all metrics: {e}")
            return {}

    def to_dataframe(self, data: List[SantimentData]) -> pd.DataFrame:
        """Convert SantimentData list to pandas DataFrame"""
        if not data:
            return pd.DataFrame()

        df_data = []
        for item in data:
            df_data.append({
                'slug': item.slug,
                'metric': item.metric,
                'datetime': item.datetime,
                'value': item.value,
                'santiment_metric': item.metadata.get('santiment_metric'),
                'interval': item.metadata.get('interval')
            })

        df = pd.DataFrame(df_data)
        df['datetime'] = pd.to_datetime(df['datetime'])
        df = df.set_index('datetime')

        return df

    def get_available_assets(self) -> List[str]:
        """Get list of available asset slugs"""
        query = """
        {
            allProjects {
                slug
                name
                ticker
            }
        }
        """

        try:
            result = self.query_graphql(query)
            projects = result.get('data', {}).get('allProjects', [])

            asset_list = []
            for project in projects:
                asset_list.append({
                    'slug': project['slug'],
                    'name': project['name'],
                    'ticker': project['ticker']
                })

            logger.info(f"Found {len(asset_list)} available assets")
            return asset_list

        except Exception as e:
            logger.error(f"Error retrieving available assets: {e}")
            return []

    def get_metric_intervals(self, slug: str) -> pd.DataFrame:
        """
        Brute-force all metrics and common intervals to see which are available
        for a given asset (trial key compatible). Returns a DataFrame with
        metrics as rows, intervals as columns, and ‚úÖ/‚ùå.
        """
        intervals_to_test = ['1m', '5m', '15m', '30m', '1h', '6h', '12h', '1d', '7d']
        metrics = list(self.metrics.keys())

        results = []

        # Use a short date range to stay within trial limits
        to_date = datetime.now() - timedelta(days=30)
        from_date = to_date - timedelta(days=7)

        for metric in metrics:
            row = {'metric': metric}
            for interval in intervals_to_test:
                try:
                    data = self.get_metric_data(slug, metric, from_date, to_date, interval)
                    row[interval] = '‚úÖ' if data else '‚ùå'
                except Exception:
                    row[interval] = '‚ùå'
            results.append(row)

        df = pd.DataFrame(results)
        df.set_index('metric', inplace=True)
        return df


In [27]:
# Initialize client
client = SantimentClient(API_KEY)

# Time range
to_date = datetime.utcnow() - timedelta(days=60)     # 2 months ago
from_date = datetime.utcnow() - timedelta(days=240)  # 8 months ago

slug = "bitcoin"

print("üì° Fetching Bitcoin metrics...")

# Fetch price (via market_cap_usd)
price_data = client.get_market_cap_usd(slug, from_date, to_date, interval="1d")
price_df = client.to_dataframe(price_data)[["value"]].rename(columns={"value": "price"})

metric_dfs = {"price": price_df}

# Fetch all other metrics available in the client
for metric in client.metrics.keys():
    if metric == "market_cap_usd":
        continue  # Already used as price

    print(f" ‚Üí Fetching {metric} ...")
    data = client.get_metric_data(slug, metric, from_date, to_date, interval="1d")
    df = client.to_dataframe(data)
    print(f"   Returned shape for metric {metric}: {df.shape}")

    if df.empty:
        print(f"   ‚ö†Ô∏è No data returned for {metric}")
        continue

    df = df[["value"]].rename(columns={"value": metric})
    metric_dfs[metric] = df

# Merge all metrics on datetime
print("\nüîÑ Merging all time series...")
merged_df = pd.concat(metric_dfs.values(), axis=1, join="inner")

print(f"üìä Final dataset shape: {merged_df.shape}")

# -----------------------------
# FULL CORRELATION MATRIX
# -----------------------------
full_corr_matrix = merged_df.corr()

# -----------------------------
# CORRELATION vs PRICE (sorted)
# -----------------------------
corr_vs_price = full_corr_matrix[["price"]].sort_values("price", ascending=False)

print("\n===== üìà Correlation vs Bitcoin Price (sorted) =====\n")
display(corr_vs_price)

print("\n===== üîç Full Correlation Matrix (all variables vs each other) =====\n")
display(full_corr_matrix)


  to_date = datetime.utcnow() - timedelta(days=60)     # 2 months ago
  from_date = datetime.utcnow() - timedelta(days=240)  # 8 months ago


üì° Fetching Bitcoin metrics...
 ‚Üí Fetching daily_active_addresses ...
   Returned shape for metric daily_active_addresses: (181, 5)
 ‚Üí Fetching circulation ...
   Returned shape for metric circulation: (181, 5)
 ‚Üí Fetching mvrv_usd ...
   Returned shape for metric mvrv_usd: (181, 5)
 ‚Üí Fetching whale_transaction_count ...
   Returned shape for metric whale_transaction_count: (181, 5)
 ‚Üí Fetching transaction_volume_in_profit_or_loss ...
   Returned shape for metric transaction_volume_in_profit_or_loss: (181, 5)
 ‚Üí Fetching mean_dollar_invested_age ...
   Returned shape for metric mean_dollar_invested_age: (181, 5)
 ‚Üí Fetching npl ...
   Returned shape for metric npl: (181, 5)
 ‚Üí Fetching supply_on_exchanges ...
   Returned shape for metric supply_on_exchanges: (181, 5)
 ‚Üí Fetching exchange_flow_balance ...
   Returned shape for metric exchange_flow_balance: (181, 5)
 ‚Üí Fetching social_volume ...
   Returned shape for metric social_volume: (181, 5)
 ‚Üí Fetching soc

Unnamed: 0,price
price,1.0
circulation,0.806458
mvrv_usd,0.503916
whale_transaction_count,0.38683
social_volume,0.150983
social_dominance,0.139093
transaction_volume_in_profit_or_loss,0.126774
npl,0.110791
exchange_flow_balance,0.108293
daily_active_addresses,0.071629



===== üîç Full Correlation Matrix (all variables vs each other) =====



Unnamed: 0,price,daily_active_addresses,circulation,mvrv_usd,whale_transaction_count,transaction_volume_in_profit_or_loss,mean_dollar_invested_age,npl,supply_on_exchanges,exchange_flow_balance,social_volume,social_dominance,weighted_sentiment
price,1.0,0.071629,0.806458,0.503916,0.38683,0.126774,-0.837511,0.110791,-0.748142,0.108293,0.150983,0.139093,-0.057272
daily_active_addresses,0.071629,1.0,-0.075291,0.254456,0.641221,0.643959,0.035446,0.385391,0.009117,-0.159348,0.517036,0.013714,0.470653
circulation,0.806458,-0.075291,1.0,-0.086154,0.294952,-0.050461,-0.843303,-0.047086,-0.740375,0.110329,-0.272444,-0.160234,-0.223137
mvrv_usd,0.503916,0.254456,-0.086154,1.0,0.253941,0.307618,-0.105882,0.25229,-0.110541,0.020518,0.649985,0.383076,0.289428
whale_transaction_count,0.38683,0.641221,0.294952,0.253941,1.0,0.640413,-0.263573,0.372338,-0.226475,-0.078309,0.539578,0.087386,0.460538
transaction_volume_in_profit_or_loss,0.126774,0.643959,-0.050461,0.307618,0.640413,1.0,0.006285,0.658952,-0.017311,-0.276089,0.576078,0.08047,0.525463
mean_dollar_invested_age,-0.837511,0.035446,-0.843303,-0.105882,-0.263573,0.006285,1.0,-0.06974,0.952158,-0.066185,0.064006,-0.064212,0.291787
npl,0.110791,0.385391,-0.047086,0.25229,0.372338,0.658952,-0.06974,1.0,-0.079778,-0.565855,0.413281,0.131161,0.242197
supply_on_exchanges,-0.748142,0.009117,-0.740375,-0.110541,-0.226475,-0.017311,0.952158,-0.079778,1.0,-0.030395,0.023989,-0.063149,0.270213
exchange_flow_balance,0.108293,-0.159348,0.110329,0.020518,-0.078309,-0.276089,-0.066185,-0.565855,-0.030395,1.0,-0.032762,0.079706,-0.150322
