## Augmento Sentiment

In [1]:
# Summing Bitcoin sentiment data from all 3 platforms including Twitter, Reddit, and Bitcointalk
from collections import defaultdict
import re
import pandas as pd

raw_sentiment = '../raw_data/augmento_btc.csv'

# Load the raw export. 'date' is parsed while reading and then passed through
# to_datetime with an explicit format (kept from the original; presumably a
# safeguard in case read_csv leaves the column unparsed).
df = pd.read_csv(raw_sentiment, parse_dates=['date'])
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')

# Separate columns by platform
twitter_cols = [col for col in df.columns if col.startswith('twitter_')]
reddit_cols = [col for col in df.columns if col.startswith('reddit_')]
bitcointalk_cols = [col for col in df.columns if col.startswith('bitcointalk_')]

# Build the lookup once: concatenating the three lists inside the comprehension
# would rebuild (and linearly scan) the combined list for every column tested.
platform_cols = set(twitter_cols + reddit_cols + bitcointalk_cols)
# Everything that does not belong to one of the three platforms
other_cols = [col for col in df.columns if col not in platform_cols]

# List out the end part of the column names
def get_column_endings(columns):
    """
    Group column names by their underscore-delimited ending.

    The ending is the trailing run of word characters (letters, digits,
    underscores) that follows the earliest underscore from which only word
    characters remain, e.g. 'twitter_scam_fraud' -> 'scam_fraud'. Names with
    no such ending are left out.

    Args:
        columns (iterable of str): Column names to inspect

    Returns:
        defaultdict(list): Maps each ending to the column names carrying it,
        in the order they were encountered
    """
    ending_pattern = re.compile(r'_(\w+)$')
    grouped = defaultdict(list)
    for name in columns:
        found = ending_pattern.search(name)
        if found is None:
            # Nothing usable after an underscore: skip this name
            continue
        grouped[found.group(1)].append(name)
    return grouped

# Ending -> columns mapping, computed separately for each platform
twitter_endings = get_column_endings(twitter_cols)
reddit_endings = get_column_endings(reddit_cols)
bitcointalk_endings = get_column_endings(bitcointalk_cols)

# Every ending seen on at least one platform
all_endings = set(twitter_endings) | set(reddit_endings) | set(bitcointalk_endings)
# Report how many distinct endings were found
print(f"Number of unique endings: {len(all_endings)}")

def sum_platform_columns(df, endings, platforms=('twitter', 'reddit', 'bitcointalk')):
    """
    Sums values across platforms for columns that share the same ending.

    For each ending, the columns named exactly "<platform>_<ending>" are
    looked up and added row-wise. Matching on the exact name (rather than on
    a "_<ending>" suffix) keeps an ending such as "news" from also absorbing
    "twitter_bad_news", and keeps non-platform columns out of the totals.

    Args:
        df (pd.DataFrame): Input dataframe containing platform-specific columns
        endings (iterable of str): Unique column endings to aggregate
        platforms (iterable of str): Column-name prefixes to combine; defaults
            to twitter, reddit and bitcointalk

    Returns:
        pd.DataFrame: One column per ending that matched at least one platform
        column, holding the row-wise sum across platforms. Columns are in
        sorted ending order; the frame is empty when nothing matched.
    """
    summed = {}

    # Sorted so the column order does not depend on set iteration order
    for ending in sorted(endings):
        candidates = [f"{platform}_{ending}" for platform in platforms]
        columns = [col for col in candidates if col in df.columns]
        if columns:
            # Row-wise total over whichever platforms provide this ending
            summed[ending] = df[columns].sum(axis=1)

    # Assemble once instead of inserting columns one at a time
    return pd.DataFrame(summed)

# Aggregate every ending across the platforms
summed_df = sum_platform_columns(df, all_endings)

# Attach the date column to the summed columns (no other source columns are kept)
summed_df = pd.concat([df['date'], summed_df], axis=1)

# Collapse to one row per calendar day: index by date, sum the rows that fall
# on each day, then turn the index back into an ordinary 'date' column
summed_df = summed_df.set_index('date').resample('D').sum().reset_index()

# Alphabetical column order
summed_df = summed_df.reindex(sorted(summed_df.columns), axis=1)

# Move date column to front if it exists
if 'date' in summed_df.columns:
    cols = [name for name in summed_df.columns if name != 'date']
    summed_df = summed_df[['date'] + cols]

# Persist the daily totals
summed_df.to_csv('../data/bitcoin_sentiment.csv', index=False)
print("Summed dataframe saved to '../data/bitcoin_sentiment.csv'")

Number of unique endings: 93
Summed dataframe saved to '../data/bitcoin_sentiment.csv'


In [None]:
# Combine Sentiment Features
# Define column groups: each key is the name of an aggregated feature and each
# value lists the summed-sentiment columns that are averaged into it.
# Column names never contain spaces (they come from a \w+ match on the raw
# column names), so every entry must be a single underscore-joined token.
column_groups = {
    'positive_sentiment': ['positive', 'happy', 'optimistic', 'hopeful', 'euphoric_excited'],
    'negative_sentiment': ['negative', 'sad', 'pessimistic_doubtful', 'mistrustful', 'angry', 'annoyed_frustrated', 'fearful_concerned'],
    'bullish_sentiment': ['bullish'],
    'bearish_sentiment': ['bearish'],
    # 'bubble' and 'correction' are separate columns; they were previously
    # fused into the single string 'bubble correction', which matched nothing.
    'risk_uncertainty_sentiment': ['risk', 'uncertain', 'warning', 'fud_theme', 'bubble', 'correction', 'rumor'],
    'problem_malicious_sentiment': ['hacks', 'scam_fraud', 'ban', 'problems_and_issues', 'bug', 'market_manipulation', 'pump_and_dump'],
    'active_trading_sentiment': ['buying', 'selling', 'short_term_trading', 'dip', 'leverage', 'going_short', 'technical_analysis', 'prediction'],
    'long_term_investment_sentiment': ['long_term_investing', 'hodling', 'strategy', 'investing_trading', 'portfolio', 'advice_support', 'learning', 'due_diligence'],
    'market_narrative_sentiment': ['bottom', 'cheap', 'fomo', 'fomo_theme', 'whales'],
    'core_technology_sentiment': ['technology', 'scaling', 'governance', 'de_centralisation', 'open_source', 'token_economics', 'whitepaper', 'security'],
    'development_ecosystem_sentiment': ['progress', 'roadmap', 'fork', 'mining', 'wallet', 'stablecoin', 'ico', 'competition', 'use_case_applications', 'adoption', 'team'],
    'news_events_sentiment': ['launch', 'announcements', 'partnerships', 'listing', 'airdrop', 'rebranding', 'good_news', 'bad_news'],
    'regulations_sentiment': ['regulation_politics', 'tax', 'etf', 'institutional_money', 'banks'],
    'community_social_sentiment': ['community', 'marketing', 'shilling', 'bots', 'waiting'],
    'price_sentiment': ['price'],
    'volume_sentiment': ['volume'],
    'marketcap_sentiment': ['marketcap']
}

def combine_features(input_file, output_file, groups=None):
    """
    Averages related sentiment columns into grouped features and saves them.

    Reads ``input_file``, copies its 'date' column into the output as 'Date',
    and adds one column per group holding the row-wise mean of that group's
    columns. Requested columns that are absent from the input are skipped
    with a warning; a group with no columns present is omitted entirely.

    Args:
        input_file (str): Path of the CSV to read; must contain a 'date' column
        output_file (str): Path of the CSV to write
        groups (dict | None): Maps output feature name -> list of input column
            names. Defaults to the module-level ``column_groups``.

    Returns:
        None. The grouped features are written to ``output_file``.
    """
    import warnings  # local import so the function does not rely on another cell

    if groups is None:
        groups = column_groups

    # Read the CSV file with the date column as datetime
    df = pd.read_csv(input_file, parse_dates=['date'])

    # Start the output with the date column (renamed to 'Date')
    grouped_features = pd.DataFrame()
    grouped_features['Date'] = df['date']

    # Calculate mean for each group
    for group_name, columns in groups.items():
        existing_columns = [col for col in columns if col in df.columns]
        missing_columns = [col for col in columns if col not in df.columns]
        if missing_columns:
            # Surface typos / absent topics instead of dropping them silently
            warnings.warn(
                f"Group '{group_name}': columns not found in {input_file} "
                f"and skipped: {missing_columns}",
                stacklevel=2,
            )
        if existing_columns:
            grouped_features[group_name] = df[existing_columns].mean(axis=1)

    # Save the grouped features to a new CSV file
    grouped_features.to_csv(output_file, index=False)
    print(f"Grouped features saved to {output_file}")

# Source and destination CSVs, relative to the current working directory.
# NOTE(review): the first cell saves to '../data/bitcoin_sentiment.csv' while
# this reads from './raw_data/' - confirm which location is intended.
input_file = "./raw_data/bitcoin_sentiment.csv"  # Replace with your input CSV file path
output_file = "./data/sentiment_grouped.csv"  # Output file path
combine_features(input_file=input_file, output_file=output_file)

## Bull Run Index (CBBI)

In [None]:
import json
import csv
import datetime

# Parse the CBBI export directly from the open file handle
with open("data/Bitcoin Bull Run Index (CBBI).json", "r") as file:
    data = json.load(file)

def extract_and_write_cbbi_data(json_data, csv_path):
    """Extracts multiple indicators from CBBI JSON data and writes them to a CSV file.

    The CSV has a "Date" column followed by one column per indicator, with one
    row per timestamp in ascending order. Timestamp keys are interpreted as
    Unix seconds and rendered as UTC calendar dates, so the output does not
    depend on the time zone of the machine running the code.

    Args:
        json_data (dict): The loaded JSON data; maps each indicator name to a
            dict of {timestamp string: value}.
        csv_path (str): The file path for the output CSV.

    Raises:
        KeyError: If any expected indicator is missing from the JSON data.
        ValueError: If timestamps are not consistent across indicators.
    """
    indicators = [
        "Price", "PiCycle", "RUPL", "RHODL", "Puell",
        "2YMA", "Trolololo", "MVRV", "ReserveRisk", "Woobull", "Confidence"
    ]

    # Ensure all indicators exist in the data
    for indicator in indicators:
        if indicator not in json_data:
            raise KeyError(f"Indicator '{indicator}' not found in JSON data.")

    # Every indicator must cover exactly the same set of timestamps
    timestamps = set(json_data[indicators[0]])
    for indicator in indicators[1:]:
        if set(json_data[indicator]) != timestamps:
            raise ValueError(f"Timestamps mismatch in indicator '{indicator}'.")

    # Keys are strings, so order them numerically rather than lexically
    sorted_timestamps = sorted(timestamps, key=int)

    utc = datetime.timezone.utc

    # Write to CSV
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Date"] + indicators)

        for timestamp in sorted_timestamps:
            # Convert in UTC: a local-time conversion can land on the previous
            # or next day depending on the machine's UTC offset.
            date = datetime.datetime.fromtimestamp(int(timestamp), tz=utc).strftime('%Y-%m-%d')
            values = [json_data[indicator][timestamp] for indicator in indicators]
            writer.writerow([date] + values)

# Usage: write the parsed CBBI indicators next to the source JSON
extract_and_write_cbbi_data(
    json_data=data,
    csv_path='data/Bitcoin Bull Run Index (CBBI).csv',
)