# In [4]:
import pandas as pd
from datetime import datetime, timedelta
from binance.client import Client
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Look-back configuration.
# START_HOUR is the hour of day (0 = 00:00) whose rows adjust_timestamp shifts
# back by one minute. HOURS_TO_SUBTRACT is the look-back duration subtracted
# from the end of the window; one day is expressed as 24 + 1 hours.
START_HOUR = 0
HOURS_TO_SUBTRACT = 25

# Binance API credentials.
# NOTE: these are placeholder values; avoid committing real keys to source.
api_key, api_secret = 'XXX', 'YYY'

# Shared Binance client used by fetch_symbol_data
client = Client(api_key=api_key, api_secret=api_secret)

# Function to fetch historical data for a symbol
def fetch_symbol_data(symbol, start_time, end_time):
    """Download hourly klines for one symbol and return them as a DataFrame.

    The request goes through the module-level ``client``.

    Args:
        symbol: Trading pair passed to the API and stored in every row.
        start_time: Start of the window; formatted with ``strftime`` as
            ``'%Y-%m-%d %H:%M:%S'`` before being sent.
        end_time: End of the window; formatted the same way.

    Returns:
        DataFrame with columns ``Symbol``, ``DataTime``, ``ClosingPrice`` and
        ``Volume``, one row per returned kline.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    raw_klines = client.get_historical_klines(
        symbol=symbol,
        interval=Client.KLINE_INTERVAL_1HOUR,
        start_str=start_time.strftime(time_format),
        end_str=end_time.strftime(time_format),
    )

    rows = []
    for kline in raw_klines:
        # Field 0 is a millisecond timestamp; fields 4 and 5 are used as the
        # closing price and the volume respectively.
        timestamp = pd.to_datetime(kline[0], unit='ms')
        rows.append([symbol, timestamp, float(kline[4]), float(kline[5])])

    return pd.DataFrame(rows, columns=['Symbol', 'DataTime', 'ClosingPrice', 'Volume'])

def calculate_hourly_data(symbols):
    """Build the hourly return and volume-share tables for ``symbols``.

    The look-back window ends at the most recent START_HOUR:00 (UTC) and
    spans HOURS_TO_SUBTRACT hours before it.

    Args:
        symbols: Iterable of trading-pair names to fetch.

    Returns:
        Tuple ``(returns_df, volumes_percentage_df)``: both are indexed by
        hour with one column per symbol.
    """
    # Function-scope import so the file's import block does not need to change.
    from datetime import timezone

    # The window boundaries are sent to the API as timezone-less strings,
    # which python-binance interprets as UTC, and the kline timestamps are
    # parsed from epoch milliseconds (UTC). Compute the boundary in UTC as
    # well, then drop tzinfo so the values stay naive like the kline data.
    end_time = datetime.now(timezone.utc).replace(
        hour=START_HOUR, minute=0, second=0, microsecond=0, tzinfo=None
    )
    start_time = end_time - timedelta(hours=HOURS_TO_SUBTRACT)

    # Fetch and combine data for all symbols
    data_list = [fetch_symbol_data(symbol, start_time, end_time) for symbol in symbols]
    all_data_df = pd.concat(data_list, ignore_index=True)
    all_data_df['DataTime'] = pd.to_datetime(all_data_df['DataTime'])

    # Move START_HOUR rows back one minute so they sort into the previous day
    all_data_df = adjust_timestamp(all_data_df)

    # Per-symbol returns; rows without a return are dropped
    all_data_df = calculate_returns(all_data_df)

    # Hour-of-day label (HH:MM) used as the aggregation key
    all_data_df['Hour'] = all_data_df['DataTime'].dt.strftime('%H:%M')

    # Drop the leading remaining row of each symbol
    all_data_df = remove_first_row(all_data_df)

    # Aggregate per symbol and hour, then pivot into hour x symbol tables
    hourly_data_df = aggregate_hourly_data(all_data_df)
    returns_df, volumes_percentage_df = pivot_data(hourly_data_df)

    return returns_df, volumes_percentage_df

def adjust_timestamp(df):
    """Shift every row whose ``DataTime`` hour equals START_HOUR back one minute.

    The frame is modified in place and also returned.
    """
    one_minute = timedelta(minutes=1)
    at_start_hour = df['DataTime'].dt.hour == START_HOUR
    df.loc[at_start_hour, 'DataTime'] = df.loc[at_start_hour, 'DataTime'] - one_minute
    return df

def calculate_returns(df):
    """Add a per-symbol ``Return`` column and drop rows that have no return.

    ``Return`` is the fractional change of ``ClosingPrice`` relative to the
    previous row of the same ``Symbol``. Each symbol's first row has no
    previous price and is therefore removed.

    Args:
        df: Frame with at least ``Symbol`` and ``ClosingPrice`` columns.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    returns = df.groupby('Symbol')['ClosingPrice'].pct_change()
    # assign() builds a new frame instead of writing into the caller's one.
    with_returns = df.assign(Return=returns)
    # Only a missing return disqualifies a row; NaNs in unrelated columns
    # must not silently remove data.
    return with_returns.dropna(subset=['Return'])

def remove_first_row(df):
    """Drop the first row of every ``Symbol`` group.

    Rows come back grouped by symbol in sorted symbol order, keeping their
    original relative order within each symbol, with a fresh 0..n-1 index.

    Args:
        df: Frame with a ``Symbol`` column.

    Returns:
        A new DataFrame without each symbol's leading row; all columns,
        including ``Symbol``, are kept.
    """
    # cumcount() numbers the rows of each group from 0, so "> 0" keeps all but
    # the first. This avoids groupby.apply on the grouping column, which is
    # deprecated and would lose the Symbol column once the column is excluded.
    position_in_group = df.groupby('Symbol').cumcount()
    remaining = df[position_in_group > 0]
    # A stable sort reproduces the symbol-sorted layout of the grouped result.
    return remaining.sort_values('Symbol', kind='mergesort').reset_index(drop=True)

def aggregate_hourly_data(df):
    """Aggregate rows per symbol and hour and derive percentage columns.

    Args:
        df: Frame with ``Symbol``, ``Hour`` (``'HH:MM'`` strings),
            ``ClosingPrice``, ``Volume`` and ``Return`` columns.

    Returns:
        DataFrame with one row per (``Symbol``, ``Hour``) holding the last
        ``ClosingPrice``, the mean ``Volume``, the mean ``Return`` scaled to
        percent, the symbol's ``TotalVolume`` and the row's
        ``VolumePercentage`` of that total. ``Hour`` is converted to
        ``datetime.time`` values.
    """
    hourly_data_df = (
        df.groupby(['Symbol', 'Hour'])
        .agg({'ClosingPrice': 'last', 'Volume': 'mean', 'Return': 'mean'})
        .reset_index()
    )

    # Express returns as percentages (0.024 -> 2.4)
    hourly_data_df['Return'] *= 100

    # Share of each hour in the symbol's summed hourly volume, in percent
    hourly_data_df['TotalVolume'] = hourly_data_df.groupby('Symbol')['Volume'].transform('sum')
    hourly_data_df['VolumePercentage'] = (
        hourly_data_df['Volume'] / hourly_data_df['TotalVolume'] * 100
    )

    # The former "23:59 adjustment" compared these 'HH:MM' strings with a
    # datetime.time object, so it never matched (and its replacement value was
    # 23:59 again); the labels are simply parsed into time objects.
    hourly_data_df['Hour'] = pd.to_datetime(hourly_data_df['Hour'], format='%H:%M').dt.time

    return hourly_data_df

def pivot_data(df):
    """Reshape the hourly frame into two hour-by-symbol tables.

    Returns:
        Tuple ``(returns_df, volumes_percentage_df)``: the first holds the
        ``Return`` values, the second the ``VolumePercentage`` values, each
        indexed by ``Hour`` with one column per ``Symbol``.
    """
    def _hour_by_symbol(value_column):
        # One wide table per value column, rows = Hour, columns = Symbol
        return df.pivot(index='Hour', columns='Symbol', values=value_column)

    return _hour_by_symbol('Return'), _hour_by_symbol('VolumePercentage')


# List of symbols to retrieve data for
symbols = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT']

# Calculate hourly data for the specified symbols
returns_df, volumes_percentage_df = calculate_hourly_data(symbols)


def _finish_heatmap_axes(ax, title, y_tick_pad=None):
    """Apply the tick, axis-label and title styling shared by both heatmaps.

    Args:
        ax: Axes that already contains the heatmap.
        title: Title text placed above the heatmap.
        y_tick_pad: Optional spacing between the y tick labels and the axes;
            when omitted the current padding is left untouched.
    """
    # Hide the tick marks themselves (length 0) but keep the labels
    y_tick_style = {'axis': 'y', 'which': 'both', 'length': 0, 'labelsize': 9}
    if y_tick_pad is not None:
        y_tick_style['pad'] = y_tick_pad
    ax.tick_params(**y_tick_style)

    # Symbol names go on top of the heatmap
    ax.xaxis.tick_top()
    ax.tick_params(axis='x', which='both', length=0, labelsize=9)

    # No axis titles; the heatmap title carries the meaning
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_title(title, pad=30, fontsize=11)


# Row labels as HH:MM. The index holds time objects, whose default text form
# would also show the seconds.
hour_labels = [hour.strftime('%H:%M') for hour in returns_df.index]

# Create a PDF file to save the heatmaps
with PdfPages('1 Day.pdf') as pdf:
    # Create a 1x2 grid for subplots
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 8))

    # Returns heatmap (RdBu colormap). Annotation font size is set through
    # annot_kws; the earlier post-hoc font-size loops iterated over an empty
    # slice of the figure's children and never ran, so they are gone.
    sns.heatmap(returns_df[symbols], cmap='RdBu', annot=True, fmt='.2f', cbar=False,
                xticklabels=symbols, yticklabels=hour_labels, ax=axes[0],
                annot_kws={'size': 9})

    # Keep the hour labels horizontal
    axes[0].set_yticklabels(hour_labels, rotation=0)
    _finish_heatmap_axes(axes[0], 'Hourly Returns [ 1 day ]', y_tick_pad=10)

    # Volume-share heatmap with a white-to-blue colormap and no color bar
    cmap = sns.color_palette("Blues", as_cmap=True)
    heatmap_volumes_percentage = sns.heatmap(volumes_percentage_df[symbols], cmap=cmap, annot=True,
                                             fmt='.1f', cbar=False, xticklabels=symbols,
                                             ax=axes[1], annot_kws={'size': 9})

    # The hour labels are already shown on the first subplot
    heatmap_volumes_percentage.set_yticklabels([])
    _finish_heatmap_axes(axes[1], 'Hourly Volumes [ 1 day ]')

    # Adjust the left margin to provide more space for the hour labels
    fig.subplots_adjust(left=0.2)

    # Add footnotes to the PDF
    footnote_text = "*Figures are in percentage, i.e., 2.4 = 2.4%\n**Volume table illustrates the hourly percentage of total vol per day"
    fig.text(0.5, 0.05, footnote_text, ha='center', fontsize=9)

    # Save and close this specific figure rather than whichever is current
    pdf.savefig(fig)
    plt.close(fig)

