In [1]:
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import pandas as pd
import json
from tqdm import tqdm

# HTTP transport used to send GraphQL requests as JSON.
# NOTE(review): `url1` is not defined in any visible cell; it must already exist
# in the kernel, otherwise a fresh "Restart & Run All" raises NameError here.
# TODO confirm where the endpoint URL is supposed to come from.
_transport = RequestsHTTPTransport(
    url=url1,
    use_json=True,
)

# GraphQL client shared by the data-fetching cell below.
# `fetch_schema_from_transport=True` asks gql to retrieve the schema through the
# transport (presumably so queries can be validated client-side; see gql docs).
client = Client(
    transport=_transport,
    fetch_schema_from_transport=True,
)

import pandas as pd
from urllib3.exceptions import IncompleteRead
import time

## DATA

In [80]:
# Download the nominations of every era and assemble them into `df`:
# one row per nomination, the nested `validator` fields flattened into their
# own columns, plus an `era_id` column identifying the era the row came from.
era_frames = []   # one DataFrame per era; concatenated once after the loop
failed_eras = []  # eras whose request never succeeded (reported at the end)

# Iterate over eras from 1 to 1125
for era_id in tqdm(range(1, 1126)):
    # Construct the query for the current era
    query = '''
    query MyQuery {
      eraNominations(where: {era: {id_eq: "%d"}}) {
        nominatorId
        validator {
          nominatorsCount
          selfBonded
          totalBonded
          validatorId
        }
        id
        amount
      }
    }
    ''' % era_id

    # Reset per era: if every attempt fails we must not fall through with the
    # previous era's payload (it would be stored under the wrong `era_id`).
    result = None

    # Retry loop for handling incomplete reads
    retries = 20
    while retries > 0:
        try:
            # Execute the query and get the result
            result = client.execute(gql(query))
            break
        except IncompleteRead:
            retries -= 1
            print(f"IncompleteRead error occurred, retrying... {retries} retries left")
            # No point in waiting when there is no attempt left.
            if retries:
                time.sleep(10)

    if result is None:
        # All attempts failed: remember the era instead of silently reusing data.
        failed_eras.append(era_id)
        continue

    # Skip eras without the key or with an empty list; an empty frame has no
    # 'validator' column and would raise KeyError below.
    era_nominations = result.get('eraNominations')
    if not era_nominations:
        continue

    # Convert the nominations data to a DataFrame
    era_df = pd.DataFrame(era_nominations)

    # Split the 'validator' column (a dict per row) into separate columns
    validator_df = era_df['validator'].apply(pd.Series)
    era_df = pd.concat([era_df.drop(['validator'], axis=1), validator_df], axis=1)

    # Add the era_id column to the DataFrame
    era_df['era_id'] = era_id

    era_frames.append(era_df)

# Concatenate once: growing `df` with pd.concat inside the loop re-copies all
# previously collected rows on every iteration.
df = pd.concat(era_frames, ignore_index=True) if era_frames else pd.DataFrame()

if failed_eras:
    print(f"WARNING: no data fetched for {len(failed_eras)} era(s): {failed_eras}")

100%|██████████| 183/183 [33:16<00:00, 10.91s/it]


In [88]:
# final_df.to_csv('nominator_data_full.csv')

In [None]:
# Load the nominations dataset from a local pickle, replacing whatever `df`
# currently holds (e.g. the frame built by the fetch loop).
# NOTE(review): no visible cell writes 'nominator_data_full.pkl' (the only export
# shown is a commented-out `final_df.to_csv(...)`); confirm how it is produced.
# Unpickling can execute arbitrary code, so only load files from a trusted source.
df = pd.read_pickle('nominator_data_full.pkl')

In [85]:
# Number of distinct eras present in the dataset.
df['era_id'].nunique()

1125

In [86]:
# Number of distinct validators present in the dataset.
df['validatorId'].nunique()

1185

In [87]:
# Number of distinct nominators present in the dataset.
df['nominatorId'].nunique()

80709

In [89]:
# Total number of rows.
# NOTE(review): `final_df` is never assigned in the visible cells; it only works
# if it is still alive in the kernel. Presumably it holds the same data as `df`
# (TODO confirm), and should be defined explicitly or replaced.
len(final_df)

19157515

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display
from ipywidgets import interact, IntSlider

# Load data
#df = pd.read_csv('your_dataset.csv')

# Synthetic example frame used only by this interactive demo.
# It is deliberately NOT called `df`: assigning it to `df` would overwrite the
# real nominations dataset, and later cells that read `df['era_id']` /
# `df['amount']` would fail because this frame has neither column.
np.random.seed(0)
demo_df = pd.DataFrame({
    'nominatorId': np.random.choice(range(80709), 1000),
    'validatorId': np.random.choice(range(1185), 1000),
    'totalBonded': np.random.rand(1000),
})

# Nominator x validator matrix of `totalBonded`
# (pandas' default aggregation, the mean, is applied to duplicate pairs).
pivot_table = demo_df.pivot_table(values='totalBonded', index='nominatorId', columns='validatorId')

@interact
def plot_heatmap(topk=IntSlider(min=1, max=50, value=10)):
    """Draw a heatmap restricted to the `topk` largest nominators and validators.

    Rows/columns are ranked by their sum over the module-level `pivot_table`,
    which is looked up each time the slider moves.

    Parameters
    ----------
    topk : int
        How many nominators (rows) and validators (columns) to keep; driven by
        the slider widget (1-50, default 10).
    """
    # Select top k nominators and validators
    top_nominators = pivot_table.sum(axis=1).nlargest(topk).index
    top_validators = pivot_table.sum(axis=0).nlargest(topk).index

    # Select corresponding rows and columns from pivot table
    subset = pivot_table.loc[top_nominators, top_validators]

    # Plot heatmap on an explicit Axes rather than the implicit pyplot state
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(subset, cmap='viridis', ax=ax)
    ax.set_title(f"Heatmap for Top {topk} Validators and Nominators")
    ax.set_xlabel("Validators")
    ax.set_ylabel("Nominators")
    plt.show()

In [109]:
# Alias used as the input of the Sankey cell below.
# NOTE(review): `final_df` is never assigned in the visible cells (hidden kernel
# state), and despite the `_agg` name no aggregation happens here; this is a plain
# reference to the same object. TODO confirm the intended source frame.
df_agg = final_df

In [None]:
import plotly.graph_objects as go

# Distinct node identifiers, computed once per column.
nominators = df_agg['nominatorId'].unique()
validators = df_agg['validatorId'].unique()

# Node labels: all nominators first, then all validators.
labels = list(nominators) + list(validators)

# Node colors, here for simplicity, nominators are colored blue and validators red
colors = ['blue'] * len(nominators) + ['red'] * len(validators)

# Separate label -> node-index maps for each side. A single shared dict would
# let an ID that occurs in both columns be overwritten by its validator index,
# attaching that nominator's outgoing links to the validator node.
nominator_index = {label: idx for idx, label in enumerate(nominators)}
validator_index = {label: idx for idx, label in enumerate(validators, start=len(nominators))}

# Prepare source, target and value lists (one link per row of df_agg)
# NOTE(review): rows are not aggregated per (nominator, validator) pair here;
# with a very large frame the figure may be too heavy to render. TODO confirm.
source = df_agg['nominatorId'].map(nominator_index)
target = df_agg['validatorId'].map(validator_index)
value = df_agg['amount']

# Create a Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color='black', width=0.5),
        label=labels,
        color=colors
    ),
    link=dict(
        source=source,  # indices into the nominator part of `labels`
        target=target,  # indices into the validator part of `labels`
        value=value
    )
)])

fig.update_layout(title_text='Flow of Amounts from Nominators to Validators', font_size=10)
fig.show()


In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import imageio

import os

# Preprocess the data: scale 'amount' by the largest amount of the same era.
# `transform('max')` returns a Series aligned with df's own index. The previous
# `groupby(...).apply(lambda x: x / x.max())` yields an (era_id, row) MultiIndex
# on pandas >= 2.0, which cannot be assigned back as a column.
# NOTE(review): assumes `amount` is numeric — TODO confirm (if the API delivers
# it as strings it must be converted before this division).
df['normalized_amount'] = df['amount'] / df.groupby('era_id')['amount'].transform('max')

# Get the list of eras (order of first appearance)
eras = df['era_id'].unique()

# Create the directory for the per-era images if it does not exist yet
os.makedirs('images', exist_ok=True)

# Create and save a heatmap for each era.
# A single groupby pass replaces re-filtering the whole frame for every era;
# sort=False keeps the same first-appearance order as `eras`.
for era, df_era in df.groupby('era_id', sort=False):
    # Nominator x validator matrix of normalized amounts (0 where no nomination)
    pivot_table = df_era.pivot_table(values='normalized_amount', index='nominatorId', columns='validatorId', fill_value=0)

    # Plot heatmap
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(pivot_table, cmap='viridis', ax=ax)
    ax.set_title(f"Era {era}")

    # Save the heatmap as an image, then release the figure's memory
    fig.savefig(f'images/heatmap_{era}.png')
    plt.close(fig)

# Assemble the saved frames, in era order, into an animated GIF
images = [imageio.imread(f'images/heatmap_{era}.png') for era in eras]
imageio.mimsave('heatmap_video.gif', images, fps=10)
