## Loading data

### Imports

In [None]:
from datetime import datetime
import sys
import os
import plotly.io as pio
import pandas as pd # type: ignore
import random
pio.renderers.default = "png"
sys.path.append('../')
import xarray as xr # type: ignore
import matplotlib.pyplot as plt # type: ignore
import seaborn as sns # type: ignore
import json

### Reading the voting data into a dataframe

In [None]:
from nqg_model.experiment import full_historical_data_run_plus_counterfactual
from nqg_model.load_data import retrieve_id_maps, retrieve_vote_data
from helper_functions import generate_users_file, generate_submissions_file
# Inputs for the selected round are read from ../data/r<round_no>/.
round_no = 27
folder_path = f'../data/r{round_no}/'

votes = pd.read_csv(os.path.join(folder_path, 'votes.csv'))

# Each companion CSV is generated from the votes table only when it is not
# already present in the round folder.
for csv_name, build_csv in (
    ('users.csv', generate_users_file),
    ('submissions.csv', generate_submissions_file),
):
    if not os.path.exists(os.path.join(folder_path, csv_name)):
        build_csv(votes, folder_path)



In [None]:
# Id lookup tables for this round (not referenced again in the cells shown).
maps = retrieve_id_maps(folder_path)

sim_backtest_df, df = full_historical_data_run_plus_counterfactual(
    folder_path=folder_path,
    round_no=round_no,
)
# Work on a copy of the simulation output with a fresh 0..n-1 row index.
sim_df = sim_backtest_df.reset_index(drop=True)

# Align the vote table's column names with the ones used in the rest of the
# notebook.
df = df.rename(columns={'user': 'user_ref', 'project': 'submission'})

# Fix the seed of Python's `random` module for reproducibility.
random.seed(42)


In [None]:
# Keep the tensor column (with its scenario label) in a side table and remove
# it from sim_df so that sim_df renders quickly.
neuron_power_tensor_df = sim_df.loc[:, ['neuron_power_tensor', 'label']]
sim_df = sim_df.drop(columns='neuron_power_tensor')

sim_df.head()

### Functions to process and visualize the data 

In [None]:
from helper_functions import *

## Project Voting Summary

In [None]:
# Compute each tally once, then print the same report lines.
vote_kind = df.vote_type
yes_count = (vote_kind == 'Yes').sum()
no_count = (vote_kind == 'No').sum()
direct_count = vote_kind.isin(['Yes', 'No']).sum()
delegating_users = df.loc[vote_kind == 'Delegate', 'user_ref']

print(f"Unique Submissions:\t{df.submission.nunique()}")
print(f"Vote Count:\t{len(df)}")
print(f"Delegated Votes Count:\t{(vote_kind == 'Delegate').sum()}")
print(f"'Yes' Direct Votes Count:\t{yes_count}  ({yes_count / direct_count:.1%} of direct)")
print(f"'No' Direct Votes Count :\t{no_count} ({no_count / direct_count:.1%} of direct)")
print("---")
print(f"Total Voter Count: {df.user_ref.nunique()}")
print(f"Voters that did choose to delegate to at least 1 project: {delegating_users.nunique()}")

print("### Delegation Outcomes ###")



In [None]:
from nqg_model.types import ProjectAction, Vote

# First simulation row labelled 'backtesting' at timestep 1.
is_backtest_step = (sim_df.label == 'backtesting') & (sim_df.timestep == 1)
backtesting_df = sim_df[is_backtest_step].iloc[0]

# Resolved vote of every (user, submission) pair whose action was a delegation.
delegated_outcomes = [
    backtesting_df.vote_decision_matrix[user][submission]
    for user, actions in backtesting_df.action_matrix.items()
    for submission, action in actions.items()
    if action == ProjectAction.Delegate
]
n_delegate_yes = sum(outcome == Vote.Yes for outcome in delegated_outcomes)
n_delegate_no = sum(outcome == Vote.No for outcome in delegated_outcomes)
n_delegate_abstain = sum(outcome == Vote.Abstain for outcome in delegated_outcomes)

# Every account that appears in at least one delegatee collection.
delegate_set = set().union(*backtesting_df.delegatees.values())

print(f"Delegations mapped to `Yes`: {n_delegate_yes}")
print(f"Delegations mapped to `No`: {n_delegate_no}")
print(f"Delegations mapped to `Abstain`: {n_delegate_abstain}")
print(f"Unique Delegates: {len(delegate_set)}")



In [None]:
# Run the project-level summary on the vote table for this round.
# NOTE(review): the helper is not defined in this notebook; presumably it is
# provided by the `helper_functions` star import.
project_voting_summary(df, round_no)

## User Engagement Overview

In [None]:
# Run the user-level engagement summary on the vote table for this round
# (helper defined outside this notebook).
user_engagement_overview(df, round_no)

In [None]:
# Number of rows per vote type in the vote table.
df.vote_type.value_counts()

In [None]:
# Tally the vote types once. `.get(..., 0)` keeps the cell working for rounds
# in which a vote type does not occur (plain `[...]` raises KeyError then).
vote_type_counts = df.vote_type.value_counts()
yes_votes = vote_type_counts.get('Yes', 0)
no_votes = vote_type_counts.get('No', 0)
delegated_votes = vote_type_counts.get('Delegate', 0)
total_votes = yes_votes + no_votes + delegated_votes
direct_votes = yes_votes + no_votes

# Shares are relative to direct (Yes/No) votes only; they are undefined (NaN)
# when nobody voted directly.
perc_yes = yes_votes / direct_votes * 100 if direct_votes else float('nan')
perc_no = no_votes / direct_votes * 100 if direct_votes else float('nan')

unique_delegators = df[df.vote_type == 'Delegate'].user_ref.nunique()
print(f"Total votes: {total_votes}")
print(f"Yes votes: {yes_votes}")
print(f"No votes: {no_votes}")
print(f"Delegated votes: {delegated_votes}")
print(f"Percentage of Yes votes: {perc_yes}")
print(f"Percentage of No votes: {perc_no}")
print(f"Unique users who delegated atleast once: {unique_delegators}")


In [None]:
# Example of the narrative produced below, with hard-coded figures
# (NOTE(review): presumably copied from an earlier run; may not match the
# current data):
# During SCF #27, 13 submissions received 453 direct votes, while 318 were delegated. The direct votes can be split into 326 (72%) `Yes` and 127 (28%) `No` votes.

# A total of 41 unique voters participated. Out of these, 29 voters chose to delegate their vote for at least one project


# Same narrative, filled in from the vote table and the totals computed in the
# previous cell (direct_votes, delegated_votes, yes_votes, no_votes, perc_yes,
# perc_no, unique_delegators).
print(f'''
      During SCF #{round_no}, {df['submission'].nunique()} submissions received {direct_votes} direct votes, while {delegated_votes} were delegated. 
      The direct votes can be split into {yes_votes} ({perc_yes:.2f}%) `Yes` and {no_votes} ({perc_no:.2f}%) `No` votes. 
      A total of {df['user_ref'].nunique()} unique voters participated. Out of these, {unique_delegators} voters chose to delegate their vote for at least one project''')

In [None]:
# Per-user count of each vote type: one row per user, one column per vote
# type, missing combinations filled with 0.
user_vote_summary = (
    df.groupby('user_ref')
    .vote_type.value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)
# Shape of the sub-table of users with a non-zero 'Delegate' count.
user_vote_summary.loc[user_vote_summary['Delegate'] != 0].shape


## Trust Network Analysis
- Visualization of trust relationships and their impact on voting results.


In [None]:
# Run the trust-network analysis on the simulation results (helper defined
# outside this notebook).
trust_network_analysis(sim_df)

## Scenario Cross Comparison



Terminology:
- Standard Voting (labelled `1 person, 1 vote` in the tables and charts below): This refers to a voting mechanism wherein each participant possesses equal voting power, and the process does not involve Quorum Delegation.
- NQG: An acronym for Neural Quorum Governance, a system in which voting weight is determined through a combination of factors such as Trust, Expertise, and Voting History Neurons, in addition to the inclusion of Quorum Delegation.
- NG w/o QD: Stands for Neural Governance without Quorum Delegation. In this framework, voting weight is assigned based on Trust, Expertise, and Voting History Neurons; however, it excludes the aspect of Quorum Delegation.
- QD w/o NG: Denotes Quorum Delegation without Neural Governance. Under this system, each voter retains an equal vote, but the process allows for the delegation of votes through Quorum Delegation.

In [None]:
# Simulation scenario label -> display name used in the comparison tables.
scenario_map = dict(
    backtesting='NQG',
    no_QD='NG w/o QD',
    no_NG='QD w/o NG',
    no_NQG='1 person, 1 vote',
)

# The first element of the returned 4-tuple is not used.
_, counterfact_df, full_df, ranked_full_df = get_results_counterfact_full_ranked_dfs(
    df,
    sim_df,
    scenario_map,
)

In [None]:
# Bar chart of the 'result' column per submission, largest first.
power_by_submission = full_df.reset_index().sort_values('result', ascending=False)
px.bar(
    power_by_submission,
    x='index',
    y='result',
    labels={'result': 'Power', 'index': 'Submission'},
).update_layout(height=600)

In [None]:
# First counterfactual view of the combined results table (helper defined
# outside this notebook; presumably renders a heatmap).
counterfactual_heatmap_1(full_df)

In [None]:
# Second counterfactual view of the same table. The helper's name has no
# underscore before the digit, unlike its `_1` / `_3` siblings.
counterfactual_heatmap2(full_df)

In [None]:
# Third counterfactual view, applied to the ranked results table.
counterfactual_heatmap_3(ranked_full_df)

In [None]:
# Same view restricted to the 'result' and '1 person, 1 vote' columns, with
# rows ordered by 'result'.
counterfactual_heatmap_3(ranked_full_df[['result', '1 person, 1 vote']].sort_values('result'))

In [None]:
# Same view restricted to the 'NQG' and 'NG w/o QD' columns, with rows ordered
# by 'NQG'.
counterfactual_heatmap_3(ranked_full_df[['NQG', 'NG w/o QD']].sort_values('NQG'))

## Neuron Power Tensor Analysis

In [None]:
# Take the tensor stored in the second 'backtesting' row of the side table.
# NOTE(review): positional index 1 presumably corresponds to timestep 1 -
# confirm against the simulation output.
is_backtesting = neuron_power_tensor_df['label'] == 'backtesting'
neuron_power_tensor = neuron_power_tensor_df.loc[is_backtesting, 'neuron_power_tensor'].values[1]

In [None]:
# Inspect the coordinates of the selected tensor.
neuron_power_tensor.coords

In [None]:
# neuron_power_tensor = neuron_power_tensor_df[neuron_power_tensor_df['label']=='backtesting']
# neuron_power_tensor=neuron_power_tensor['neuron_power_tensor'].values[1]

# neuron_power_tensor_neuron0_layer1=neuron_power_tensor.sel(layer='layer_1',neuron='past_round')/neuron_power_tensor.sel(layer='layer_1').sum(dim='neuron') 

# # Replace NaN values with 0
# neuron_power_tensor_neuron0_layer1 = neuron_power_tensor_neuron0_layer1.fillna(0)
# # Replace inf values with 0
# neuron_power_tensor_neuron0_layer1 = neuron_power_tensor_neuron0_layer1.where(np.isfinite(neuron_power_tensor_neuron0_layer1), 0)

# neuron_power_tensor.loc[dict(neuron='past_round', layer='layer_1')] = neuron_power_tensor_neuron0_layer1.values

In [None]:
# neuron_power_tensor = neuron_power_tensor_df[neuron_power_tensor_df['label']=='backtesting']
# neuron_power_tensor=neuron_power_tensor['neuron_power_tensor'].values[1]


# Vote decisions stored at row label 1 of sim_df, as a table.
# NOTE(review): elsewhere the matrix is indexed as [user][project], which
# would make users the columns and projects the rows here - confirm.
vote_decision_matrix=pd.DataFrame(sim_df['vote_decision_matrix'][1])

# Wrap the table as a 2-D DataArray: first axis 'project', second axis 'user'.
vote_tensor = xr.DataArray(vote_decision_matrix, dims=['project', 'user'])

# Ensure matching coordinates (This step is crucial for correct broadcasting and multiplication)
# NOTE(review): this overwrites the labels positionally with the tensor's
# project/user order; it is only correct if the table already lists projects
# and users in that same order - confirm, since a mismatch would silently
# pair votes with the wrong power values.
vote_tensor = vote_tensor.assign_coords(project=neuron_power_tensor.project.values, user=neuron_power_tensor.user.values)

# Broadcast 'vote_tensor' across the 'layer' and 'neuron' dimensions
# (the two new dimensions are inserted as the last two axes).
vote_tensor = vote_tensor.expand_dims({'layer': neuron_power_tensor.layer, 'neuron': neuron_power_tensor.neuron}, axis=[-2, -1])
def calculate_final_metrics(neuron_power_tensor, vote_tensor):
    """Combine vote-weighted neuron power into NQG and counterfactual totals.

    Parameters
    ----------
    neuron_power_tensor : xarray.DataArray
        Must have `layer`, `neuron` and `user` dimensions, layer labels
        'layer_0' and 'layer_1', and neuron labels 'trust_score',
        'reputation_score' and 'past_round' (the labels selected below).
    vote_tensor : xarray.DataArray
        Vote values; multiplied element-wise with `neuron_power_tensor`.

    Returns
    -------
    xarray.Dataset
        Variables 'NQG', 'NQG_wo_L1_trust_score',
        'NQG_wo_L1_reputation_score' and 'NQG_wo_L2_past_round', each summed
        over the `user` dimension.
    """
    # Weight every neuron's power by the corresponding vote.
    resulting_tensor = neuron_power_tensor * vote_tensor

    # The local names are 1-based, the labels are 0-based:
    # layer1 <-> 'layer_0', layer2 <-> 'layer_1'.
    layer1 = resulting_tensor.sel(layer='layer_0')
    layer2 = resulting_tensor.sel(layer='layer_1')

    # Neurons are summed within 'layer_0' and multiplied within 'layer_1'.
    layer1_sum = layer1.sum(dim='neuron')
    layer2_prod = layer2.prod(dim='neuron')

    # Final power = |layer_0 sum| * |layer_1 product|, signed like the layer_1
    # product. Written as `product * |sum|`, which is the same value wherever
    # the product is non-zero but yields 0 instead of 0/0 = NaN where it is zero.
    final_power = layer2_prod * abs(layer1_sum)
    # The two operands carry different scalar `layer` labels, so the coordinate
    # may already be gone after the multiplication; tolerate both cases.
    final_power = final_power.drop_vars('layer', errors='ignore')

    # Counterfactuals: neutralise one neuron at a time (0 in the additive
    # layer, 1 in the multiplicative layer) and recombine the layers.
    # NOTE(review): these use the signed 'layer_0' sum, whereas 'NQG' above
    # uses its absolute value - confirm the difference is intended.

    # Without the layer_0 'trust_score' neuron.
    layer1_wo_neuron1 = layer1.copy()
    layer1_wo_neuron1.loc[dict(neuron='trust_score')] = 0
    NQG_wo_L1_trust_score = (layer1_wo_neuron1.sum(dim='neuron')) * (layer2.prod(dim='neuron'))

    # Without the layer_0 'reputation_score' neuron.
    layer1_wo_neuron2 = layer1.copy()
    layer1_wo_neuron2.loc[dict(neuron='reputation_score')] = 0
    NQG_wo_L1_reputation_score = (layer1_wo_neuron2.sum(dim='neuron')) * (layer2.prod(dim='neuron'))

    # Without the layer_1 'past_round' neuron.
    layer2_wo_neuron1 = layer2.copy()
    layer2_wo_neuron1.loc[dict(neuron='past_round')] = 1
    NQG_wo_L2_past_round = (layer1.sum(dim='neuron')) * (layer2_wo_neuron1.prod(dim='neuron'))

    metrics = xr.Dataset({
        'NQG': final_power,
        'NQG_wo_L1_trust_score': NQG_wo_L1_trust_score,
        'NQG_wo_L1_reputation_score': NQG_wo_L1_reputation_score,
        'NQG_wo_L2_past_round': NQG_wo_L2_past_round
    })

    # Collapse the `user` dimension, leaving one total per remaining
    # coordinate.
    final_metrics = metrics.sum(dim='user')
    return final_metrics


# Totals (summed over users) for NQG and the three neuron counterfactuals.
final_metrics = calculate_final_metrics(neuron_power_tensor, vote_tensor)

In [None]:
# Display the resulting dataset.
final_metrics

In [None]:
# Label-aligned difference between the simulation's 'per_project_voting'
# entry at row label 1 and the 'NQG' totals recomputed above.
pd.Series(sim_df['per_project_voting'][1]) - final_metrics.to_dataframe()['NQG']
# NOTE(review): differences of about 0.1 and 0.2 were observed here and are
# still unexplained.

In [None]:
# The recomputed 'NQG' totals as a pandas Series.
final_metrics.to_dataframe()['NQG']

In [None]:
# Metrics table ordered by NQG total, largest first.
ranked_metrics = final_metrics.to_dataframe().reset_index().sort_values('NQG', ascending=False)

# Rank every column independently (1 = largest value, ties share the lowest
# rank), keeping projects as the row labels.
rank_table = ranked_metrics.set_index('project').rank(method='min', ascending=False)

# `ranked_df` keeps 'project' as an ordinary column.
ranked_df = rank_table.reset_index()

plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(rank_table.astype(float), annot=True, cmap='viridis', fmt=".2f")
heatmap_ax.set_title('Heatmap of Neuron Power Ranks by Projects')
heatmap_ax.set_xlabel('Neuron counterfactuals')
heatmap_ax.set_ylabel('Projects')
plt.show()

In [None]:
# Use the project as row label (expects 'project' to still be a column).
ranked_df = ranked_df.set_index('project')

# Rank of each scenario minus the NQG rank, per project, and its magnitude.
rank_changes = ranked_df.sub(ranked_df['NQG'], axis='index')
rank_changes_abs = rank_changes.abs()

# Largest absolute rank shift of each project across the scenarios.
ranked_df = ranked_df.assign(Max_Rank_Change=rank_changes_abs.max(axis='columns'))

# Projects that reach the overall largest shift.
max_change_value = ranked_df['Max_Rank_Change'].max()
projects_high_change = ranked_df.loc[ranked_df['Max_Rank_Change'].eq(max_change_value)]

# Projects whose rank is the same in every scenario.
unchanged_ranks = ranked_df.loc[ranked_df['Max_Rank_Change'].eq(0)]

# Summary statistics of the absolute rank shifts, per scenario column.
rank_descriptive_stats = rank_changes_abs.describe()


In [None]:
# Show the projects that reach the largest absolute rank change.
print("Projects with Highest Rank Change:")
projects_high_change


In [None]:
# Show the projects whose maximum absolute rank change is 0.
print("\nProjects with Unchanged Ranks:")
unchanged_ranks

In [None]:
# Show the `describe()` summary of the absolute rank changes.
print("\nDescriptive Statistics of Rank Changes:")
rank_descriptive_stats

## Learning curve


In [None]:
# Display the metrics dataset again (this section has no other content yet).
final_metrics

## Testing against the JSON files 

In [None]:
def load_data(file_name, neuron_name=None, base_path=None):
    """Load a JSON object of per-user values into a two-column DataFrame.

    Parameters
    ----------
    file_name : str
        Name of the JSON file, relative to `base_path`. The file must contain
        a JSON object (its key/value pairs become the rows).
    neuron_name : str, optional
        Label of the value column. Defaults to the file name without its
        extension, so the column is never labelled `None`.
    base_path : str, optional
        Folder containing the file. Defaults to the notebook-level
        `folder_path`.

    Returns
    -------
    pandas.DataFrame
        Columns 'user' (the JSON keys) and `neuron_name` (the JSON values),
        one row per key, in file order.
    """
    if base_path is None:
        base_path = folder_path
    if neuron_name is None:
        neuron_name = os.path.splitext(os.path.basename(file_name))[0]
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(os.path.join(base_path, file_name), encoding='utf-8') as f:
        data = json.load(f)
    return pd.DataFrame(list(data.items()), columns=['user', neuron_name])

# One (user, value) frame per neuron JSON file in the round folder.
assigned_reputation_neuron = load_data('assigned_reputation_neuron.json', 'reputation_score')
prior_voting_history_neuron = load_data('prior_voting_history_neuron.json', 'past_round')
trust_graph_neuron = load_data('trust_graph_neuron.json', 'trust_score')

# Keep the users present in all three files, index by user, and divide the
# values by 10**18 (NOTE(review): presumably they are stored as fixed-point
# integers with 18 decimals - confirm).
neuron_data = (
    assigned_reputation_neuron
    .merge(prior_voting_history_neuron, on='user')
    .merge(trust_graph_neuron, on='user')
    .set_index('user')
    .astype(float)
    / 10**18
)
neuron_data.head()

In [None]:
def _mean_power_by_user(neuron, layer):
    """Average one neuron's power over projects; return a (user, <neuron>) frame."""
    per_user = neuron_power_tensor.mean(dim='project').sel(neuron=neuron, layer=layer)
    # The DataArray name becomes the value column of the DataFrame.
    per_user.name = neuron
    return per_user.to_dataframe().reset_index()[['user', neuron]]


trust_neuron_npt = _mean_power_by_user('trust_score', 'layer_0')
prior_voting_neuron_npt = _mean_power_by_user('past_round', 'layer_1')
reputation_neuron_npt = _mean_power_by_user('reputation_score', 'layer_0')

# One row per user with the three simulated neuron values, ordered by user.
sim_df_neuron_data = (
    trust_neuron_npt
    .merge(prior_voting_neuron_npt, on='user')
    .merge(reputation_neuron_npt, on='user')
    .sort_values('user', ascending=True)
)


In [None]:
# Preview the simulated per-user neuron values.
sim_df_neuron_data.head()

In [None]:
# Side-by-side table of simulated (`_sim`) and JSON-file (`_nqg`) neuron
# values for the users present in both sources, indexed by user.
compare_neuron_results = (
    sim_df_neuron_data
    .merge(neuron_data, on='user', suffixes=('_sim', '_nqg'))
    .set_index('user')
)



In [None]:
# First of three per-neuron bar charts: simulated vs JSON-file trust score,
# one pair of bars per user.
compare_neuron_results[['trust_score_sim', 'trust_score_nqg']].plot(kind='bar', figsize=(10, 6), title='Trust Score Comparison')

In [None]:
# Relation being checked:
#   past_round_sim - (trust_score_sim + reputation_score_sim) = past_round_nqg
# so the two layer_0 values are subtracted from the simulated past-round value
# before comparing it with the JSON-file one.
compare_neuron_results['raw_past_round_sim'] = (
    compare_neuron_results['past_round_sim']
    - (compare_neuron_results['trust_score_sim'] + compare_neuron_results['reputation_score_sim'])
)
# The title previously read 'Trust Score Comparison' (copied from the trust
# chart) although this chart shows the past-round neuron.
compare_neuron_results[['raw_past_round_sim', 'past_round_nqg']].plot(kind='bar', figsize=(10, 6), title='Past Round Comparison')

In [None]:
# Simulated vs JSON-file reputation score, one pair of bars per user.
compare_neuron_results[['reputation_score_sim', 'reputation_score_nqg']].plot(kind='bar', figsize=(10, 6), title='Reputation Score Comparison')

In [None]:
records = []

vote_type_map = {ProjectAction.Vote: 'Vote', ProjectAction.Delegate: 'Delegate'}
vote_decision_map = {Vote.Yes: 'Yes', Vote.Abstain: 'Abstain', Vote.No: 'No'}

# Flatten the backtesting row into one record per (user, project) pair whose
# action is a direct vote or a delegation.
for user, user_decisions in backtesting_df.action_matrix.items():
    for project, decision in user_decisions.items():
        if decision not in vote_type_map:
            # Any other action is neither a direct vote nor a delegation.
            # Previously such a pair fell through `else: pass` and was
            # recorded with the `vote_type` left over from the previous pair
            # (or raised NameError on the very first pair).
            continue

        resolved_vote = vote_decision_map[backtesting_df.vote_decision_matrix[user][project]]
        if decision == ProjectAction.Vote:
            # Direct vote: the vote itself is the type, nothing was delegated.
            vote_type = resolved_vote
            delegation_result = 'Non_delegated'
        else:
            # Delegation: record what the delegation resolved to.
            vote_type = 'Delegate'
            delegation_result = resolved_vote

        records.append({'user_ref': user,
                        'submission': project,
                        'vote_type': vote_type,
                        'delegation_result': delegation_result,
                        'tally_vote_power': backtesting_df.vote_power_matrix[user][project]})

# Explicit columns keep the frame usable by the next cells even when there are
# no records.
sim_vote_result_df = pd.DataFrame(
    records,
    columns=['user_ref', 'submission', 'vote_type', 'delegation_result', 'tally_vote_power'],
)

sim_vote_result_df.head(3)

In [None]:
# Simulated direct (Yes/No) votes, indexed and ordered by (user, submission).
is_direct_sim = sim_vote_result_df.vote_type.isin(['Yes', 'No'])
df_direct_sim = sim_vote_result_df[is_direct_sim].set_index(['user_ref', 'submission']).sort_index()
df_direct_sim.head(3)

In [None]:
# Simulated delegated votes, indexed and ordered by (user, submission).
is_delegated_sim = sim_vote_result_df.vote_type == 'Delegate'
df_delegated_sim = sim_vote_result_df[is_delegated_sim].set_index(['user_ref', 'submission']).sort_index()
df_delegated_sim.head(3)

In [None]:
# Recorded delegated votes with the same index, columns and ordering as the
# simulated table, so the two can be compared directly.
recorded_delegated = df[df.vote_type == 'Delegate'].set_index(['user_ref', 'submission'])
df_delegated = recorded_delegated[list(df_delegated_sim.columns)].sort_index()
df_delegated.head(3)

In [None]:
# Recorded direct (Yes/No) votes with the same index, columns and ordering as
# the simulated table.
recorded_direct = df[df.vote_type.isin(['Yes', 'No'])].set_index(['user_ref', 'submission'])
df_direct = recorded_direct[list(df_direct_sim.columns)].sort_index()
df_direct.head(3)

In [None]:
# Full simulated table, then the delegation-outcome counts of the simulated
# and the recorded tables, separated by blank lines.
print(df_delegated_sim, end="\n\n")
print("Sim df")
print(df_delegated_sim.delegation_result.value_counts(), end="\n\n")
print("Result df")
print(df_delegated.delegation_result.value_counts())

In [None]:
# Yes/No counts of the simulated and the recorded direct votes.
print("Sim df")
print(df_direct_sim.vote_type.value_counts(), end="\n\n")
print("Result df")
print(df_direct.vote_type.value_counts())

In [None]:
# Recorded and simulated tally power of direct votes must agree within
# tolerance (|recorded - simulated| <= atol + rtol * |simulated|, positional
# comparison of the two identically sorted tables).
# `assert_allclose` reports which entries differ and by how much, and fails on
# a shape mismatch, instead of a bare AssertionError.
np.testing.assert_allclose(
    df_direct.tally_vote_power,
    df_direct_sim.tally_vote_power,
    rtol=1e-3,
    atol=1e-2,
    equal_nan=False,  # as with np.isclose's default, NaN never counts as a match
)


In [None]:
#assert np.isclose(df_delegated.tally_vote_power, df_delegated_sim.tally_vote_power, rtol=1e-3,atol=1e-2).mean() == 1.0

In [None]:
#pd.testing.assert_frame_equal(df_delegated, df_delegated_sim)

In [None]:
# Inspect the first recorded delegated vote.
df_delegated.iloc[0]

In [None]:
# Recorded delegations whose outcome differs from the simulated one (the two
# tables must carry identical index labels for this comparison).
filtered_df = df_delegated[df_delegated.delegation_result != df_delegated_sim.delegation_result]
# Recorded and simulated columns side by side for those rows; simulated
# columns get the `_sim` suffix.
joined_df = df_delegated.join(df_delegated_sim, rsuffix="_sim").loc[filtered_df.index]
# Show up to five mismatches. A fixed `sample(5)` raises ValueError when fewer
# than five rows differ, including the case where everything matches.
joined_df.sample(min(5, len(joined_df)))

In [None]:
# Number of mismatching delegations, then their breakdown by
# (recorded outcome, simulated outcome) pair.
print(len(filtered_df))
joined_df.groupby(by=['delegation_result', 'delegation_result_sim']).size()

In [None]:
# Drill into a single delegation for manual inspection.
# NOTE(review): the account id and project name are hard-coded; the `.loc`
# lookups below fail if the pair is absent from the delegated tables, so
# confirm they belong to the round selected by `round_no`.
src_usr = 'GCZH4PHHWFNZGZH76UUOZE6X75HTPBOSXSRZXCGUSDGFM2XQE3JC2E5U'
proj = 'NAUTA (P2P) SECONDARY MARKET'
# Delegatees registered for this user in the backtesting row.
delegates = backtesting_df.delegatees[src_usr]
# Rows of the vote table cast by those delegatees on the same project.
inds = (df.user_ref.isin(delegates)) & (df.submission == proj)
df_int = df[inds]

print("Sim-originated outcome")
print(df_delegated_sim.loc[(src_usr, proj)])
print("Results outcome")
print(df_delegated.loc[(src_usr, proj)])

# Displayed last: what each delegatee did on that project.
df_int