## Loading data

### Imports

In [None]:
import random
import sys
from datetime import datetime

import matplotlib.pyplot as plt # type: ignore
import numpy as np # type: ignore
import pandas as pd # type: ignore
import plotly.express as px # type: ignore
import plotly.io as pio
import seaborn as sns # type: ignore
import xarray as xr # type: ignore

pio.renderers.default = "png"
sys.path.append('../..')

### Reading the voting data into a dataframe

In [None]:
from nqg_model.experiment import full_historical_data_run_plus_counterfactual
from nqg_model.load_data import retrieve_id_maps, retrieve_vote_data

# Fix the seed *before* anything else runs in this cell.
# NOTE(review): whether the simulation draws from Python's `random` module is
# not visible here; seeding first makes it reproducible if it does.
SEED = 42
random.seed(SEED)

## Simulation Originated Data
round_no = 26
folder_path = f'../../data/r{round_no}/'

# ID lookup tables for this round (used below to translate the raw vote IDs).
maps = retrieve_id_maps(folder_path)

sim_backtest_df = full_historical_data_run_plus_counterfactual(folder_path=folder_path, round_no=round_no)
# Single-element concat: yields a copy of the run with a fresh 0..n-1 index.
sim_df = pd.concat([sim_backtest_df], ignore_index=True)


# sim_df.to_pickle(f"../data/r{round_no}/sim_results/results_{datetime.now()}.pkl.zip")

## Empirical Data
df = pd.read_csv(f'{folder_path}votes.csv')
# Translate raw IDs; IDs missing from the maps are kept visible as 'unknown-<id>'.
df['user'] = df['user'].astype(str).map(lambda x: maps.UserID2Wallet.get(x, f'unknown-{x}'))
df['project'] = df['project'].astype(str).map(lambda x: maps.SubmissionID2Label.get(x, f'unknown-{x}'))
df = df.rename(columns={'user': 'user_ref', 'project': 'submission'})

# Re-seed so the cells below start from the same RNG state no matter how many
# draws (if any) the simulation consumed.
random.seed(SEED)


In [None]:
# Fragment of the wallet address to look up in the empirical votes.
init_str = "GAD27O"

# filtered_df = df[df.user_ref.str.contains(init_str)]
# filtered_df.nqg_vote_power.describe()

# Full `user_ref` of the first vote row whose wallet contains the fragment.
matching_user_refs = df.loc[df['user_ref'].str.contains(init_str), 'user_ref']
matching_user_refs.iloc[0]

In [None]:
# Vote-power entry for one wallet, read from the second row of the simulation results.
wallet_address = 'GAD27OJXIGYJ3LMZ462G6OZ22P5ICENV7TAMMF2DVPR3NYYX2SHIVVZ6'
sim_df['vote_power_matrix'].iloc[1][wallet_address]

In [None]:
# Move the neuron power tensor column into its own frame so `sim_df` renders quickly.
# Guarded so that re-running the cell is a no-op: once the column has been
# split off, selecting or dropping it again would raise a KeyError.
if 'neuron_power_tensor' in sim_df.columns:
    neuron_power_tensor_df = sim_df[['neuron_power_tensor', 'label']]
    sim_df = sim_df.drop(columns=['neuron_power_tensor'])

sim_df.head()

### Functions to process and visualize the data 

In [None]:
from helper_functions import *

## Project Voting Summary

In [None]:
# Passes the empirical votes frame and the round number to `project_voting_summary`.
# NOTE(review): the helper is presumably provided by the `helper_functions` star import;
# what it renders is not visible in this notebook — confirm against its definition.
project_voting_summary(df, round_no)

## User Engagement Overview

In [None]:
# Passes the empirical votes frame and the round number to `user_engagement_overview`.
# NOTE(review): presumably provided by the `helper_functions` star import — confirm its output there.
user_engagement_overview(df, round_no)

In [None]:
# Number of vote rows per vote type.
df['vote_type'].value_counts()

In [None]:
# Per-user count of each vote type: one row per user, one column per vote type,
# 0 where a user never cast that type.
user_vote_summary = (
    df.groupby('user_ref')['vote_type']
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)

# Users who delegated at least once (non-zero 'Delegate' count).
# A round with no 'Delegate' votes has no such column, so fall back to an empty selection.
if 'Delegate' in user_vote_summary.columns:
    delegating_users = user_vote_summary[user_vote_summary['Delegate'] != 0]
else:
    delegating_users = user_vote_summary.iloc[0:0]

# (number of delegating users, number of vote types)
delegating_users.shape


## Trust Network Analysis
- Visualization of trust relationships and their impact on voting results.


In [None]:
# Passes the simulation results frame to `trust_network_analysis`.
# NOTE(review): presumably provided by the `helper_functions` star import — confirm its output there.
trust_network_analysis(sim_df)

## Scenario Cross Comparison



Terminology:
- Standard Voting (labelled "1 person, 1 vote" in the figures below): This refers to a voting mechanism wherein each participant possesses equal voting power, and the process does not involve Quorum Delegation.
- NQG: An acronym for Neural Quorum Governance, a system in which voting weight is determined through a combination of factors such as Trust, Expertise, and Voting History Neurons, in addition to the inclusion of Quorum Delegation.
- NG w/o QD: Stands for Neural Governance without Quorum Delegation. In this framework, voting weight is assigned based on Trust, Expertise, and Voting History Neurons; however, it excludes the aspect of Quorum Delegation.
- QD w/o NG: Denotes Quorum Delegation without Neural Governance. Under this system, each voter retains an equal vote, but the process allows for the delegation of votes through Quorum Delegation.

In [None]:
# Display label for each scenario key.
scenario_map = dict(
    backtesting='NQG',
    no_QD='NG w/o QD',
    no_NG='QD w/o NG',
    no_NQG='1 person, 1 vote',
)

# The helper returns four frames; the first one is not used in this notebook.
scenario_frames = get_results_counterfact_full_ranked_dfs(df, sim_df, scenario_map)
_, counterfact_df, full_df, ranked_full_df = scenario_frames

In [None]:
# Passes the full scenario results frame to `counterfactual_heatmap_1`.
# NOTE(review): presumably provided by the `helper_functions` star import — confirm what it plots there.
counterfactual_heatmap_1(full_df)

In [None]:
# Passes the same frame to `counterfactual_heatmap2`.
# NOTE(review): the name has no underscore before the digit, unlike `counterfactual_heatmap_1`
# and `counterfactual_heatmap_3`; presumably all three come from `helper_functions` — confirm there.
counterfactual_heatmap2(full_df)

In [None]:
# Submissions ordered from highest to lowest 'result'.
submission_power = full_df.reset_index().sort_values('result', ascending=False)

power_fig = px.bar(
    submission_power,
    x='index',
    y='result',
    labels={'result': 'Power', 'index': 'Submission'},
)
# `update_layout` returns the figure, so this last expression is what gets displayed.
power_fig.update_layout(height=600)


In [None]:
# Passes the ranked scenario frame (all columns) to `counterfactual_heatmap_3`.
# NOTE(review): presumably provided by the `helper_functions` star import — confirm what it plots there.
counterfactual_heatmap_3(ranked_full_df)

In [None]:
# Same heatmap restricted to two columns, with rows ordered by 'result'.
result_vs_equal_vote = ranked_full_df.loc[:, ['result', '1 person, 1 vote']]
counterfactual_heatmap_3(result_vs_equal_vote.sort_values(by='result'))

In [None]:
# Same heatmap restricted to 'NQG' and 'NG w/o QD', with rows ordered by 'NQG'.
nqg_vs_ng_without_qd = ranked_full_df.loc[:, ['NQG', 'NG w/o QD']]
counterfactual_heatmap_3(nqg_vs_ng_without_qd.sort_values(by='NQG'))

## Neuron Power Tensor Analysis

In [None]:
# Tensor stored in the second row labelled 'backtesting'.
is_backtesting = neuron_power_tensor_df['label'] == 'backtesting'
backtesting_tensors = neuron_power_tensor_df.loc[is_backtesting, 'neuron_power_tensor']
neuron_power_tensor = backtesting_tensors.iloc[1]

In [None]:
# Average over 'user' first, then over 'project'; the other dimensions are kept.
# NOTE(review): two successive means are kept rather than one mean over both dims —
# presumably the two differ when the tensor contains NaNs; confirm before collapsing them.
neuron_power_tensor.mean(dim='user').mean(dim='project')

In [None]:
backtesting_tensors = neuron_power_tensor_df.loc[
    neuron_power_tensor_df['label'] == 'backtesting', 'neuron_power_tensor'
]
# Work on a deep copy: `.values[1]` hands back the very object stored in the
# frame, and the in-place `.loc` assignment at the end of this cell would
# otherwise rewrite it — re-running the cell would then divide a second time.
neuron_power_tensor = backtesting_tensors.values[1].copy(deep=True)

# Layer-1 neuron 0 expressed relative to the summed layer-0 neurons.
layer0_total = neuron_power_tensor.sel(layer=0).sum(dim='neuron')
neuron_power_tensor_neuron0_layer1 = neuron_power_tensor.sel(layer=1, neuron=0) / layer0_total

# A zero denominator gives NaN (0/0) or inf (x/0); `isfinite` is False for both,
# so a single mask replaces every such entry with 0.
neuron_power_tensor_neuron0_layer1 = neuron_power_tensor_neuron0_layer1.where(
    np.isfinite(neuron_power_tensor_neuron0_layer1), 0
)

# Write the normalised values back into the working copy.
neuron_power_tensor.loc[dict(neuron=0, layer=1)] = neuron_power_tensor_neuron0_layer1.values

In [None]:

# Vote decisions stored at index label 1 of the simulation results.
# NOTE(review): presumably rows are projects and columns are users — confirm against the simulation output.
vote_decision_matrix = pd.DataFrame(sim_df['vote_decision_matrix'][1])

vote_tensor = xr.DataArray(vote_decision_matrix, dims=['project', 'user'])

# Relabel the votes with the power tensor's coordinates so the product below lines up.
# NOTE(review): this overwrites the labels positionally; it assumes both objects list
# projects and users in the same order — confirm.
vote_tensor = vote_tensor.assign_coords(
    project=neuron_power_tensor.project.values,
    user=neuron_power_tensor.user.values,
)

# Repeat the votes along the 'layer' and 'neuron' dimensions
vote_tensor = vote_tensor.expand_dims(
    {'layer': neuron_power_tensor.layer, 'neuron': neuron_power_tensor.neuron},
    axis=[-2, -1],
)

# Power multiplied by the vote, per (project, user, layer, neuron)
resulting_tensor = neuron_power_tensor * vote_tensor

# Mean across users, leaving one value per project, layer and neuron
mean_power = resulting_tensor.mean(dim='user')

# Long-format frame with one row per (project, layer, neuron)
df_mean_power = mean_power.to_dataframe(name='power').reset_index()

# String labels so that layer and neuron can be concatenated
df_mean_power['layer'] = df_mean_power['layer'].astype(str)
df_mean_power['neuron'] = df_mean_power['neuron'].astype(str)
df_mean_power['layer_neuron'] = df_mean_power['layer'] + '_' + df_mean_power['neuron']

# Drop 1_1. `.copy()` makes the filtered frame independent, so the relabelling
# below is a plain assignment instead of a write to a slice (SettingWithCopyWarning).
df_mean_power = df_mean_power[df_mean_power['layer_neuron'] != '1_1'].copy()

# rename 0_0 to Trust, 0_1 to Reputation, 1_0 to PastHistory
df_mean_power['layer_neuron'] = df_mean_power['layer_neuron'].replace(
    {'0_0': 'Trust', '0_1': 'Reputation', '1_0': 'PastHistory'}
)

# 'project' on rows, 'layer_neuron' on columns
heatmap_data_power_submissions = df_mean_power.pivot_table(index='project', columns='layer_neuron', values='power')

# column order: trust, reputation, past history
heatmap_data_power_submissions = heatmap_data_power_submissions[['Trust', 'Reputation', 'PastHistory']]

# Plotting the heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_data_power_submissions, annot=True, cmap='viridis', fmt=".2f", ax=ax)
ax.set_title('Mean Power by Project, Layer, and Neuron')
ax.set_ylabel('Project')
ax.set_xlabel('Layer and Neuron')
plt.show()

In [None]:
# Sum across users, leaving one value per project, layer and neuron.
# Kept under its own name so `df_sum_power` only ever refers to a DataFrame.
sum_power = resulting_tensor.sum(dim='user')

# Long-format frame with one row per (project, layer, neuron)
df_sum_power = sum_power.to_dataframe(name='power').reset_index()

# String labels so that layer and neuron can be concatenated
df_sum_power['layer'] = df_sum_power['layer'].astype(str)
df_sum_power['neuron'] = df_sum_power['neuron'].astype(str)
df_sum_power['layer_neuron'] = df_sum_power['layer'] + '_' + df_sum_power['neuron']

# Drop 1_1. `.copy()` makes the filtered frame independent, so the relabelling
# below is a plain assignment instead of a write to a slice (SettingWithCopyWarning).
df_sum_power = df_sum_power[df_sum_power['layer_neuron'] != '1_1'].copy()

# rename 0_0 to Trust, 0_1 to Reputation, 1_0 to PastHistory
df_sum_power['layer_neuron'] = df_sum_power['layer_neuron'].replace(
    {'0_0': 'Trust', '0_1': 'Reputation', '1_0': 'PastHistory'}
)

# 'project' on rows, 'layer_neuron' on columns
heatmap_data_sum_power_submissions = df_sum_power.pivot_table(index='project', columns='layer_neuron', values='power')

# column order: trust, reputation, past history
heatmap_data_sum_power_submissions = heatmap_data_sum_power_submissions[['Trust', 'Reputation', 'PastHistory']]

# Plotting the heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_data_sum_power_submissions, annot=True, cmap='viridis', fmt=".2f", ax=ax)
ax.set_title('Sum Power by Project, Layer, and Neuron')
ax.set_ylabel('Project')
ax.set_xlabel('Layer and Neuron')
plt.show()


In [None]:
# Rebuild the vote-weighted tensor here so this cell depends only on
# `sim_df` and `neuron_power_tensor`, not on the heatmap cells.
vote_decision_matrix = pd.DataFrame(sim_df['vote_decision_matrix'][1])

vote_tensor = xr.DataArray(vote_decision_matrix, dims=['project', 'user'])

# Relabel the votes with the power tensor's coordinates so the product lines up.
# NOTE(review): labels are overwritten positionally; assumes both objects list
# projects and users in the same order — confirm.
vote_tensor = vote_tensor.assign_coords(
    project=neuron_power_tensor.project.values,
    user=neuron_power_tensor.user.values,
)

# Repeat the votes along the 'layer' and 'neuron' dimensions
vote_tensor = vote_tensor.expand_dims(
    {'layer': neuron_power_tensor.layer, 'neuron': neuron_power_tensor.neuron},
    axis=[-2, -1],
)

resulting_tensor = neuron_power_tensor * vote_tensor

# `layer1` / `layer2` are the slices at layer index 0 / 1.
layer1 = resulting_tensor.sel(layer=0)
layer2 = resulting_tensor.sel(layer=1)

# Layer 1 aggregates its neurons by sum, layer 2 by product.
layer1_sum = layer1.sum(dim='neuron')
layer2_prod = layer2.prod(dim='neuron')

# |layer1_sum| scaled by layer2_prod, carrying layer2_prod's sign.
# Equal to abs(layer2_prod) * abs(layer1_sum) * (layer2_prod / abs(layer2_prod))
# wherever layer2_prod != 0, but without the 0/0 division that turned every
# zero-product entry into NaN; those entries are now 0.
final_power = layer2_prod * abs(layer1_sum)
# The two operands carry different scalar 'layer' coordinates; tolerate the
# coordinate being absent from the product.
final_power = final_power.drop_vars('layer', errors='ignore')

# NOTE(review): the baseline above uses |layer1_sum| while the counterfactuals
# below use the signed layer-1 sum — confirm this asymmetry is intended.

# NQG_wo_L1_trust_score: layer index 0, neuron 0 forced to 0
layer1_wo_neuron1 = layer1.copy()
layer1_wo_neuron1.loc[dict(neuron=0)] = 0
NQG_wo_L1_trust_score = layer1_wo_neuron1.sum(dim='neuron') * layer2_prod

# NQG_wo_L1_reputation_score: layer index 0, neuron 1 forced to 0
layer1_wo_neuron2 = layer1.copy()
layer1_wo_neuron2.loc[dict(neuron=1)] = 0
NQG_wo_L1_reputation_score = layer1_wo_neuron2.sum(dim='neuron') * layer2_prod

# NQG_wo_L2_past_round: layer index 1, neuron 0 forced to 1 (neutral under a product)
layer2_wo_neuron1 = layer2.copy()
layer2_wo_neuron1.loc[dict(neuron=0)] = 1
NQG_wo_L2_past_round = layer1_sum * layer2_wo_neuron1.prod(dim='neuron')

# Collect the baseline and the three counterfactuals in one Dataset
metrics = xr.Dataset({
    'NQG': final_power,
    'NQG_wo_L1_trust_score': NQG_wo_L1_trust_score,
    'NQG_wo_L1_reputation_score': NQG_wo_L1_reputation_score,
    'NQG_wo_L2_past_round': NQG_wo_L2_past_round
})

# Sum over users: one total per project for each metric
final_metrics = metrics.sum(dim='user')


In [None]:
# Display the Dataset of metrics summed over users.
final_metrics

In [None]:
# Per-project gap between the simulation's own per-project voting result and the
# NQG total recomputed above from the neuron power tensor.
simulated_project_voting = pd.Series(sim_df['per_project_voting'][1])
recomputed_nqg = final_metrics.to_dataframe()['NQG']
simulated_project_voting - recomputed_nqg
# there is some difference of 0.1 and 0.2 for some reason

In [None]:
# The 'NQG' column of the summed metrics, as a pandas Series.
final_metrics.to_dataframe()['NQG']

In [None]:
# Metrics as a flat frame, ordered from the highest to the lowest NQG total.
ranked_metrics = (
    final_metrics.to_dataframe()
    .reset_index()
    .sort_values('NQG', ascending=False)
)

# Rank every metric column independently (1 = highest value; ties share the lowest rank).
rank_table = ranked_metrics.set_index('project').rank(method='min', ascending=False)

# `ranked_df` keeps 'project' as a regular column.
ranked_df = rank_table.reset_index()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(rank_table.astype(float), annot=True, cmap='viridis', fmt=".2f", ax=ax)
ax.set_title('Heatmap of Neuron Power Ranks by Projects')
ax.set_xlabel('Neuron counterfactuals')
ax.set_ylabel('Projects')
plt.show()

In [None]:
# Index the rank table by project. Guarded so the cell can be re-run: once
# 'project' is the index, a second `set_index('project')` would raise a KeyError.
if 'project' in ranked_df.columns:
    ranked_df = ranked_df.set_index('project')

# Only the rank columns take part in the comparison; on a re-run the derived
# 'Max_Rank_Change' column is left out so it does not feed back into itself.
scenario_ranks = ranked_df.drop(columns=['Max_Rank_Change'], errors='ignore')

# Rank difference of every column relative to the 'NQG' column
rank_changes = scenario_ranks.subtract(scenario_ranks['NQG'], axis=0)

# Absolute rank changes, for identifying large movements in either direction
rank_changes_abs = rank_changes.abs()

# Largest absolute rank change for each project
ranked_df = scenario_ranks.assign(Max_Rank_Change=rank_changes_abs.max(axis=1))

# Projects whose largest rank change equals the overall maximum
max_change_value = ranked_df['Max_Rank_Change'].max()
projects_high_change = ranked_df[ranked_df['Max_Rank_Change'] == max_change_value]

# Projects whose rank is the same in every column
unchanged_ranks = ranked_df[ranked_df['Max_Rank_Change'] == 0]

# Descriptive statistics of the absolute rank changes
rank_descriptive_stats = rank_changes_abs.describe()


In [None]:
# Print results
# Heading for the table; the bare expression on the last line is what the notebook displays.
print("Projects with Highest Rank Change:")
# Rows whose 'Max_Rank_Change' equals the overall maximum.
projects_high_change


In [None]:
# Heading for the table; the bare expression on the last line is what the notebook displays.
print("\nProjects with Unchanged Ranks:")
# Rows whose 'Max_Rank_Change' is 0.
unchanged_ranks

In [None]:
# Heading for the table; the bare expression on the last line is what the notebook displays.
print("\nDescriptive Statistics of Rank Changes:")
# `describe()` output for the absolute rank changes.
rank_descriptive_stats