In [None]:
input_files = ""
# Placeholder: the data-loading cell reads input_files["cross_period_root"], so this
# empty string must be replaced by a mapping with that key before the notebook runs
# (presumably via parameter injection -- TODO confirm).
#
# WARNING: when re-running the notebook for audit, change the injected path below to
# "./output_praiseDist_test.ipynb", then go to "Cell > Run all". This only works for the notebook in
# "distribution_results/round ?/results/analysis_outputs/output_general_RAD_report.ipynb".

In [None]:

import os
import sys

import pandas as pd 
import numpy as np 
from collections import OrderedDict
from natsort import natsorted

from analysis_tools.module_libraries import general_tool_module as tools
from analysis_tools.module_libraries import praise_tool_module as praise_tools


import holoviews as hv
from holoviews import opts
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

import base64
from IPython.display import HTML

import scrapbook as sb

In [None]:
# Root folder that holds one sub-directory per round.
datadir = input_files["cross_period_root"]

# Keep only the directory entries, in natural sort order. Filtering up front avoids
# removing items from the list while iterating over it (which skips the entry that
# follows each removal) and avoids removing the literal string 'round_name', which
# raised ValueError as soon as a non-directory entry was present.
roundname_list = [
    entry for entry in natsorted(os.listdir(datadir))
    if os.path.isdir(f'{datadir}/{entry}')
]
rounds = len(roundname_list)

# Praise export of every round, keyed by the round's folder name.
allrounds_df = {}
for round_name in roundname_list:
    allrounds_df[round_name]=pd.read_csv(f'{datadir}/{round_name}/distribution_results/raw_csv_exports/praise_outliers.csv')


In [None]:
# Final token allocation table of every round, keyed by the round's folder name.
allrounds_finaldist = dict()
for round_name in roundname_list:
    allocation_csv = f'{datadir}/{round_name}/distribution_results/raw_csv_exports/final_praise_token_allocation.csv'
    allrounds_finaldist[round_name] = pd.read_csv(allocation_csv)

# General statistics

In [None]:
# Empty summary table with one row per loaded round; columns are added by the cells below.
round_stats = pd.DataFrame(index=list(allrounds_df))

In [None]:
# Smallest / largest 'DATE' value of each round, trimmed to its first ten characters.
round_stats['round_start_time'] = [
    allrounds_df[name]['DATE'].min()[:10] for name in roundname_list
]
round_stats['round_end_time'] = [
    allrounds_df[name]['DATE'].max()[:10] for name in roundname_list
]

In [None]:
# Show the per-round summary table as built so far (round start and end columns).
round_stats

## Praise involvement

### How many praise items were given?

In [None]:
# Row count of each round's praise table.
round_stats['total_praise'] = [
    allrounds_df[name].shape[0] for name in roundname_list
]
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y='total_praise',
    markers=True,
)

### How many people gave praise?

In [None]:
# Number of distinct values in the 'FROM USER ACCOUNT' column of each round.
round_stats['total_praise_giver'] = [
    np.unique(allrounds_df[name]['FROM USER ACCOUNT']).size
    for name in roundname_list
]
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y='total_praise_giver',
    markers=True,
)


### How many people received praise?

In [None]:
# Number of distinct values in the 'TO USER ACCOUNT' column of each round.
round_stats['total_praise_receiver'] = [
    np.unique(allrounds_df[name]['TO USER ACCOUNT']).size
    for name in roundname_list
]
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y=['total_praise_receiver', 'total_praise_giver'],
    markers=True,
    title='total praise giver and receiver',
)


## Quantifier involvement

In [None]:
# Number of distinct values found across ALL columns whose name contains 'QUANTIFIER'.
# NOTE(review): if those columns hold more than one identifier per quantifier, or
# missing values, this count is larger than the number of people -- verify against
# the export schema.
round_stats['total_quantifier'] = [
    np.unique(allrounds_df[name].filter(like='QUANTIFIER')).size
    for name in roundname_list
]
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y=['total_quantifier'],
    markers=True,
    title='total quantifiers',
)

# System health evaluation


## new Giveth members

In [None]:
# Per round: the set of all distinct values found in the columns whose name contains
# 'ACCOUNT', merged with those found in the columns whose name contains 'QUANTIFIER'.
round_stats['round_user_list'] = [
    set(np.unique(frame.filter(like='ACCOUNT')))
    | set(np.unique(frame.filter(like='QUANTIFIER')))
    for frame in (allrounds_df[name] for name in roundname_list)
]

In [None]:
# User set of every round, looked up by round name and kept in round order.
user_sets = [round_stats.loc[name, 'round_user_list'] for name in roundname_list]

# For each pair of consecutive rounds: users present only in the later round ("new")
# and users present only in the earlier round ("left").
new_counts = [len(curr - prev) for prev, curr in zip(user_sets, user_sets[1:])]
left_counts = [len(prev - curr) for prev, curr in zip(user_sets, user_sets[1:])]

# The first round has no predecessor, so its entry stays NaN. Each column is assigned
# in a single step: the chained form `round_stats[col].iloc[1:] = ...` writes into an
# intermediate Series, which raises SettingWithCopyWarning and leaves round_stats
# unchanged when pandas Copy-on-Write is active.
user_new = np.full(len(roundname_list), np.nan)
user_new[1:] = new_counts
round_stats['round_user_new'] = user_new

user_left = np.full(len(roundname_list), np.nan)
user_left[1:] = left_counts
round_stats['round_user_left'] = user_left

In [None]:
# Net change per round: 'round_user_new' minus 'round_user_left'.
round_stats['round_net_user_diff'] = round_stats['round_user_new'].sub(round_stats['round_user_left'])

In [None]:
# Plot the new, left and net user counts against the round start date.
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y=['round_user_new', 'round_user_left', 'round_net_user_diff'],
)

## distribution equality

### Nakamoto Coefficient

The Nakamoto coefficient is defined as the smallest number of accounts that together control more than 50% of a resource. Although its significance relates to the prospect of a 51% attack on a network, which may not be relevant in our context, we can still use it as an intuitive measure of how many individuals received the majority of a resource.

A bigger coefficient means a more equal distribution (i.e. more people are needed to pass 50%); a smaller coefficient means more concentrated power. The coefficient is always an integer.

In [None]:
def nakamoto_coeff(x, key):
    """Return how many of the largest rows are needed for their `key` total to exceed 0.5.

    Parameters
    ----------
    x : pandas.DataFrame
        Table to evaluate; it is sorted by `key` in descending order first.
    key : str
        Column whose values are accumulated. The fixed 0.5 threshold only
        represents "half of the total" if the column sums to 1.

    Returns
    -------
    int
        1-based count of top rows whose running total is strictly greater
        than 0.5, or -1 when the running total never exceeds 0.5 (for
        example when `x` is empty).
    """
    sorted_x = x.sort_values(by=key, ascending=False)
    running_total = np.array(sorted_x[key].cumsum())
    # Positions at which the running total has already passed the threshold.
    past_half = np.flatnonzero(running_total > 0.5)
    if past_half.size == 0:
        # Explicit sentinel for "no winner" instead of a bare `except:`, which
        # also hid unrelated failures.
        return -1
    return int(past_half[0]) + 1
def nakamoto_coeff_ratio(x, key):
    """Return nakamoto_coeff(x, key) divided by the number of rows in `x`.

    Parameters
    ----------
    x : pandas.DataFrame
        Table passed through to nakamoto_coeff.
    key : str
        Column passed through to nakamoto_coeff.

    Returns
    -------
    float
        The coefficient as a fraction of len(x). NaN is returned when `x` has
        no rows (previously a ZeroDivisionError) or when nakamoto_coeff
        reports its -1 "no winner" sentinel (previously a negative ratio).
    """
    winner = nakamoto_coeff(x, key)
    n_rows = len(x)
    if n_rows == 0 or winner < 0:
        return float('nan')
    return winner / n_rows

In [None]:
# Add a 'PERCENTAGE' column: each row's 'TOTAL TO RECEIVE' as a fraction of the
# round's column total. Series.sum() is vectorised and skips missing values, whereas
# the builtin sum() iterates in Python and lets a single NaN turn every share into NaN.
for round_name in roundname_list:
    totalreceive = allrounds_finaldist[round_name]['TOTAL TO RECEIVE']
    allrounds_finaldist[round_name]['PERCENTAGE'] = totalreceive / totalreceive.sum()

In [None]:
# Nakamoto coefficient of each round's 'PERCENTAGE' column, as a count and as a
# fraction of the round's rows.
round_stats['nakamoto'] = [
    nakamoto_coeff(allrounds_finaldist[name], 'PERCENTAGE')
    for name in roundname_list
]
round_stats['nakamoto_ratio'] = [
    nakamoto_coeff_ratio(allrounds_finaldist[name], 'PERCENTAGE')
    for name in roundname_list
]
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y='nakamoto',
    markers=True,
    title='minimum number of people to receive 50% of total rewards',
)


In [None]:
# Plot the per-round Nakamoto ratio against the round start date.
px.line(
    data_frame=round_stats,
    x='round_start_time',
    y='nakamoto_ratio',
    markers=True,
    title='ratio of people to receive 50% of total rewards',
)


## quantification agreement

TODO: measure how well quantifiers agree with each other, using metrics such as the ratio of agreement on duplication and dismissal, the overall average spread, etc.