In [None]:

import os
import sys

import pandas as pd 
import numpy as np 

from analysis_tools.module_libraries import general_tool_module as tools
from analysis_tools.module_libraries import praise_tool_module as praise_tools


import holoviews as hv
from holoviews import opts
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

import base64
from IPython.display import HTML

import scrapbook as sb

In [None]:
# Round numbers 1..7; each round's praise table is read from its own results folder
# and stored under the key 'round-<n>'.
rounds = np.arange(1, 8)
datadir = 'distribution_results'
allrounds_df = {
    f'round-{kr}': pd.read_csv(f'{datadir}/round-{kr}/results/distribution/praise_outliers.csv')
    for kr in rounds
}
    

In [None]:
# Peek at the column labels of one round's praise table.
allrounds_df['round-4'].columns

In [None]:
# Final token allocation table of every round, keyed by 'round-<n>'.
allrounds_finaldist = {
    f'round-{kr}': pd.read_csv(
        f'{datadir}/round-{kr}/results/distribution/final_praise_token_allocation.csv'
    )
    for kr in rounds
}

In [None]:
# Show the column labels of the first round's final allocation table.
allrounds_finaldist['round-1'].columns

# General statistics

In [None]:
# Empty summary table with one row per round label; statistics are added column by column below.
round_stats = pd.DataFrame(index=list(allrounds_df))

In [None]:
# Earliest and latest DATE value of each round, truncated to the first 10 characters
# (presumably the YYYY-MM-DD part of a timestamp string -- confirm against the CSV format).
round_stats['round_start_time'] = [
    allrounds_df[f'round-{kr}']['DATE'].min()[:10] for kr in rounds
]
round_stats['round_end_time'] = [
    allrounds_df[f'round-{kr}']['DATE'].max()[:10] for kr in rounds
]

In [None]:
# Display the per-round summary table built so far.
round_stats

## Praise involvement

### How many praise? 

In [None]:
# Number of rows in each round's praise table, plotted against the round start date.
round_stats['total_praise'] = [allrounds_df[f'round-{kr}'].shape[0] for kr in rounds]
px.line(round_stats, x='round_start_time', y='total_praise', markers=True)

### How many people give praise?

In [None]:
# Count of distinct 'FROM USER ACCOUNT' values per round.
round_stats['total_praise_giver'] = [
    np.unique(allrounds_df[f'round-{kr}']['FROM USER ACCOUNT']).size for kr in rounds
]
px.line(round_stats, x='round_start_time', y='total_praise_giver', markers=True)


### How many people receive praise?

In [None]:
# Count of distinct 'TO USER ACCOUNT' values per round, plotted together with the giver count.
round_stats['total_praise_receiver'] = [
    np.unique(allrounds_df[f'round-{kr}']['TO USER ACCOUNT']).size for kr in rounds
]
px.line(
    round_stats,
    x='round_start_time',
    y=['total_praise_receiver', 'total_praise_giver'],
    markers=True,
    title='total praise giver and receiver',
)


## Quantifier involvement

In [None]:
# Count of distinct values found across every column whose name contains 'QUANTIFIER'.
# NOTE(review): this pools all matching columns; if they hold more than one kind of
# identifier per quantifier (or missing values), those are counted too -- confirm the schema.
round_stats['total_quantifier'] = [
    np.unique(allrounds_df[f'round-{kr}'].filter(like='QUANTIFIER')).size for kr in rounds
]
px.line(
    round_stats,
    x='round_start_time',
    y=['total_quantifier'],
    markers=True,
    title='total quantifiers',
)

# System health evaluation


## new TEC members

In [None]:
# For every round, the set of distinct values appearing in any column whose name contains
# 'ACCOUNT' merged with those appearing in any column whose name contains 'QUANTIFIER'.
round_stats['round_user_list'] = [
    set(np.unique(allrounds_df[f'round-{kr}'].filter(like='ACCOUNT')))
    | set(np.unique(allrounds_df[f'round-{kr}'].filter(like='QUANTIFIER')))
    for kr in rounds
]

In [None]:
# Compare each round's user set with the previous round's:
#   round_user_new  = users present in this round but not in the previous one
#   round_user_left = users present in the previous round but not in this one
# The first round has no predecessor, so both values are NaN there.
#
# Each column is built as one complete list and assigned in a single step. The earlier
# `round_stats[col].iloc[1:] = ...` form is chained assignment: it triggers
# SettingWithCopyWarning and leaves the frame unchanged under pandas Copy-on-Write.
round_labels = ['round-' + str(kr) for kr in rounds]
consecutive_rounds = list(zip(round_labels[:-1], round_labels[1:]))

round_stats['round_user_new'] = [np.nan] + [
    len(round_stats.loc[curr, 'round_user_list'] - round_stats.loc[prev, 'round_user_list'])
    for prev, curr in consecutive_rounds
]
round_stats['round_user_left'] = [np.nan] + [
    len(round_stats.loc[prev, 'round_user_list'] - round_stats.loc[curr, 'round_user_list'])
    for prev, curr in consecutive_rounds
]

In [None]:
# Net change in users per round: newcomers minus leavers (NaN where either input is NaN).
round_stats['round_net_user_diff'] = round_stats['round_user_new'].sub(round_stats['round_user_left'])

In [None]:
# Plot new, departed and net user counts against the round start date.
px.line(
    round_stats,
    x='round_start_time',
    y=['round_user_new', 'round_user_left', 'round_net_user_diff'],
)

## distribution equality

### Nakamoto Coefficient

The Nakamoto coefficient is defined as the smallest number of accounts that together control more than 50% of a resource. Although its significance relates to the prospect of a 51% attack on a network, which may not be relevant in our context, we can still use it as an intuitive measure of how many individuals received the majority of a resource.

A bigger coefficient means a more equal distribution (i.e. more people are needed to pass 50%); a smaller coefficient means the resource is more concentrated. The coefficient is always an integer.

In [None]:
def nakamoto_coeff(x, key, threshold=0.5):
    """Return the Nakamoto coefficient of the shares stored in ``x[key]``.

    Rows are ranked from the largest to the smallest share, and the result is
    the number of top-ranked rows needed for their cumulative share to
    strictly exceed ``threshold``.

    Parameters
    ----------
    x : pandas.DataFrame
        Table with one row per account.
    key : str
        Name of the column holding each account's share. The values are
        compared with ``threshold`` as-is, so with the default threshold they
        are expected to be fractions of the total.
    threshold : float, default 0.5
        Cumulative share that has to be strictly exceeded.

    Returns
    -------
    int
        The 1-based count of rows needed, or ``-1`` when the cumulative share
        never exceeds ``threshold`` (for example when ``x`` is empty).

    Raises
    ------
    KeyError
        If ``key`` is not a column of ``x``.
    """
    cumulative_share = x[key].sort_values(ascending=False).cumsum().to_numpy()
    # Positions at which the running total is already past the threshold.
    past_threshold = np.flatnonzero(cumulative_share > threshold)
    if past_threshold.size == 0:
        # No prefix of the ranking crosses the threshold: keep the -1 sentinel
        # callers already rely on, without hiding unrelated errors.
        return -1
    return int(past_threshold[0]) + 1
def nakamoto_coeff_ratio(x, key):
    """Return the Nakamoto coefficient of ``x[key]`` as a fraction of the row count.

    Parameters
    ----------
    x : pandas.DataFrame
        Table with one row per account.
    key : str
        Name of the column holding each account's share.

    Returns
    -------
    float
        ``nakamoto_coeff(x, key) / len(x)``, or NaN when ``x`` has no rows or
        ``nakamoto_coeff`` reports that no majority exists (negative result).
    """
    winner = nakamoto_coeff(x, key)
    n_accounts = len(x)
    # A negative coefficient is the "no majority" sentinel and an empty table has
    # no accounts to divide by; in both cases the ratio is undefined.
    if winner < 0 or n_accounts == 0:
        return float('nan')
    return winner / n_accounts

In [None]:
# Add a PERCENTAGE column to each round's allocation table: every account's
# 'TOTAL TO RECEIVE' divided by the round total.
for kr in rounds:
    final_dist = allrounds_finaldist[f'round-{kr}']
    received = final_dist['TOTAL TO RECEIVE']
    final_dist['PERCENTAGE'] = received / sum(received)

In [None]:
# Nakamoto coefficient and its ratio to the number of rows, computed per round
# from the PERCENTAGE column; the absolute coefficient is plotted here.
round_stats['nakamoto'] = [
    nakamoto_coeff(allrounds_finaldist[f'round-{kr}'], 'PERCENTAGE') for kr in rounds
]
round_stats['nakamoto_ratio'] = [
    nakamoto_coeff_ratio(allrounds_finaldist[f'round-{kr}'], 'PERCENTAGE') for kr in rounds
]
px.line(
    round_stats,
    x='round_start_time',
    y='nakamoto',
    markers=True,
    title='minimum number of people to receive 50% of total rewards',
)


In [None]:
# Plot the per-round Nakamoto ratio against the round start date.
px.line(
    round_stats,
    x='round_start_time',
    y='nakamoto_ratio',
    markers=True,
    title='ratio of people to receive 50% of total rewards',
)


## quantification agreement

TODO: Measure how well quantifiers agree with each other, using metrics such as the ratio of agreement on duplication and dismissal, the overall average spread, etc.