# Analysis of Decentraland

In [None]:
#Imports
import ast
import statistics
from collections import defaultdict
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# This step is only needed if the raw data has just been obtained
If you are working with the research data, this step can be skipped.

We upload the Decentraland data, both votes and proposals.

In [None]:
# Load the raw exports: one table of votes and one table of proposals.
votes = pd.read_csv(filepath_or_buffer='votes.csv')
# The proposals file is decoded as latin1 instead of pandas' default UTF-8.
proposals = pd.read_csv(filepath_or_buffer='proposals.csv', encoding='latin1')

We check for duplicate rows and remove them if they exist.

In [None]:
# Remove exact duplicate rows from both tables, printing the row count before
# and after each removal (the original note records that none were found).

# Proposals
n_proposals_before = len(proposals)
print("Original rows:", n_proposals_before)
proposals = proposals.drop_duplicates()
n_proposals_after = len(proposals)
print("Rows after removing duplicates:", n_proposals_after)

# Votes
n_votes_before = len(votes)
print("Original rows:", n_votes_before)
votes = votes.drop_duplicates()
n_votes_after = len(votes)
print("Rows after removing duplicates:", n_votes_after)

We verify that there are no null or negative values.

In [None]:
# Sanity checks on the voting-power column: negative values and missing values.
negative_vp_rows = votes[votes['vp'] < 0]
missing_vp_count = votes['vp'].isna().sum()

print("Number of rows with negative votes:", len(negative_vp_rows))
print("Number of rows with invalid votes:", missing_vp_count)

We parse the “proposal” column of the votes to remove the word id, as well as any curly brackets it contains. We also convert the identifier to lowercase. 

In [None]:
# Strip the "{'id': '...'}" wrapper around each proposal identifier and
# lowercase what is left.
# The vectorised `.str` accessor propagates missing values instead of raising
# AttributeError on them (as `x.replace` inside `.apply` did), and
# `regex=False` makes both patterns plain literals.
votes['proposal'] = (
    votes['proposal']
    .str.replace("{'id': '", "", regex=False)
    .str.replace("'}", "", regex=False)
    .str.lower()
)

We convert the id, author, and voter columns of the corresponding dataframes to lowercase.

In [None]:
def lowercase_columns(df, cols):
    """Lowercase the string values of the given columns of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to normalise. It is modified in place.
    cols : iterable of str
        Column names to process; names that are not columns of `df` are
        ignored.

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    def _lower_if_text(value):
        # Non-string cells (numbers, missing values, ...) are left untouched.
        return value.lower() if isinstance(value, str) else value

    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = df[name].apply(_lower_if_text)
    return df

# Lowercase the identifier / address columns of both tables.
proposals = lowercase_columns(df=proposals, cols=['id', 'author'])
votes = lowercase_columns(df=votes, cols=['id', 'voter'])


### Calculation of new metrics 

Adding a new field with the total VP for each proposal.

In [None]:
# Add totalVP: the sum of the voting power of all votes cast on each proposal.

# One row per proposal id with the summed voting power.
vp_prop = (
    votes.groupby('proposal', as_index=False)['vp'].sum()
    .rename(columns={'proposal': 'id'})
)

# Any `totalVP` left by a previous run of this cell is dropped first, so
# re-running the cell does not produce a duplicated column. The summed column
# is renamed before the merge so it cannot collide with an existing `vp`
# column of `proposals`.
proposals = (
    proposals.drop(columns=['totalVP'], errors='ignore')
    .merge(vp_prop.rename(columns={'vp': 'totalVP'}), how='left', on='id')
)
# Proposals with no votes have no match in `vp_prop`; their total is 0.
proposals['totalVP'] = proposals['totalVP'].fillna(0)
proposals.head()

We save the clean dataframes.

In [None]:
# Write the cleaned tables to the working directory, without the row index.
proposals.to_csv(path_or_buf='./proposalsClean.csv', index=False)
votes.to_csv(path_or_buf='./votesClean.csv', index=False)

# Calculation of metrics
In this section, we extract some metrics that are useful for analysis.

In [None]:
# Headline sizes of the dataset.
unique_voters = set(votes['voter'])
n_votes = len(votes)
n_proposals = len(proposals)

print("Number of unique voters:", len(unique_voters))
print("Total number of votes:", n_votes)
print("Total number of proposals:", n_proposals)

We extract the IDs of the proposals with unanimity.

In [None]:
# Collect the proposals whose votes all picked the same option ("unanimous")
# and store their ids in props_ids.csv.
# The votes are grouped once, instead of re-filtering the whole `votes` and
# `proposals` frames for every proposal as the loop version did.

# Number of distinct choices among the votes of each proposal.
choices_per_proposal = votes.groupby('proposal')['choice'].agg(lambda s: len(set(s)))
# Vote count recorded on each proposal (the first row wins if an id repeats).
votes_declared = proposals.drop_duplicates(subset='id').set_index('id')['votes']

prop_id_list = [
    prop_id
    for prop_id in proposals['id']
    # A proposal with no vote rows has 0 distinct choices and is never unanimous.
    if votes_declared[prop_id] != 0 and choices_per_proposal.get(prop_id, 0) == 1
]
unanimidad = len(prop_id_list)

pd.DataFrame(prop_id_list).to_csv("props_ids.csv", index = False)
print(f"Number of unanimous proposals: {unanimidad}")


We establish the period of time to be analyzed and order the proposals chronologically for analysis.

In [None]:
# Time frame to be studied: from the earliest vote to 3 June 2024.
ini = min(votes["created"])
fin = datetime(2024, 6, 3, 0, 0, 0, 0)

# One tick per calendar month, stepping from `ini`.
# NOTE(review): the monthly steps appear to keep the day and time of `ini`, so
# the month containing `fin` is left out whenever that day/time falls later in
# the month than `fin` — confirm this is intended.
month_ticks = pd.date_range(start=ini, end=fin, freq=pd.DateOffset(months=1))
allMonths = month_ticks.strftime("%Y-%m")
allMonthsNew = month_ticks.strftime("%Y-%b")

# Axis labels for the charts: the month abbreviation alone, except January,
# which also carries the year on a second line.
DateAxis = [
    month if month != "Jan" else month + "\n" + year
    for year, month in (label.split("-") for label in allMonthsNew)
]

# Sort proposals by creation date to review them in order.
# Entries whose date is missing or cannot be parsed are left out.
tupla_list = list(zip(proposals['id'], proposals['date']))
parsed_pairs = (
    (id_, pd.to_datetime(date_str, errors='coerce'))
    for id_, date_str in tupla_list
    if not pd.isna(date_str)
)
tupla_clean = [(id_, dt) for id_, dt in parsed_pairs if not pd.isna(dt)]

IDsPropsOrdenados = sorted(tupla_clean, key=lambda pair: pair[1])

# Simple metrics

In [None]:
# Size of the dataset.
print(f"Total number of months: {len(allMonths)}")
print("----------------------------------")

print(f"Total number of proposals: {len(proposals)}")
print(f"Total number of voters: {len(set(votes['voter']))}")
print(f"Total number of votes: {len(votes)}")
print("----------------------------------")

# Votes received per proposal.
# Non-numeric entries are coerced to NaN, so the NaN-skipping pandas
# reductions are used: the builtin max()/min() give order-dependent results
# when a NaN is present in the sequence.
print(f"Total number of votes: {votes['id'].count()}")
proposals['votes'] = pd.to_numeric(proposals['votes'], errors='coerce')
votes_per_proposal = proposals['votes']
print(f"Max number of votes in a proposal: {votes_per_proposal.max()}")
print(f"Min number of votes in a proposal: {votes_per_proposal.min()}")
print(f"Median number of votes: {votes_per_proposal.median()}")
print(f"Mean number of votes: {votes_per_proposal.mean():.2f}")
print("----------------------------------")

# Votes cast per voter (a count, so it never contains NaN).
votos_gr = votes.groupby('voter')['id'].count()
print(f"Max number of votes for a voter: {max(votos_gr)}")
print(f"Min number of votes for a voter: {min(votos_gr)}")
print(f"Median number of votes: {statistics.median(votos_gr)}")
print(f"Mean number of votes: {statistics.mean(votos_gr)}")

# Simple metrics related to VP

In [None]:
# Median voting power of each voter, and the sum of those medians.
# A single groupby (the same one the next cell uses) replaces the per-voter
# scan of `votes`. It also fixes the summation order, so the printed float is
# the same on every run; iterating over a set of strings does not guarantee
# that.
vp = votes.groupby('voter')['vp'].median().tolist()

total_median_vp = sum(vp)

print(f"Sum of the median values of the voters' vp: {total_median_vp}")

In [None]:
# Voting power per voter: summary of each voter's median VP. Every figure is
# followed by its percentage of `total_median_vp`.
vp_gr = votes.groupby('voter')['vp'].median()

vp_gr_max = max(vp_gr)
vp_gr_min = min(vp_gr)
vp_gr_median = statistics.median(vp_gr)
vp_gr_mean = statistics.mean(vp_gr)

print(f"Max VP for a voter (median): {vp_gr_max}, {vp_gr_max / total_median_vp * 100}")
print(f"Min VP for a voter (median): {vp_gr_min}, {vp_gr_min / total_median_vp * 100}")
print(f"Median VP for a voter (median): {vp_gr_median}, {vp_gr_median / total_median_vp * 100}")
print(f"Mean VP for a voter (median): {vp_gr_mean}, {vp_gr_mean / total_median_vp * 100}")

In [None]:
# VP per proposal: summary of the `totalVP` column. Every figure is followed
# by its percentage of `total_median_vp`.
vp_by_proposal = proposals['totalVP']
print(f"Total number of vp: {vp_by_proposal.sum()}")

prop_vp_max = max(vp_by_proposal)
prop_vp_min = min(vp_by_proposal)
prop_vp_median = statistics.median(vp_by_proposal)
prop_vp_mean = statistics.mean(vp_by_proposal)

print(f"Max VP in a proposal: {prop_vp_max}, {prop_vp_max / total_median_vp * 100}")
print(f"Min VP in a proposal: {prop_vp_min}, {prop_vp_min / total_median_vp * 100}")
print(f"Median VP: {prop_vp_median}, {prop_vp_median / total_median_vp * 100}")
print(f"Mean VP: {prop_vp_mean}, {prop_vp_mean / total_median_vp * 100}")

## Analysis of proposal results

Number of proposals seconded, not seconded, unanimously approved, or not voted on.

In [None]:
def _parse_scores(raw):
    """Return the per-option scores of one proposal as a list.

    When the proposals come from a CSV file the `scores` cell is text
    (presumably the repr of a list — TODO confirm). Taking max() of that text
    would compare characters, so text is parsed back into a Python literal
    first. Values that are already sequences are just copied into a list.
    """
    if isinstance(raw, str):
        raw = ast.literal_eval(raw)
    return list(raw)


secundada = 0
nosecundada = 0
novotos = 0
# NOTE(review): unanimidadConAutor is never incremented below, so the
# "Unanimous proposals" line only counts proposals the author did not vote on.
# Kept as is; confirm whether that is the intended definition.
unanimidadConAutor = 0
unanimidadSinAutor = 0

for prop_id in proposals['id']:
    prop_rows = proposals[proposals['id'] == prop_id]
    autor = prop_rows['author'].iloc[0]
    num_votos = prop_rows['votes'].iloc[0]

    # Votes cast on this proposal, and the subset cast by its author.
    # (Named `author_votes` rather than `filter` to stop shadowing the builtin.)
    prop_votes = votes[votes['proposal'] == prop_id]
    author_votes = prop_votes[prop_votes['voter'] == autor]

    # grey: no one votes
    if num_votos == 0:
        novotos += 1
    # the author votes on the proposal
    elif len(author_votes) > 0:
        opt_autor = author_votes['choice'].iloc[0]
        win = _parse_scores(prop_rows['scores'].iloc[0])
        # Position of the highest score; the first one wins a tie.
        opt_winner = win.index(max(win))

        # The comparison treats `choice` as 1-based and score positions as 0-based.
        # green -> the winning option is the one the author voted for
        if opt_winner == opt_autor - 1:
            secundada += 1
        # red
        else:
            nosecundada += 1
    # the author does not vote: count the proposal if every vote chose the same option
    elif len(set(prop_votes['choice'])) == 1:
        unanimidadSinAutor += 1

print(f"The option voted for by the author wins: {secundada}")
print(f"The option voted for by the author does not win: {nosecundada}")
print(f"Proposals without votes: {novotos}")
print(f"Unanimous proposals: {unanimidadSinAutor + unanimidadConAutor}")

In [None]:
# Share of the votes obtained by the most-voted option of each proposal,
# averaged over the proposals that received votes.
# NOTE(review): `all_dao_info` is not defined in any earlier cell of this
# notebook; it has to be loaded before this cell can run — confirm its origin.
votos = []
for prop_id in proposals['id']:
    # presumably maps each option to the number of votes it got — TODO confirm
    counts = list(dict(all_dao_info['results_per_proposals'][prop_id]).values())
    total = sum(counts)
    # Proposals without votes are skipped here. The previous guard tested
    # `num_votos`, a value left over from the last iteration of an earlier
    # cell's loop, so it said nothing about the current proposal.
    if total == 0:
        continue

    porcentaje = max(counts) * 100 / total
    votos.append(porcentaje)

if votos:
    print(f"Average votes for winning option: {statistics.mean(votos)}")
else:
    # statistics.mean() raises on an empty list.
    print("Average votes for winning option: no proposal received votes")

Number and percentage of voters with more than 50% of the VP

In [None]:
# Smallest number of voters, taken from the largest median VP downwards,
# whose median VP adds up to more than half of the total.
# The per-voter medians come from one groupby (as in the other cells) instead
# of filtering `votes` once per voter.
vps = votes.groupby('voter')['vp'].median().tolist()
totalVP = sum(vps)

ordenado = sorted(vps, reverse = True)

# `res` stays None when no prefix exceeds half of the total (no voters, or a
# total of zero), which previously ended in a NameError at the final print.
res = None
sumi = 0
for i, valor in enumerate(ordenado):
    sumi += valor
    if sumi > totalVP/2:
        # i starts at 0
        res = i+1
        break

if res is None:
    print("No set of voters accumulates > 50% of the DAO VP")
else:
    print("Voters who accumulate > 50% of the DAO VP")
    print(res)
    print("Percentage")
    print(res/len(ordenado)*100)

## Inequality analysis

In [None]:
# Gini coefficient
# based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
# from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

def gini(array):
    """Return the Gini coefficient of the values in `array`.

    Parameters
    ----------
    array : array-like of numbers
        Values of any shape; they are flattened and all treated equally.
        Integer arrays, lists and pandas Series are accepted because the data
        is first copied into a float array. The caller's data is never
        modified.

    Returns
    -------
    float
        The coefficient computed from the sorted values.

    Raises
    ------
    ValueError
        If `array` contains no values.
    """
    # Work on a float copy: adding 1e-9 in place to an integer array raises a
    # casting error, and flatten() guarantees the input is left untouched.
    values = np.asarray(array, dtype=float).flatten()
    if values.size == 0:
        raise ValueError("gini() requires at least one value")

    lowest = np.amin(values)
    if lowest < 0:
        values -= lowest  # values cannot be negative
    values += 1e-9  # values cannot be 0
    values = np.sort(values)  # the formula needs ascending order

    n = values.shape[0]  # number of elements
    index = np.arange(1, n + 1)  # 1-based rank of each element
    return np.sum((2 * index - n - 1) * values) / (n * np.sum(values))


# Gini coefficient of the per-voter median voting power.
median_vp_by_voter = votes.groupby(["voter"])["vp"].median()
data = np.array(median_vp_by_voter)
print(f"The Gini coefficient is (groupped by voter): {gini(data):.2f}")

Lorenz curve to represent the cumulative distribution of VP

In [None]:
def lorenz_curve(X):
    """Draw the Lorenz curve of `X`, save it as lorenz.pdf and show it.

    `X` is a 1-D numeric numpy array. The function does not sort it; the call
    in this notebook passes the values already sorted in ascending order.
    The plot shows the cumulative share of `X` against the cumulative share of
    elements, the diagonal of perfect equality and a horizontal line at 0.5.
    """
    # Cumulative share of the total, with a leading 0 so the curve starts at the origin.
    cumulative_share = np.insert(X.cumsum() / X.sum(), 0, 0)
    population_share = np.arange(cumulative_share.size) / (cumulative_share.size - 1)

    fig, ax = plt.subplots(figsize=[6,6])
    # Lorenz curve as hollow markers
    ax.scatter(
        population_share,
        cumulative_share,
        s=80,
        facecolors='none',
        edgecolors='darkorange',
    )
    # line of equality
    ax.plot([0,1], [0,1], color='k')
    plt.axhline(y=0.5, color='black', lw=0.5)
    plt.ylabel("Accumulated voting power")
    plt.xlabel("Accumulated voters")
    plt.savefig("lorenz.pdf", format="pdf", bbox_inches="tight")
    plt.show()


plt.style.use('bmh')

# Per-voter median voting power, sorted in ascending order for the curve.
median_vp = votes.groupby(["voter"])["vp"].median()
X = np.array(sorted(median_vp))

print("Lorenz curve with VP median")
lorenz_curve(X)


## Vote analysis

Bar chart showing the cumulative number of new voters over time

In [None]:
# myDict:  month ("YYYY-MM") -> number of voters whose first vote falls in it.
# myDict2: month -> running total of those counts over the studied months.
myDict = defaultdict(int)
myDict2 = defaultdict(int)

# The earliest `created` value of every voter comes from one groupby instead
# of filtering the whole `votes` frame once per voter.
first_vote_per_voter = votes.groupby('voter')['created'].min()
for first_created in first_vote_per_voter:
    date_aux = datetime.strptime(first_created, "%Y-%m-%d %H:%M:%S").strftime("%Y-%m")
    myDict[date_aux] += 1

# Only the months listed in `allMonths` are accumulated and plotted.
acum = 0
for date in allMonths:
    acum += myDict[date]
    myDict2[date] = acum

fig, ax = plt.subplots()
fig.set_size_inches(18.5, 10.5)
ax.bar(list(myDict2.keys()), list(myDict2.values()))
plt.xticks(range(len(DateAxis)), DateAxis)
#plt.yticks(range(0, 66, 5))
plt.title('Cumulative new voters')
plt.show()


Scatter plot of median VP of a voter versus number of votes

In [None]:
# One point per voter: median voting power (x) against number of votes cast (y).
# The figure is saved as votesVSvp.pdf.
votes_by_voter = votes.groupby(["voter"])
VP_median_per_voter = votes_by_voter["vp"].median()
votes_per_voter = votes_by_voter["id"].count()

plt.style.use('bmh')
plt.scatter(
    VP_median_per_voter,
    votes_per_voter,
    alpha=0.25,
    s=75,
)
plt.xlabel('Median voting power of the voter')
plt.ylabel('Number of votes casted by a voter')
plt.savefig("votesVSvp.pdf", format="pdf", bbox_inches="tight")

Scatter plot showing the percentage of votes versus the VP of the winning option for each proposal

In [None]:
# NOTE(review): `all_dao_info` is not defined in any earlier cell of this
# notebook; it has to be loaded before this cell can run — confirm its origin.

# Proposal id -> percentage of the votes obtained by the most-voted option.
myDictVotos = {}
for prop, val in all_dao_info['results_per_proposals'].items():
    total_votos = sum(val.values())
    if total_votos != 0:
        myDictVotos[prop] = max(val.values())*100/total_votos

# Proposal id -> percentage of the total score obtained by the top-scoring option.
# The three columns are walked together by position, so this no longer relies
# on the frame having a 0..n-1 index.
df_props = all_dao_info["df_proposals"]
myDictVP = {}
for prop, scores, scores_total in zip(df_props["id"], df_props["scores"], df_props["scores_total"]):
    if scores_total != 0:
        myDictVP[prop] = max(scores)*100 / scores_total

# Pair both percentages for the proposals present in both dictionaries.
# A keyed lookup replaces the nested loop over every pair of entries, whose
# loop variables also overwrote the notebook-level `vp` and `votos`.
x = []
y = []
for prop, percVP in myDictVP.items():
    if prop in myDictVotos:
        y.append(percVP)
        x.append(myDictVotos[prop])

plt.style.use('bmh')
plt.axes().set_aspect('equal')
plt.scatter(x, y, alpha=0.3, s=150)
plt.xlim(0, 100)
plt.ylim(0, 100)


#plt.title('Percentage of votes vs voting power\n of the winning option (for all proposals)')
plt.xlabel('Percentage of votes')
plt.ylabel('Percentage of voting power')
plt.savefig("agreement.pdf", format="pdf", bbox_inches="tight")
plt.show()