In [1]:
import pandas as pd

In [2]:
# Load the base delegate table. The info() output further down shows it has two
# columns: `delegate` (object) and `voting_power` (float64).
data = pd.read_csv("../Data_Source/optimism_delegate.csv")

In [3]:
# Print row count, per-column non-null counts and dtypes of the loaded delegate table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 226569 entries, 0 to 226568
Data columns (total 2 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   delegate      226569 non-null  object 
 1   voting_power  226569 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.5+ MB


In [4]:
# Load data from CSV files
def _read_latin1(csv_path):
    """Read one roster CSV; every roster file here is decoded as latin1."""
    return pd.read_csv(csv_path, encoding='latin1')


# Retro funding rounds (one file per round).
citizens_round2 = _read_latin1("../Data_Source/Retro Round 2.csv")
citizens_round3 = _read_latin1("../Data_Source/Retro Round 3.csv")
citizens_round4 = _read_latin1("../Data_Source/Retro Round 4.csv")

# Councils and boards; `grants` is split by its `season` column in a later cell.
grants = _read_latin1("../Data_Source/Grants_Council.csv")
grants_mm_season5 = _read_latin1("../Data_Source/Grants_Council_MM.csv")
dab_season5 = _read_latin1("../Data_Source/Developer_Advisory_Board.csv")
coc_season5 = _read_latin1("../Data_Source/Code_of_Conduct_Council.csv")

In [5]:
# Drop rows with null values in the 'address' column for each DataFrame
# The frames are modified in place, so the existing variable names keep
# pointing at the cleaned data.
for roster_frame in (
    citizens_round2,
    citizens_round3,
    citizens_round4,
    grants,
    grants_mm_season5,
    dab_season5,
    coc_season5,
):
    roster_frame.dropna(subset=['address'], inplace=True)

In [6]:
# Filter grants data by season
# NOTE(review): the comparison is against integers, so this assumes the
# `season` column was parsed as a numeric dtype — confirm against the CSV.
grant_season = grants['season']
grants_season3 = grants.loc[grant_season.eq(3)]
grants_season4 = grants.loc[grant_season.eq(4)]
grants_season5 = grants.loc[grant_season.eq(5)]

In [7]:
# Helper function to add missing delegates
def add_missing_delegates(data, new_addresses):
    """Append a zero-voting-power row for each address not already in ``data``.

    Matching is case-insensitive: both the existing ``delegate`` values and the
    incoming ``address`` values are lowercased before comparison, and appended
    addresses are stored in their lowercased form.

    Parameters
    ----------
    data : pandas.DataFrame
        Delegate table with at least a ``delegate`` column of strings and a
        ``voting_power`` column.
    new_addresses : pandas.DataFrame
        Roster with an ``address`` column of strings. Null addresses are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index: the rows of ``data`` followed by
        one row per missing address (sorted, ``voting_power`` = 0.0). Any other
        columns of ``data`` are left as NaN on the appended rows. ``data``
        itself is not modified.
    """
    known_delegates = set(data['delegate'].dropna().str.lower())

    # Drop nulls first so a blank roster cell cannot become a NaN "delegate".
    candidate_addresses = set(new_addresses['address'].dropna().str.lower())

    # Sort so the appended row order is reproducible across runs; plain set
    # iteration order for strings varies with hash randomisation.
    missing_addresses = sorted(candidate_addresses - known_delegates)

    if not missing_addresses:
        # Nothing to add: skip concatenating an empty frame, but still return a
        # new frame with a reset index, as the concat path does.
        return data.reset_index(drop=True)

    # Create a DataFrame for these missing addresses with voting_power = 0
    missing_df = pd.DataFrame({
        'delegate': missing_addresses,
        'voting_power': 0.0
    })

    # Merge with the original data
    return pd.concat([data, missing_df], ignore_index=True)

In [8]:
# Add missing delegates
# Each roster is folded into the delegate table in turn; an address introduced
# by an earlier roster is already present when a later roster is processed.
rosters_to_merge = (
    citizens_round2,
    citizens_round3,
    citizens_round4,
    grants_season3,
    grants_season4,
    grants_season5,
    grants_mm_season5,
    dab_season5,
    coc_season5,
)
for roster in rosters_to_merge:
    data = add_missing_delegates(data, roster)

In [9]:
# Add the columns
# Placeholder columns are created up front, in this exact order, so the final
# column layout is fixed before any values are computed.
# NOTE(review): 'sc_member_s5' and 'sc_vp_s5' are never assigned again in the
# visible cells, so they stay empty in the exported CSV — confirm this is intended.
placeholder_columns = [
    'th_vp',
    'ch_member_r2', 'ch_vp_r2',
    'ch_member_r3', 'ch_vp_r3',
    'ch_member_r4', 'ch_vp_r4',
    'gc_member_s3', 'gc_vp_s3',
    'gc_member_s4', 'gc_vp_s4',
    'gc_member_s5', 'gc_vp_s5',
    'gc_member_mm_s5', 'gc_vp_mm_s5',
    'sc_member_s5', 'sc_vp_s5',
    'coc_member_s5', 'coc_vp_s5',
    'dab_member_s5', 'dab_vp_s5',
]
for placeholder in placeholder_columns:
    data[placeholder] = None

In [10]:
# Re-inspect the table after appending missing delegates and adding the
# placeholder columns (all new columns are expected to be entirely null here).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 226659 entries, 0 to 226658
Data columns (total 23 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   delegate         226659 non-null  object 
 1   voting_power     226659 non-null  float64
 2   th_vp            0 non-null       object 
 3   ch_member_r2     0 non-null       object 
 4   ch_vp_r2         0 non-null       object 
 5   ch_member_r3     0 non-null       object 
 6   ch_vp_r3         0 non-null       object 
 7   ch_member_r4     0 non-null       object 
 8   ch_vp_r4         0 non-null       object 
 9   gc_member_s3     0 non-null       object 
 10  gc_vp_s3         0 non-null       object 
 11  gc_member_s4     0 non-null       object 
 12  gc_vp_s4         0 non-null       object 
 13  gc_member_s5     0 non-null       object 
 14  gc_vp_s5         0 non-null       object 
 15  gc_member_mm_s5  0 non-null       object 
 16  gc_vp_mm_s5      0 non-null       obje

In [11]:
# Create membership columns for existing delegates
# Lowercase the delegate addresses once and test them against a set built once
# per roster. Re-lowercasing the whole roster inside a per-row lambda repeats
# that work (plus a linear scan) for every delegate row.
delegate_lower = data['delegate'].str.lower()

# Target flag column -> roster whose `address` column defines membership.
membership_rosters = {
    'ch_member_r2': citizens_round2,
    'ch_member_r3': citizens_round3,
    'ch_member_r4': citizens_round4,
    'gc_member_s3': grants_season3,
    'gc_member_s4': grants_season4,
    'gc_member_s5': grants_season5,
    'gc_member_mm_s5': grants_mm_season5,
    'dab_member_s5': dab_season5,
    'coc_member_s5': coc_season5,
}

for member_col, roster in membership_rosters.items():
    roster_addresses = set(roster['address'].dropna().str.lower())
    # 1 when the delegate appears in the roster (case-insensitive), else 0.
    data[member_col] = delegate_lower.isin(roster_addresses).astype('int64')

In [12]:
# Calculate the share for each delegate in each council
# All shares are percentages (0-100). Computations are column-wise; a row-wise
# apply(axis=1) builds a Python-level Series for every row of the table.

def _equal_share(member_flags, member_count):
    """Return 100 / member_count for rows flagged 1 and 0 for every other row.

    The mask is applied after the division, so a zero ``member_count`` (no row
    flagged) yields all zeros rather than NaN/inf.
    """
    return ((member_flags * 100) / member_count).where(member_flags == 1, 0)


# Token House: only delegates with voting_power strictly greater than 1 count,
# both in the denominator and as recipients of a non-zero share.
has_token_weight = data['voting_power'] > 1
sum_th_vp = data.loc[has_token_weight, 'voting_power'].sum()
data['th_vp'] = ((data['voting_power'] * 100) / sum_th_vp).where(has_token_weight, 0)

# Member-based bodies: every member of a body gets an equal share of 100.
count_ch_member_r2 = data['ch_member_r2'].sum()
data['ch_vp_r2'] = _equal_share(data['ch_member_r2'], count_ch_member_r2)

count_ch_member_r3 = data['ch_member_r3'].sum()
data['ch_vp_r3'] = _equal_share(data['ch_member_r3'], count_ch_member_r3)

count_ch_member_r4 = data['ch_member_r4'].sum()
data['ch_vp_r4'] = _equal_share(data['ch_member_r4'], count_ch_member_r4)

count_gc_member_s3 = data['gc_member_s3'].sum()
data['gc_vp_s3'] = _equal_share(data['gc_member_s3'], count_gc_member_s3)

count_gc_member_s4 = data['gc_member_s4'].sum()
data['gc_vp_s4'] = _equal_share(data['gc_member_s4'], count_gc_member_s4)

count_gc_member_s5 = data['gc_member_s5'].sum()
data['gc_vp_s5'] = _equal_share(data['gc_member_s5'], count_gc_member_s5)

count_gc_member_mm_s5 = data['gc_member_mm_s5'].sum()
data['gc_vp_mm_s5'] = _equal_share(data['gc_member_mm_s5'], count_gc_member_mm_s5)

count_coc_member_s5 = data['coc_member_s5'].sum()
data['coc_vp_s5'] = _equal_share(data['coc_member_s5'], count_coc_member_s5)

count_dab_member_s5 = data['dab_member_s5'].sum()
data['dab_vp_s5'] = _equal_share(data['dab_member_s5'], count_dab_member_s5)


In [13]:
# Define the weights
# Each value is a percentage weight applied to the matching *_vp_* share column.

# Seasons 3 and 4: a scenario uses th_vp, ch_vp_r2 and one gc_vp_* column,
# which adds up to 40 + 45 + 15 = 100.
weights_season_3_4 = dict(
    th_vp=40,
    ch_vp_r2=45,
    gc_vp_s3=15,
    gc_vp_s4=15,
)

# Season 5.
# NOTE(review): the season-5 scenario lists in this notebook combine th_vp, one
# ch_vp_* column, gc_vp_s5, gc_vp_mm_s5, coc_vp_s5 and dab_vp_s5, whose weights
# add up to 32 + 35 + 10 + 3 + 4 + 3 = 87 rather than 100 — confirm whether a
# weight (e.g. for the unpopulated sc_vp_s5 column) is intentionally absent.
weights_season5 = dict(
    th_vp=32,
    ch_vp_r2=35,
    ch_vp_r3=35,
    ch_vp_r4=35,
    gc_vp_s3=10,
    gc_vp_s4=10,
    gc_vp_s5=10,
    gc_vp_mm_s5=3,
    coc_vp_s5=4,
    dab_vp_s5=3,
)

In [14]:
# Share columns that enter each (retro round, season) influence scenario.
# Seasons 3 and 4 differ only in which grants-council column is used.
round2_season3, round2_season4 = (
    ['th_vp', 'ch_vp_r2', grants_col]
    for grants_col in ('gc_vp_s3', 'gc_vp_s4')
)

# Season 5 scenarios differ only in which retro-round column is used.
round2_season5, round3_season5, round4_season5 = (
    ['th_vp', citizens_col, 'gc_vp_s5', 'gc_vp_mm_s5', 'dab_vp_s5', 'coc_vp_s5']
    for citizens_col in ('ch_vp_r2', 'ch_vp_r3', 'ch_vp_r4')
)

In [15]:
# Multiply each chosen column by its respective weight and sum the results
# Output column -> (share columns in the scenario, weight table to apply).
influence_scenarios = {
    'influence_r2_s3': (round2_season3, weights_season_3_4),
    'influence_r2_s4': (round2_season4, weights_season_3_4),
    'influence_r2_s5': (round2_season5, weights_season5),
    'influence_r3_s5': (round3_season5, weights_season5),
    'influence_r4_s5': (round4_season5, weights_season5),
}

for influence_col, (share_cols, weight_table) in influence_scenarios.items():
    # Weights are percentages, hence the division by 100.
    data[influence_col] = sum(
        data[share_col] * (weight_table[share_col] / 100)
        for share_col in share_cols
    )

In [16]:
# List the final column layout, including the five influence_* score columns.
data.columns

Index(['delegate', 'voting_power', 'th_vp', 'ch_member_r2', 'ch_vp_r2',
       'ch_member_r3', 'ch_vp_r3', 'ch_member_r4', 'ch_vp_r4', 'gc_member_s3',
       'gc_vp_s3', 'gc_member_s4', 'gc_vp_s4', 'gc_member_s5', 'gc_vp_s5',
       'gc_member_mm_s5', 'gc_vp_mm_s5', 'sc_member_s5', 'sc_vp_s5',
       'coc_member_s5', 'coc_vp_s5', 'dab_member_s5', 'dab_vp_s5',
       'influence_r2_s3', 'influence_r2_s4', 'influence_r2_s5',
       'influence_r3_s5', 'influence_r4_s5'],
      dtype='object')

In [17]:
# Write the enriched delegate table next to the source files; index=False keeps
# the RangeIndex out of the CSV.
data.to_csv("../Data_Source/data.csv", index=False)