In [1]:
# Requirements
import pandas as pd
import numpy as np
import datetime as dt
import networkx as nx
import statsmodels.api as sm

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Get DataFrame
wiki = pd.read_csv('wikiRfA.csv')
wiki.head()

Unnamed: 0,SOURCE,TARGET,VOTE,RESULT,YEAR,DATE,TEXT
0,Steel1943,BDD,1,1,2013,19/04/2013 23:13,'''Support''' as co-nom.
1,Cuchullain,BDD,1,1,2013,20/04/2013 01:04,'''Support''' as nominator.--
2,INeverCry,BDD,1,1,2013,19/04/2013 23:43,'''Support''' per noms.
3,Cncmaster,BDD,1,1,2013,20/04/2013 00:11,'''Support''' per noms. BDD is a strong contri...
4,Miniapolis,BDD,1,1,2013,20/04/2013 00:56,"'''Support''', with great pleasure. I work wit..."


In [3]:
# Adjust Date Format
wiki['DATE'] = pd.to_datetime(wiki['DATE'], infer_datetime_format=True)

# Get Month Variable
wiki['MONTH'] = pd.to_datetime(wiki['DATE']).dt.month

# Election Identifier
wiki['ELECTION'] = wiki['TARGET'] + ', ' + wiki['MONTH'].astype(str) 

In [4]:
# Remove missing values
wiki = wiki.dropna()

In [5]:
wiki.head()

Unnamed: 0,SOURCE,TARGET,VOTE,RESULT,YEAR,DATE,TEXT,MONTH,ELECTION
0,Steel1943,BDD,1,1,2013,2013-04-19 23:13:00,'''Support''' as co-nom.,4.0,"BDD, 4.0"
1,Cuchullain,BDD,1,1,2013,2013-04-20 01:04:00,'''Support''' as nominator.--,4.0,"BDD, 4.0"
2,INeverCry,BDD,1,1,2013,2013-04-19 23:43:00,'''Support''' per noms.,4.0,"BDD, 4.0"
3,Cncmaster,BDD,1,1,2013,2013-04-20 00:11:00,'''Support''' per noms. BDD is a strong contri...,4.0,"BDD, 4.0"
4,Miniapolis,BDD,1,1,2013,2013-04-20 00:56:00,"'''Support''', with great pleasure. I work wit...",4.0,"BDD, 4.0"


In [6]:
# Number of Elections
wiki['ELECTION'].nunique() 

4522

In [7]:
# Number of Nominees
wiki['TARGET'].nunique()

3445

Therefore, from the 1,585 individuals who lost election there were approximately 1,077 re-trys.

In [8]:
elections = set(wiki['ELECTION'].tolist())

In [9]:
# Get Matrices Needed for Peer Effects Regressions

IGY = []
IGGY = []

for i in elections:
    election_df = wiki[wiki['ELECTION'] == i]
    
    # Create the graph & get adjacency matrix
    G_elec = nx.from_pandas_edgelist(election_df, 'SOURCE', 'TARGET', create_using=nx.MultiDiGraph())
    G = nx.to_numpy_array(G_elec)
    
    # Get Identity Matrix
    I = np.identity(G.shape[0])
    
    # Get Y variable
    y_df = election_df.groupby(['SOURCE']).agg(VOTE = ('VOTE', pd.Series.median))
    y = y_df['VOTE'].values
    
    # Get (I-G)
    IG = np.subtract(I, G)
    
    # Get (I-G)y
    try:
        IGy = np.matmul(IG, y)
    except: 
        y = np.append(y, 1)
        IGy = np.matmul(IG, y)
    IGY.append(IGy)
    
    # Get (I-G)Gy
    IGG = np.matmul(IG, G)
    IGGy = np.matmul(IGG, y)
    IGGY.append(IGGy)

In [10]:
# Estimate Simple Peer Effects Model

B = []
p_values = []

for i in range(len(elections)):
    # Regress and get parameters
    lin_reg = sm.OLS(IGY[i], IGGY[i], hasconst=False)
    results = lin_reg.fit()
    beta = results.params[0]
    p = results.pvalues[0]
    B.append(beta)
    p_values.append(p)

In [11]:
# Get descriptive statistics

des_stats = pd.DataFrame()
des_stats['election'] = list(elections)
des_stats['beta'] = B
des_stats['p'] = p_values

des_stats.describe()

Unnamed: 0,beta,p
count,4522.0,4151.0
mean,-0.436281,0.2785158
std,0.489939,0.3661104
min,-2.6,7.334331000000001e-129
25%,-0.705817,0.0001382014
50%,-0.277778,0.09361919
75%,-0.008774,0.3735695
max,1.134615,1.0


In [12]:
# Find number of significant and insignificant peer effects

# 5% significance
def sig5(x):
    return x < 0.05

# 10% significance
def sig10(x):
    return x < 0.1

print('The Number of significant peer effects at the 5% level: ', sum(sig5(x) for x in p_values))
print('The Number of significant peer effects at the 10% level: ', sum(sig10(x) for x in p_values))

The Number of significant peer effects at the 5% level:  1842
The Number of significant peer effects at the 10% level:  2100


In [13]:
# Analyse Significant Elections

sig5_elec = des_stats[des_stats['p'] < 0.05]
sig5_elec.describe()

Unnamed: 0,beta,p
count,1842.0,1842.0
mean,-0.768205,0.005932385
std,0.463728,0.01190814
min,-2.6,7.334331000000001e-129
25%,-1.057158,1.085749e-09
50%,-0.717186,2.755176e-05
75%,-0.412084,0.004499536
max,1.134615,0.04985046


In [14]:
# Analyse Positive Peer Effects

pos_elec = des_stats[des_stats['beta'] > 0]
pos_elec.describe()

Unnamed: 0,beta,p
count,5.0,5.0
mean,0.506494,0.01159439
std,0.432114,0.02201654
min,0.074468,6.497071e-15
25%,0.203704,2.528358e-13
50%,0.375,0.003058879
75%,0.744681,0.00406868
max,1.134615,0.05084437


In [15]:
pos_elec['election']

891         Redwolf24, 7.0
2723           JesseW, 8.0
3777            Femto, 2.0
4117    Ricardo Lagos, 3.0
4456     David Kernow, 9.0
Name: election, dtype: object