# Eurovision 2022 (Draft Notebook)


## Potential Research Questions

1. Are Jury Voters less affected by Cultural Influences in Eurovision ?
2. [Needs to be Formulated Well] Does a sympathy-wave effect for Ukraine exist in 2022 Eurovision results?  How can we account for that?


## Plan

### 1. Pre-Analysis or Exploratory Data Analysis

Worth doing again, with newer data.

1. Basically, redo figure 7 from the paper with the newest data

## 2. Agent-Based Models

1. Null Model
2. Model 1
3. Cultural Affinity Model
   a. Cultural Affinity Model part 2
   
   TODO: We need to adjust the formula now => Or just vary alpha as well?


## 3. Find Best Fit

1. Kolmogorov-Smirnov test
2. Hypothesis: Simulated and Empirical FoF come from the same distribution.
    3. This was rejected in the original paper
    
    
### Further Work
1. Consider doing section 5
2. Ambitious: Section 6


Suggestion: Focus on the ABMs as opposed to fully recreating the full paper (K-S tests etc.) 
Also try various things:  subsets of networks.  what happens if you take away a node etc etc.  Give extra weight to Ukraine itself?

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import networkx as nx
import matplotlib.pyplot as plt
from mesa import Model, Agent
from mesa.time import RandomActivation
import pyvis.network as pyvisnetwork
from mesa.space import NetworkGrid
from mesa.datacollection import DataCollector


from scipy.stats import rankdata
from scipy.stats import ks_2samp

# 1. Exploratory Data Analysis

First, we load the data for the various years and then visualize the network for a given year

In [2]:
#df = pd.read_excel('https://query.data.world/s/cuokq5sbbqlogvjrdwdeacvp367xnq')
df = pd.read_csv("data/dataworld.csv")
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Year,(semi-) final,Edition,Jury or Televoting,From country,To country,Points,Duplicate
0,0,0,1975,f,1975f,J,Belgium,Belgium,0,x
1,1,1,1975,f,1975f,J,Belgium,Finland,0,
2,2,2,1975,f,1975f,J,Belgium,France,2,
3,3,3,1975,f,1975f,J,Belgium,Germany,0,
4,4,4,1975,f,1975f,J,Belgium,Ireland,12,
...,...,...,...,...,...,...,...,...,...,...
56307,56307,56307,2022,f,2022f,T,United Kingdom,Serbia,1,
56308,56308,56308,2022,f,2022f,T,United Kingdom,Spain,5,
56309,56309,56309,2022,f,2022f,T,United Kingdom,Sweden,4,
56310,56310,56310,2022,f,2022f,T,United Kingdom,Switzerland,0,


In [3]:
df.rename(columns=lambda x: x.strip(), inplace=True)
df.rename(columns={
    "(semi-) final": "semi_or_final",
    "Jury or Televoting": "jury_or_televoting",
    "From country": "from",
    "To country": "to",
    "Points": "points"
}, inplace=True)

In [4]:
df_finals = df[df["semi_or_final"] == 'f']
df_finals_televoting = df_finals[df_finals["jury_or_televoting"] == "T"]
df_finals_jury = df_finals[df_finals["jury_or_televoting"] == "J"]


display(df_finals_televoting)
display(df_finals_jury)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Year,semi_or_final,Edition,jury_or_televoting,from,to,points,Duplicate
36352,36352,36352,2016,f,2016f,T,Albania,Armenia,2,
36353,36353,36353,2016,f,2016f,T,Albania,Australia,12,
36354,36354,36354,2016,f,2016f,T,Albania,Austria,0,
36355,36355,36355,2016,f,2016f,T,Albania,Azerbaijan,0,
36356,36356,36356,2016,f,2016f,T,Albania,Belgium,0,
...,...,...,...,...,...,...,...,...,...,...
56307,56307,56307,2022,f,2022f,T,United Kingdom,Serbia,1,
56308,56308,56308,2022,f,2022f,T,United Kingdom,Spain,5,
56309,56309,56309,2022,f,2022f,T,United Kingdom,Sweden,4,
56310,56310,56310,2022,f,2022f,T,United Kingdom,Switzerland,0,


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Year,semi_or_final,Edition,jury_or_televoting,from,to,points,Duplicate
0,0,0,1975,f,1975f,J,Belgium,Belgium,0,x
1,1,1,1975,f,1975f,J,Belgium,Finland,0,
2,2,2,1975,f,1975f,J,Belgium,France,2,
3,3,3,1975,f,1975f,J,Belgium,Germany,0,
4,4,4,1975,f,1975f,J,Belgium,Ireland,12,
...,...,...,...,...,...,...,...,...,...,...
56283,56283,56283,2022,f,2022f,J,United Kingdom,Serbia,1,
56284,56284,56284,2022,f,2022f,J,United Kingdom,Spain,10,
56285,56285,56285,2022,f,2022f,J,United Kingdom,Sweden,12,
56286,56286,56286,2022,f,2022f,J,United Kingdom,Switzerland,2,


Let's visualize!

# Agent Based Modelling

In [5]:
SCORES = np.array([12, 10, 8, 7, 6, 5, 4, 3, 2, 1])
alpha = 0.1

In [6]:
def calc_All_FoF(df, from_country, points_column = "points", FoF_column = "FoF"):
    Voter_selector = df["from"] == from_country

    voting_countries_2022 = df["from"].unique()


    N = len(voting_countries_2022)
    
    for to_country in df["to"].unique():
        Candidate_selector = df["to"] == to_country

        final_score = sum(df.loc[Candidate_selector, points_column])

        points = df.loc[Voter_selector & Candidate_selector, points_column]

        df.loc[Voter_selector & Candidate_selector, FoF_column ] =  ( points / 12 ) - ( ( final_score - points )/( 12*(N-2) ) )

## Null Model

In this null model, countries freely vote for other countries.  The only restriction is that they can assign only a fixed amount of points

In [7]:
class NullVotingAgent(Agent):
    
    def __init__(self, unique_id, model, df):
        '''
         Create a new Null Voting Agent (A "country")

         Args:
            unique_id: Unique identifier for the agent (A country name)
        '''
        
        super().__init__(unique_id, model)
        
        self.df = df
        
        
    def step(self):
        '''
        Run one step of the agent.
        '''
        
        #print(f"Country {self.unique_id} is now voting")
        
        country_row_selector = self.df["from"] == self.unique_id

        fit = np.random.uniform(0, 1, size=len( self.df[country_row_selector] ))

        points_given = 58 * (fit/sum(fit))
        
        #print(f"{voting_country=}, {points_given=}")
        self.df.loc[country_row_selector, "null_model_points"] += points_given

In [8]:
def get_null_model_ks(model):
    
    data1 = model.df["FoF"]
    data2 = model.df["null_model_FoF"]
    
    return ks_2samp(data1, data2)

class NullVotingModel(Model):
    
    def __init__(self, jury_or_televoting, year):
        '''
        Create a new NullVotingModel model.

         Args:
            jury_or_televoting: a string with values  
                * J: Jury
                * T: Televoting
            year: year of competition
        '''
        
        super().__init__()
        
        df_finals_type = df_finals[df_finals["jury_or_televoting"] == jury_or_televoting]
        df_finals_televoting_year = df_finals_type[df_finals_type["Year"]==year].copy()

        self.df = df_finals_televoting_year
        
        self.datacollector = DataCollector(
            model_reporters={"ks": get_null_model_ks}
        )
        
        self.schedule = RandomActivation(self)
        
        
        self.df["null_model_points"] = 0
        self.df["FoF"] = 0
        self.df["null_model_FoF"] = 0

        voting_countries = self.df["from"].unique()

        for voting_country in voting_countries:
            country_row_selector = self.df["from"] == voting_country
            the_agent = NullVotingAgent(voting_country, self, self.df)
            self.schedule.add(the_agent)
            

    def step(self):
        '''
        Run one step of the model. If All agents are happy, halt the model.
        '''
        
        
        self.schedule.step()
        
        #print("Going to start calc all the FoF")
        for voting_country in self.df["from"].unique():
            calc_All_FoF(self.df, voting_country, 'points', "FoF")        
            calc_All_FoF(self.df, voting_country, 'null_model_points', "null_model_FoF")
            
        self.datacollector.collect(self)

## Model1 ABM
This model also gives points at random, but follows the rules of the contest

In [9]:
class Model1Agent(Agent):
    
    def __init__(self, unique_id, model, df):
        '''
         Create a new Model 1 Voting Agent (A "country")

         Args:
            unique_id: Unique identifier for the agent (A country name)
        '''
        
        super().__init__(unique_id, model)
        self.df = df
        
        
    def step(self):
        '''
        Run one step of the agent.
        '''
        
        country_row_selector = self.df["from"] == self.unique_id

        fit = pd.Series( np.random.uniform(0, 1, size=len(self.df[country_row_selector])) )
        rank = fit.rank(ascending=False)
        points_given = rank.apply(
            lambda x: SCORES[int(x) - 1] if x <= 10 else 0
        )
        
        #print(f"{rank=}, {voting_country=}, {type(points_given)=}")
        self.df.loc[country_row_selector, "model1_points"] += points_given.to_numpy()

In [10]:
def get_model1_ks(model):
    
    data1 = model.df["FoF"]
    data2 = model.df["model1_FoF"]
    
    return ks_2samp(data1, data2)


class Model1Model(Model):
    
    def __init__(self, jury_or_televoting, year):
        '''
        Create a new NullVotingModel model.

         Args:
            jury_or_televoting: a string with values  
                * J: Jury
                * T: Televoting
            year: year of competition
        '''
        
        super().__init__()
        
        df_finals_type = df_finals[df_finals["jury_or_televoting"] == jury_or_televoting]
        df_finals_televoting_year = df_finals_type[df_finals_type["Year"]==year].copy()

        self.df = df_finals_televoting_year
        
        self.datacollector = DataCollector(
            model_reporters={"ks": get_model1_ks}
        )
        
        self.df["model1_points"] = 0
        
        self.schedule = RandomActivation(self)
        

        voting_countries = self.df["from"].unique()

        for voting_country in voting_countries:
            country_row_selector = self.df["from"] == voting_country
            a = Model1Agent(voting_country, self, self.df)
            self.schedule.add(a)
            

    def step(self):
        '''
        Run one step of the model.
        '''
        
        self.schedule.step()
        
        #print("Going to start calc all the FoF")
        for voting_country in self.df["from"].unique():
            calc_All_FoF(self.df, voting_country, 'points', "FoF")        
            calc_All_FoF(self.df, voting_country, 'model1_points', "model1_FoF")
            
        self.datacollector.collect(self)

## Affinity Model

In [21]:
class AffinityModelAgent(Agent):
    
    def __init__(self, unique_id, model, df, alpha, cultural_network):
        '''
         Create a new Model 1 Voting Agent (A "country")

         Args:
            unique_id: Unique identifier for the agent (A country name)
        '''
        
        super().__init__(unique_id, model)
        
        self.df = df
        self.cultural_network = cultural_network
        self.alpha = alpha
        
    def step(self):
        '''
        Run one step of the agent.
        '''
        
        country_row_selector = self.df["from"] == self.unique_id
        
        def affinity_fit(row):
            '''
                row: a dataframe row
            '''
            q = float(self.df["relative_score"].sample(1))

            from_country = row['from']
            to_country = row['to']


            network_part = nx.get_edge_attributes(self.cultural_network, "weight")[(from_country, to_country)]

            the_fit = self.alpha * q + network_part * (1 - self.alpha)

            return the_fit

        
        country_df = self.df[country_row_selector]

        fit = country_df.apply(
            lambda row: affinity_fit(row), axis=1
        )
        #print(f"{unique_id=}, {fit=}")
        rank = fit.rank(ascending=False)
        points_given = rank.apply(
            lambda x: SCORES[int(x) - 1] if x <= 10 else 0
        )
        
        self.df.loc[country_row_selector, "affinity_model_points"] += points_given.to_numpy()


In [22]:
def get_affinity_ks(model):
    
    data1 = model.df["FoF"]
    data2 = model.df["affinity_model_FoF"]
    
    return ks_2samp(data1, data2)


class AffinityModel(Model):
    
    def __init__(self,jury_or_televoting, year, mu, sigma, alpha):
        '''
        Create a new NullVotingModel model.

         Args:
        '''
        
        super().__init__()
        
        self.datacollector = DataCollector(
            model_reporters={"ks": get_affinity_ks}
        )

        
        df_finals_type = df_finals[df_finals["jury_or_televoting"] == jury_or_televoting]
        df_finals_televoting_year = df_finals_type[df_finals_type["Year"]==year].copy()
        self.df = df_finals_televoting_year
        
        self.df["affinity_model_points"] = 0
        
        self.df["relative_score"] = self.df["points"]/sum(self.df["points"])
        
        
        self.cultural_network = nx.from_pandas_edgelist(
            self.df,
            source="from",
            target="to",
            create_using=nx.DiGraph()
        )

        self.mu = mu
        self.sigma = sigma
        self.alpha = alpha

        dict_weight = {}
        for edge in self.cultural_network.edges():
            sampled_weight = np.random.normal(mu, sigma)
            dict_weight[edge] = sampled_weight
        nx.set_edge_attributes(self.cultural_network, values = dict_weight, name = 'weight')
        
        

        voting_countries = self.df["from"].unique()
        
        self.schedule = RandomActivation(self)

        for voting_country in voting_countries:
            a = AffinityModelAgent(voting_country, self, self.df, self.alpha, self.cultural_network)
            self.schedule.add(a)
            

    def step(self):
        '''
        Run one step of the model.
        '''
        
        self.schedule.step()
        
        #print("Going to start calc all the FoF")
        for voting_country in self.df["from"].unique():
            calc_All_FoF(self.df, voting_country, 'points', "FoF")        
            calc_All_FoF(self.df, voting_country, 'affinity_model_points', "affinity_model_FoF")
            
        self.datacollector.collect(self)

# 3. Run the ABMs in a batch for various years



# 4. Friend-Or-Foe for the ABMS

Now that we have Various models and also empirical data, we can calculate the Friend-or-Foe coefficients for each of them

## Batch Run

In [13]:
params_null = {
    "jury_or_televoting": "T",
    "year": [2021, 2022]    # 2020 Contest was cancelled due to COVID-19
}

In [14]:
from mesa.batchrunner import batch_run

results = batch_run(
    NullVotingModel,
    parameters=params_null,
    iterations=10,
    max_steps=1,
    number_processes=None,
    data_collection_period=-1,
    display_progress=True,
)

20it [00:09,  2.12it/s]


In [15]:
pd.DataFrame(results)

Unnamed: 0,RunId,iteration,Step,jury_or_televoting,year,ks
0,0,0,1,T,2022,"(0.19692307692307692, 6.045987196332814e-17)"
1,1,1,1,T,2022,"(0.20307692307692307, 5.314625842032393e-18)"
2,2,2,1,T,2022,"(0.2082051282051282, 6.606060603185696e-19)"
3,3,3,1,T,2022,"(0.19487179487179487, 1.336752208138629e-16)"
4,4,0,1,T,2021,"(0.23481781376518218, 2.7014288578343096e-24)"
5,5,4,1,T,2022,"(0.20205128205128206, 8.012925594735214e-18)"
6,6,1,1,T,2021,"(0.2540485829959514, 2.0718000616124288e-28)"
7,7,2,1,T,2021,"(0.23279352226720648, 7.001514394750191e-24)"
8,8,3,1,T,2021,"(0.23076923076923078, 1.799179459889151e-23)"
9,9,5,1,T,2022,"(0.2153846153846154, 3.2591073567185353e-20)"


In [16]:
params_model1 = {
    "jury_or_televoting": "T",
    "year": [2021, 2022]    # 2020 Contest was cancelled due to COVID-19
}

In [17]:
from mesa.batchrunner import batch_run

results = batch_run(
    Model1Model,
    parameters=params_model1,
    iterations=10,
    max_steps=1,
    number_processes=None,
    data_collection_period=-1,
    display_progress=True,
)

20it [00:10,  1.99it/s]


In [18]:
pd.DataFrame(results)

Unnamed: 0,RunId,iteration,Step,jury_or_televoting,year,ks
0,0,0,1,T,2022,"(0.3723076923076923, 1.5931966500532568e-60)"
1,1,0,1,T,2021,"(0.3917004048582996, 5.094950047473383e-68)"
2,2,1,1,T,2022,"(0.3394871794871795, 3.48596777907859e-50)"
3,3,1,1,T,2021,"(0.3937246963562753, 9.696461526990877e-69)"
4,4,2,1,T,2022,"(0.3558974358974359, 3.1826503732390554e-55)"
5,5,3,1,T,2022,"(0.3446153846153846, 9.892277140437767e-52)"
6,6,4,1,T,2022,"(0.37743589743589745, 3.102916166595395e-62)"
7,7,5,1,T,2022,"(0.34974358974358977, 2.6485764714044703e-53)"
8,8,2,1,T,2021,"(0.35931174089068824, 4.7437879590710686e-57)"
9,9,3,1,T,2021,"(0.3684210526315789, 4.9744639321358906e-60)"


In [19]:
params_affinity = {
    "jury_or_televoting": "T",
    "year": [2021, 2022],    # 2020 Contest was cancelled due to COVID-19
    "mu": np.linspace(-0.1, 0.1, 21),
    "sigma": np.linspace(0, 0.1, 21),
    "alpha": np.linspace(0, 0.5, 3)
} 

In [23]:
from mesa.batchrunner import batch_run

results = batch_run(
    AffinityModel,
    parameters=params_affinity,
    iterations=10,
    max_steps=1,
    number_processes=None,
    data_collection_period=-1,
    display_progress=True,
)

26460it [5:28:37,  1.34it/s]


In [24]:
res_affinity = pd.DataFrame(results)
res_affinity.to_csv("res_affinity.csv")

In [25]:
res_affinity = pd.read_csv("res_affinity.csv")

In [26]:
display(res_affinity)

Unnamed: 0.1,Unnamed: 0,RunId,iteration,Step,jury_or_televoting,year,mu,sigma,alpha,ks
0,0,0,0,1,T,2021,-0.1,0.015,0.25,"KstestResult(statistic=0.5668016194331984, pva..."
1,1,1,0,1,T,2021,-0.1,0.005,0.50,"KstestResult(statistic=0.5354251012145749, pva..."
2,2,2,0,1,T,2021,-0.1,0.005,0.25,"KstestResult(statistic=0.555668016194332, pval..."
3,3,3,0,1,T,2021,-0.1,0.025,0.00,"KstestResult(statistic=0.5688259109311741, pva..."
4,4,4,0,1,T,2021,-0.1,0.000,0.50,"KstestResult(statistic=0.4048582995951417, pva..."
...,...,...,...,...,...,...,...,...,...,...
26455,26455,26455,9,1,T,2022,0.1,0.095,0.00,"KstestResult(statistic=0.5415384615384615, pva..."
26456,26456,26456,9,1,T,2022,0.1,0.100,0.25,"KstestResult(statistic=0.5323076923076923, pva..."
26457,26457,26457,9,1,T,2022,0.1,0.095,0.50,"KstestResult(statistic=0.56, pvalue=3.97187096..."
26458,26458,26458,9,1,T,2022,0.1,0.100,0.00,"KstestResult(statistic=0.5323076923076923, pva..."


In [27]:
res_affinity[res_affinity["year"] == 2021]["ks"]

0        KstestResult(statistic=0.5668016194331984, pva...
1        KstestResult(statistic=0.5354251012145749, pva...
2        KstestResult(statistic=0.555668016194332, pval...
3        KstestResult(statistic=0.5688259109311741, pva...
4        KstestResult(statistic=0.4048582995951417, pva...
                               ...                        
25132    KstestResult(statistic=0.5475708502024291, pva...
25133    KstestResult(statistic=0.5597165991902834, pva...
25134    KstestResult(statistic=0.5688259109311741, pva...
25135    KstestResult(statistic=0.541497975708502, pval...
25137    KstestResult(statistic=0.5232793522267206, pva...
Name: ks, Length: 13230, dtype: object