In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
import re
import random
import math
from itertools import combinations
from collections import defaultdict


# Lookup table from the party abbreviations used in the vote texts to the
# full party names (Danish, Greenlandic and Faroese parties).
party_letters = {
    'ALT': 'Alternativet',
    'BP': 'Borgernes Parti',
    'DD': 'Danmarksdemokraterne',
    'DF': 'Dansk Folkeparti',
    'EL': 'Enhedslisten',
    'FG': 'Frie Grønne',
    'IA': 'Inuit Ataqatigiit',
    'JF': 'Javnaðarflokkurin',
    'KD': 'Kristendemokraterne',
    'KF': 'Konservative Folkeparti',
    'LA': 'Liberal Alliance',
    'M': 'Moderaterne',
    'N': 'Naleraq',
    'NB': 'Nye Borgerlige',
    'RV': 'Radikale Venstre',
    'S': 'Socialdemokratiet',
    'SF': 'Socialistisk Folkeparti',
    'SIU': 'Siumut',
    'SP': 'Sambandsflokkurin',
    'V': 'Venstre',
}

## Load the data and parse it into a database
Data can be updated with the data_getter.ipynb notebook. 

In [None]:
# Load the downloaded vote records. The encoding is given explicitly so the
# result does not depend on the platform's default locale encoding.
# NOTE(review): assumes the file was written as UTF-8 (or pure ASCII, which is
# what json.dump produces by default) - confirm against data_getter.ipynb.
with open('ft_votes_2026-02-27.json', 'r', encoding='utf-8') as file:
    values = json.load(file)

Due to inconsistent formatting, parsing the text about the votes is very fiddly. This is where the rules about ignoring data are defined.

In [None]:
def parse_text_blocks(full_text):
    """Parse one vote record into a structured summary, or return None to skip it.

    Parameters
    ----------
    full_text : dict
        Vote record with the keys 'konklusion' (free-text conclusion of the
        vote), 'id' and 'vedtaget' (boolean that is compared against the
        passed/rejected status found in the text).

    Returns
    -------
    dict or None
        A dict with the keys 'id', 'passed', 'for_parties', 'for_votes',
        'against_parties', 'against_votes', 'neutral_parties' and
        'neutral_votes'. None is returned when the record is ignored:
        empty or missing text, text longer than 1000 characters, text without
        "(", text with neither newlines nor balanced brackets, no
        passed/rejected keyword in the first part, a status that disagrees
        with 'vedtaget', or a party that is listed in more than one of the
        for/against/neutral groups.
    """
    text = full_text['konklusion']
    vote_id = full_text['id']
    approval_bool = full_text['vedtaget']

    if not text:
        return None # Ignore empty (or missing) text
    if len(text) > 1000:
        return None # Ignore very long texts, since they are an error
    if "(" not in text:
        return None # Ignore text without brackets

    # Split into (up to) four parts: conclusion, for, against, neutral
    if "\n" in text:
        parts = re.split(r'\r\n\r\n|\n{1,2}', text.strip(), maxsplit=3)
    elif brackets_balanced(text):
        parts = re.split(r'stemte', text.strip(), maxsplit=3)
    else:
        return None # Ignore text which has neither newlines nor balanced brackets

    # Ensure we always have exactly 4 parts (maxsplit=3 yields at most 4)
    parts += [""] * (4 - len(parts))
    part1, part2, part3, part4 = parts

    # --- Part 1: forkastet / vedtaget ---
    if re.search(r'forkastet', part1, re.IGNORECASE):
        status = False
    elif re.search(r'vedtaget', part1, re.IGNORECASE):
        status = True
    else:
        return None # Ignore votes without a passed or failed conclusion

    if status != approval_bool:
        return None # Ignore votes where the status of the vote disagrees with the text description

    # --- Regex for parts 2-4 ---
    # One to three capital letters delimited by start/end, whitespace, commas
    # or brackets; the token "UFG" is explicitly excluded.
    pattern = r'(?:(?<=^)|(?<=[\s,()\[\]]))(?!UFG(?:$|[\s,()\[\]]))[A-Z]{1,3}(?=$|[\s,()\[\]])'

    results = []
    for section in (part2, part3, part4):
        found = re.findall(pattern, section)
        numbers = [int(x) for x in re.findall(r'\d+', section)]
        # The first number in the section is used as the vote count; 0 if none
        results.append((found, numbers or [0]))

    # Ignore votes where one party is found in more than one group (e.g. both
    # for and against). This is the case for "løsgængere". The check must be
    # pairwise: a three-way intersection only catches parties that appear in
    # all three groups at once.
    for_set, against_set, neutral_set = (set(found) for found, _ in results)
    if (for_set & against_set) or (for_set & neutral_set) or (against_set & neutral_set):
        return None

    return {'id': vote_id,
            'passed': status,
            'for_parties': results[0][0],
            'for_votes': results[0][1][0],
            'against_parties': results[1][0],
            'against_votes': results[1][1][0],
            'neutral_parties': results[2][0],
            'neutral_votes': results[2][1][0]
            }

def brackets_balanced(s):
    """Return True when every (, [ and { in ``s`` is closed in the right order.

    Characters that are not brackets are ignored. A closing bracket without a
    matching most-recent opener, or an opener left unclosed at the end, makes
    the result False.
    """
    opener_for = {')': '(', ']': '[', '}': '{'}
    pending = []  # openers seen so far that are still waiting for their closer

    for ch in s:
        if ch in opener_for.values():
            pending.append(ch)
            continue
        if ch not in opener_for:
            continue  # not a bracket at all
        # ch is a closing bracket: it must match the innermost open one
        if pending and pending[-1] == opener_for[ch]:
            pending.pop()
        else:
            return False

    return not pending


Construct the database and check that, for each passed vote, there are more votes for than against.

In [None]:
# Flatten the pages of raw entries and keep only the records the parser
# accepts (parse_text_blocks returns None for every record it ignores).
database = [
    parsed
    for page_entries in values
    for parsed in map(parse_text_blocks, page_entries)
    if parsed is not None
]

# Check that passed votes have more votes for than against;
# print the id and both counts of any record that violates this.
for record in database:
    votes_for = record['for_votes']
    votes_against = record['against_votes']
    if record['passed'] and votes_for < votes_against:
        print(record['id'])
        print(votes_for)
        print(votes_against)



Analyze the party stats

In [None]:
# Identify unique party names
party_set = set()
for d in database:
    party_set.update(d['for_parties'])
    party_set.update(d['against_parties'])
    party_set.update(d['neutral_parties'])

# Set only Danish parties
dk_party_set = set(['ALT', 'BP','DD','DF','EL','FG','KD','KF','LA','M','NB','RV','S','SF','V'])

# Set up library of party stats
party_stats = {}
n = 0
for p in party_set:
    party_stats[p] = {'id' : n, 'for' : 0, 'against' : 0, 'neutral' : 0, 'for_wins' : 0, 'against_wins' : 0}
    n += 1

for d in database:
    # Count wins and losses
    for p in d['for_parties']:
        party_stats[p]['for'] += 1
        if d['passed']:
            party_stats[p]['for_wins'] += 1
    for p in d['against_parties']:
        party_stats[p]['against'] += 1
        if not d['passed']:
            party_stats[p]['against_wins'] += 1
    for p in d['neutral_parties']:
        party_stats[p]['neutral'] += 1
    
for p in party_stats:
    d = party_stats[p]
    d['total_votes'] = d['for'] + d['against'] + d['neutral']
    d['positivity'] = d['for']/d['total_votes']
    d['win_fraction'] = (d['for_wins'] + d['against_wins'])/d['total_votes']

    if d['for'] != 0:
        d['for_win_fraction'] = d['for_wins']/d['for']
    else: 
        d['for_win_fraction'] = None

    if d['against'] != 0:
        d['against_win_fraction'] = d['against_wins']/d['against']
    else: 
        d['against_win_fraction'] = None



Calculate and plot win fractions

In [None]:
# Restrict the statistics to the parties of interest and order them by win fraction
election_parties = ['BP','DF','DD','LA','KF','V','M','S','RV','SF','ALT','EL']
election_party_set = set(election_parties)
dk_party_stats = {p: stats for p, stats in party_stats.items() if p in election_party_set}
sorted_keys = sorted(dk_party_stats, key=lambda k: dk_party_stats[k]["win_fraction"])

# These must be computed before the printout below, which reads y_values;
# defining them after the print raises NameError on a fresh kernel.
x_positions = list(range(len(sorted_keys)))
y_values = [dk_party_stats[k]['win_fraction'] for k in sorted_keys]

# One JavaScript-style object literal per party, for pasting into the website
print('For website:')
for p, y in zip(sorted_keys, y_values):
    print('{ short: \'' + p + '\', score: ' + str(round(y,4)) + ', label: \'' + str(round(y*100)) + '%\'},')

fig, ax = plt.subplots()

ax.stem(x_positions, y_values, linefmt='-', markerfmt='o', basefmt=' ')

ax.set_xticks(x_positions)
ax.set_xticklabels(sorted_keys, rotation=-45)#[party_letters[p] for p in sorted_keys]
ax.set_ylabel('Magtfaktor')

plt.show()


Calculate and plot agreement matrix

In [None]:
def count_party_agreements(votes):
    """Count, for each pair of parties, how often they landed in the same group.

    For every vote and every one of its three groups ('for_parties',
    'against_parties', 'neutral_parties'), each pair of parties within that
    group is counted once. Pairs are keyed as a tuple in sorted order.

    Returns a plain dict mapping (party1, party2) to the number of agreements.
    """
    pair_tally = {}

    for ballot in votes:
        for side in ('for_parties', 'against_parties', 'neutral_parties'):
            # Sorting first gives every pair one canonical key
            for pair in combinations(sorted(ballot[side]), 2):
                pair_tally[pair] = pair_tally.get(pair, 0) + 1

    return pair_tally

agreement_counts = count_party_agreements(database)
election_parties = ['BP','DF','DD','LA','KF','V','M','S','RV','SF','ALT','EL'] # This turns out to be the optimal ordering
#election_parties.reverse()
# Fixed seed so the shuffled start (and the annealing run that follows) is
# reproducible on Restart & Run All; change the seed to try other starts.
random.seed(0)
random.shuffle(election_parties) # To check, one can shuffle to help the annealer start off
N = len(election_parties)
# Row/column position of each party in the (shuffled) matrix
party_index = {party: idx for idx, party in enumerate(election_parties)}

norm_agreement_counts = {}
correlation_matrix = np.zeros((N,N),'float')

for (party1, party2), agreements in agreement_counts.items():
    # Normalise by the smaller of the two parties' total vote counts
    norm_factor = min(party_stats[party1]['total_votes'], party_stats[party2]['total_votes'])
    fraction = agreements/norm_factor
    norm_agreement_counts[(party1, party2)] = fraction
    if party1 in party_index and party2 in party_index:
        row, col = party_index[party1], party_index[party2]
        # The matrix is symmetric: fill both triangles
        correlation_matrix[row, col] = fraction
        correlation_matrix[col, row] = fraction

# Every party agrees fully with itself
np.fill_diagonal(correlation_matrix, 1)

In [None]:
# Heat map of the agreement matrix in the current order of election_parties
fig, ax = plt.subplots(figsize=(12, 12))
ax.matshow(correlation_matrix)

tick_positions = np.arange(N)
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(election_parties)
ax.set_yticklabels(election_parties)

# Write each cell's value as a percentage on top of the heat map
for (row, col), percent in np.ndenumerate(correlation_matrix*100):
    ax.text(
        col, row, '{:0.1f}%'.format(percent),
        ha='center', va='center',
        bbox=dict(boxstyle='round', facecolor='white', edgecolor='0.3'),
    )

print('Unordered matrix')
plt.show()

Check that the ordering is optimal by defining an ideal matrix and a distance measure, and then optimizing with simulated annealing.

In [None]:
def distance_to_identity(M, scale=24):
    """Frobenius distance between ``M`` and an ideal banded matrix.

    The ideal matrix has ones on the diagonal and decreases linearly with the
    distance from the diagonal: entry (i, j) equals ``1 - |i - j| / scale``.

    Parameters
    ----------
    M : array-like
        Square matrix to compare; converted to float so integer input is not
        truncated.
    scale : number, optional
        Divisor controlling how fast the ideal matrix falls off away from the
        diagonal. Defaults to 24.
        NOTE(review): presumably chosen as twice the number of parties (12) -
        confirm.

    Returns
    -------
    float
        ``np.linalg.norm(M - ideal, 'fro')``.
    """
    M = np.asarray(M, dtype=float)
    positions = np.arange(len(M))
    # |i - j| for every cell, built in one broadcasted step instead of a
    # Python double loop (this function is called once per annealing step)
    offsets = np.abs(positions[:, None] - positions[None, :])
    ideal = 1 - offsets/scale

    return np.linalg.norm(M - ideal, 'fro')

def simulated_annealing_reorder(
    M: np.ndarray,
    n_iterations=100_000,
    T_start=1.0,
    T_end=0.001,
    distance_fn=None,
) -> tuple[np.ndarray, float, list[int], list[float], list[float]]:
    """Search for the row/column ordering of ``M`` that minimises a distance.

    Starting from the given order, each iteration swaps two random positions
    and applies the same permutation to rows and columns. A swap that lowers
    the distance is always accepted; a worse one is accepted with probability
    ``exp(-delta / T)``, where the temperature ``T`` decays geometrically from
    ``T_start`` to ``T_end`` over the run.

    Parameters
    ----------
    M : np.ndarray
        Square matrix to reorder.
    n_iterations : int
        Number of swap proposals.
    T_start, T_end : float
        Initial and final temperature.
    distance_fn : callable, optional
        Function mapping a reordered matrix to the value to minimise.
        Defaults to ``distance_to_identity``.

    Returns
    -------
    tuple
        ``(ordered_matrix, best_dist, best_order, scores, dists)`` where
        ``scores[i]`` is the best distance seen up to iteration ``i`` and
        ``dists[i]`` is the distance of the candidate proposed in iteration
        ``i``. For matrices smaller than 2x2 no swap is possible; the input
        order is returned with empty ``scores`` and ``dists``.
    """
    if distance_fn is None:
        distance_fn = distance_to_identity

    scores = []
    dists = []
    n = len(M)
    current_order = list(range(n))
    best_order = current_order.copy()
    current_dist = distance_fn(M[np.ix_(current_order, current_order)])
    best_dist = current_dist

    if n < 2:
        # random.sample(range(n), 2) would raise; there is nothing to reorder
        return M[np.ix_(best_order, best_order)], best_dist, best_order, scores, dists

    for i in range(n_iterations):
        # Geometric cooling schedule from T_start down towards T_end
        T = T_start * (T_end / T_start) ** (i / n_iterations)

        # Propose swapping two distinct positions
        a, b = random.sample(range(n), 2)
        new_order = current_order.copy()
        new_order[a], new_order[b] = new_order[b], new_order[a]

        new_dist = distance_fn(M[np.ix_(new_order, new_order)])
        delta = new_dist - current_dist

        # delta >= 0 whenever exp is evaluated, so exp(-delta / T) <= 1
        if delta < 0 or random.random() < math.exp(-delta / T):
            current_order = new_order
            current_dist = new_dist
            if new_dist < best_dist:
                best_dist = new_dist
                best_order = new_order.copy()

        scores.append(best_dist)
        dists.append(new_dist)

    return M[np.ix_(best_order, best_order)], best_dist, best_order, scores, dists

# Run the annealer on the agreement matrix and report the best party order found
ordered_M, best_dist, best_order, scores, dists = simulated_annealing_reorder(correlation_matrix)
print('Optimal ordering')
print(np.array(election_parties)[best_order])

# Convergence trace of the returned scores, one value per iteration
fig, ax = plt.subplots()
ax.plot(scores)
ax.set_xlabel('Iterations')
ax.set_ylabel('Distance')
plt.show()


In [None]:
# Heat map of the agreement matrix after reordering by the annealer's best order
fig, ax = plt.subplots(figsize=(12, 12))
ax.matshow(ordered_M)

tick_positions = np.arange(N)
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(np.array(election_parties)[best_order])
ax.set_yticklabels(np.array(election_parties)[best_order])

# Write each cell's value as a percentage on top of the heat map
for (row, col), percent in np.ndenumerate(ordered_M*100):
    ax.text(
        col, row, '{:0.1f}%'.format(percent),
        ha='center', va='center',
        bbox=dict(boxstyle='round', facecolor='white', edgecolor='0.3'),
    )

print('Ordered matrix')
plt.show()