# Analytical approaches

One might ask whether the diversity-expertise tradeoff (as modelled by the evidential sources model) can be studied analytically, using approaches from the voting literature. To investigate this, the notebook covers:
1. Lower bound in terms of number of sources and their mean reliability
2. The Cantelli lower bound (in terms of $\mu$ and $\sigma$)
3. Normal approximation. 

> Conclusion: None of these approaches seem useful for studying the diversity-expertise tradeoff in the considered parameter space.

# Lower bound in terms of number of sources and their mean reliability

### Implementation

In [4]:
import math

from scipy.stats import binom
import pandas as pd

from models.team import Team
from models.generate_teams import generate_expert_team, generate_diverse_team
from models.agent import Agent
from models.sources import Sources


In [2]:
def lower_bound_average(team: Team) -> float:
    """ Calculate a lower bound on the probability that the team makes the correct decision 
    (for the evidence-based mechanism) based on the average reliability of the sources they access."""
    sources = team.sources
    sources_accessed = set([s for a in team.members for s in a.heuristic])
    reliabilities_team_sources = [sources.reliabilities[s] for s in sources_accessed]
    n = len(reliabilities_team_sources)
    mean = sum(reliabilities_team_sources) / n
    threshold = math.ceil(n / 2)
    return binom.sf(threshold, n, mean)

sources = Sources(17, ("equi", 0.65, 0.2))
e_team = generate_expert_team(sources, 5, 9)
lower_bound_average(e_team)

0.729659098

In [3]:
def upper_bound_max(team: Team) -> float:
    """ Calculate an upper bound on the probability that the team makes the correct decision
    (for the evidence-based mechanism) based on the maximum reliability of the sources they access."""
    sources = team.sources
    sources_accessed = set([s for a in team.members for s in a.heuristic])
    reliabilities_team_sources = [sources.reliabilities[s] for s in sources_accessed]
    n = len(reliabilities_team_sources)
    rel_max = max(reliabilities_team_sources)
    threshold = math.ceil(n / 2)
    return binom.sf(threshold, n, rel_max)

sources = Sources(17, ("equi", 0.65, 0.2))
e_team = generate_expert_team(sources, 5, 9)
upper_bound_max(e_team)

0.8342742919921875

In [5]:
def generate_lower_average_df() -> pd.DataFrame:
    """Generate a DataFrame comparing expert and diverse teams (for the evidence-based mechanisms)
    based on lower bound average reliability."""
    data = []
    for n in [13, 17]:
        for mean in [0.55, 0.6, 0.65, 0.7, 0.75]:
            sources = Sources(n, ("equi", mean, 0.2))
            e_team = generate_expert_team(sources, 5, 9)
            e_team_accuracy = e_team.accuracy_evidence()
            e_lower = lower_bound_average(e_team)
            e_deviation = e_team_accuracy - e_lower
            e_upper = upper_bound_max(e_team)
            
            d_team = generate_diverse_team(sources, 5, 9)
            d_team_accuracy = d_team.accuracy_evidence()
            d_lower = lower_bound_average(d_team)
            d_deviation = d_team_accuracy - d_lower
            
            DTE = True if d_team_accuracy > e_team_accuracy else False
            DTE_approx = True if d_lower > e_team_accuracy else False
            
            data.append([
                n,
                mean,
                e_deviation,
                e_lower,
                e_team_accuracy,
                e_upper,
                d_deviation,
                d_lower,
                d_team_accuracy,
                DTE,
                DTE_approx, 
            ])
    df = pd.DataFrame(data, columns=[
        "n_sources",
        "rel_mean",
        "e_deviation",
        "e_lower",
        "e_exact",
        "e_upper",
        "d_deviation",
        "d_lower",
        "d_exact",
        "DTE_exact",
        "DTE_approx",
    ])
    return df

### Results

Conclusion:
- The lower bound is not useful to approximate the diversity-expertise tradeoff, since the lower bound for the diverse team is always lower than the expert team’s accuracy, in the considered parameter space.
    - Compare columns `DTE_exact` and `DTE_approx` below.
    - `DTE_exact` shows the diversity-expertise tradeoff in the simulations. (`True` means that diversity trumped expertise.)
    - `DTE_approx` shows the approximated diversity-expertise tradeoff, using the lower bound in terms of number of sources and their mean reliability. 

In [62]:
df_lower_average = generate_lower_average_df()
df_lower_average[["n_sources",
    "rel_mean",
    "d_deviation",
    "d_lower",
    "d_exact",
    "e_exact",
    "DTE_exact",
    "DTE_approx",]]

Unnamed: 0,n_sources,rel_mean,d_deviation,d_lower,d_exact,e_exact,DTE_exact,DTE_approx
0,13,0.55,0.218031,0.426806,0.644837,0.698371,False,False
1,13,0.6,0.198637,0.574396,0.773033,0.799917,False,False
2,13,0.65,0.156791,0.715893,0.872684,0.880559,False,False
3,13,0.7,0.104941,0.834603,0.939544,0.937939,True,False
4,13,0.75,0.057278,0.919787,0.977065,0.97345,True,False
5,17,0.55,0.189435,0.474308,0.663743,0.733887,False,False
6,17,0.6,0.162454,0.640508,0.802962,0.828862,False,False
7,17,0.65,0.115337,0.787238,0.902575,0.9018,True,False
8,17,0.7,0.065845,0.89536,0.961205,0.951614,True,False
9,17,0.75,0.028684,0.959763,0.988447,0.980822,True,False


# Cantelli lower bound

### Implementation

In [1]:
import os
import itertools

from models.agent import Agent
from models.team import Team
from models.sources import Sources
from models.generate_teams import generate_expert_team, generate_diverse_team

In [None]:
def joint_accuracy(agent1: Agent, agent2: Agent) -> float:
    """
    Compute the probability that two agents are BOTH correct.
    Assumes conditional independence given the true state.
    """
    sources = agent1.sources  # assuming both agents share the same Sources object
    rels_shared = [sources.reliabilities[s] for s in agent1.heuristic if s in agent2.heuristic]
    rels_unique_1 = [sources.reliabilities[s] for s in agent1.heuristic if s not in agent2.heuristic]
    rels_unique_2 = [sources.reliabilities[s] for s in agent2.heuristic if s not in agent1.heuristic]
    
    threshold_1, threshold_2 = (len(agent1.heuristic) + 1)//2, (len(agent2.heuristic) + 1)//2
    total = 0.0

    # Enumerate outcomes for shared sources first
    for shared_outcome in itertools.product([0,1], repeat=len(rels_shared)):
        p_shared = 1.0
        for source_rel, bit in zip(rels_shared, shared_outcome):
            p_shared *= source_rel if bit == 1 else (1 - source_rel)

        # agents probability of being correct given shared outcome
        def prob_agent_correct(rels_unique, shared_bits, threshold):
            total = 0.0
            for unique_outcome in itertools.product([0,1], repeat=len(rels_unique)):
                prob = 1.0
                for source_rel, bit in zip(rels_unique, unique_outcome):
                    prob *= source_rel if bit == 1 else (1 - source_rel)
                if sum(shared_bits) + sum(unique_outcome) >= threshold:
                    total += prob
            return total

        p_agent1_cond = prob_agent_correct(rels_unique_1, shared_outcome, threshold_1)
        p_agent2_cond = prob_agent_correct(rels_unique_2, shared_outcome, threshold_2)

        total += p_shared * p_agent1_cond * p_agent2_cond

    return total


def cantelli_bounds(team: Team) -> dict:
    """Given a team of agents, computes the agent accuracies and pairwise joint accuracies,
    then uses Cantelli's inequality to compute lower and upper bounds on the probability
    that the team majority is correct.
    
    Parameters
    ----------
        team: Team object
    
    Returns
    -------
        Lower and upper Cantelli bounds for group correctness.
    """    
    # Step 1: compute each agent's accuracy 
    agent_scores = [agent.score for agent in team.members]
    mu = sum(agent_scores)  # mean of the sum

    # Step 2: compute all pairwise joint accuracies p_ij
    cov_sum = 0.0
    for agent_i, agent_j in itertools.combinations(team.members, 2):
        p_ij = joint_accuracy(agent_i, agent_j)
        cov_sum += p_ij - agent_i.score*agent_j.score

    # Step 3: compute total variance
    var = sum(score_i*(1-score_i) for score_i in agent_scores) + 2 * cov_sum

    # Step 4: Cantelli bounds
    threshold = (team.size + 1) / 2
    lower = 1 - var / (var + (mu - (threshold - 1))**2)
    upper = var / (var + (threshold - mu)**2) if mu < threshold else 1.0

    return dict(mu=mu, var=var, lower=lower, upper=upper, q=agent_scores)

In [None]:
def generate_cantelli_df() -> pd.DataFrame:
    """Generates a DataFrame comparing expert and diverse teams based on Cantelli bounds."""
    data = []
    for n in [13, 17, 21]:
        for mean in [0.55, 0.6, 0.65, 0.7, 0.75]:
            # Retrieve exact accuracies from simulation data
            files = [
                file
                for file in os.listdir("data")
                if file.split("_")[0] == "simulation"
            ]
            for file in files:
                df = pd.read_csv(f"data/{file}")
                if (
                    5 in df.heuristic_size.values
                    and 9 in df.team_size.values
                    and 0.2 in df.reliability_range.values
                    and n in df.n_sources.values
                    and mean in df.reliability_mean.values
                ):
                    if "diverse" in df.team_type.values:
                        df_diverse = df[df["team_type"] == "diverse"]
                        d_team_accuracy = df_diverse["accuracy_opinion"].median()
                    if "expert" in df.team_type.values:
                        df_expert = df[df["team_type"] == "expert"]
                        e_team_accuracy = df_expert["accuracy_opinion"].median()
            
            # Generate diverse team and compute Cantelli bounds
            sources = Sources(n, ("equi", mean, 0.2))
            team = generate_diverse_team(sources, 5, 9)
            result = cantelli_bounds(team)
            approx = result['lower']
            deviation = d_team_accuracy - approx
            
            data.append([
                n,
                mean,
                deviation,
                approx,
                d_team_accuracy,
                e_team_accuracy,
            ])
    df_cantelli = pd.DataFrame(data, columns=[
        "n_sources",
        "rel_mean",
        "d_deviation",
        "d_lower",
        "d_exact",
        "e_exact",
    ])
    df_cantelli.loc[:, "DTE_exact"] = df_cantelli["d_exact"] > df_cantelli["e_exact"]
    df_cantelli.loc[:, "DTE_approx"] = df_cantelli["d_lower"] > df_cantelli["e_exact"]
    return df_cantelli

### Results

Conclusion
- The Cantelli lower bound is not useful to approximate the diversity-expertise tradeoff, since the lower bound for the diverse team is always lower than the expert team’s accuracy, in the considered parameter space. 
    - Compare columns `DTE_exact` and `DTE_approx` below.
    - `DTE_exact` shows the diversity-expertise tradeoff in the simulations. (`True` means that diversity trumped expertise.)
    - `DTE_approx` shows the approximated diversity-expertise tradeoff, using the Cantelli lower bound. 

In [54]:
df_cantelli = generate_cantelli_df()
df_cantelli

Unnamed: 0,n_sources,rel_mean,d_deviation,d_lower,d_exact,e_exact,DTE_exact,DTE_approx
0,13,0.55,0.398139,0.238979,0.637119,0.717723,False,False
1,13,0.6,0.279069,0.480823,0.759892,0.803017,False,False
2,13,0.65,0.205175,0.653318,0.858494,0.873727,False,False
3,13,0.7,0.112177,0.815685,0.927863,0.927802,True,False
4,13,0.75,0.082841,0.886852,0.969692,0.965042,True,False
5,17,0.55,0.383596,0.268819,0.652415,0.735792,False,False
6,17,0.6,0.259877,0.525151,0.785029,0.818156,False,False
7,17,0.65,0.180152,0.70489,0.885042,0.885408,False,False
8,17,0.7,0.118727,0.82996,0.948688,0.935926,True,False
9,17,0.75,0.067605,0.914371,0.981976,0.96994,True,False


# Normal approximation

### Implementation

In [7]:
import os
import math
import itertools

import pandas as pd
import numpy as np

from typing import Tuple
from scipy.stats import norm

from models.agent import Agent
from models.sources import Sources
from models.team import Team
from models.generate_teams import generate_expert_team, generate_diverse_team

In [4]:
def joint_accuracy(agent1: Agent, agent2: Agent) -> float:
    """
    Computes the probability that two agents are BOTH correct.
    Assumes conditional independence given the true state.
    """
    sources = agent1.sources  # assuming both agents share the same Sources object
    rels_shared = [sources.reliabilities[s] for s in agent1.heuristic if s in agent2.heuristic]
    rels_unique_1 = [sources.reliabilities[s] for s in agent1.heuristic if s not in agent2.heuristic]
    rels_unique_2 = [sources.reliabilities[s] for s in agent2.heuristic if s not in agent1.heuristic]
    
    threshold_1, threshold_2 = (len(agent1.heuristic) + 1)//2, (len(agent2.heuristic) + 1)//2
    total = 0.0

    # Enumerate outcomes for shared sources first
    for shared_outcome in itertools.product([0,1], repeat=len(rels_shared)):
        p_shared = 1.0
        for source_rel, bit in zip(rels_shared, shared_outcome):
            p_shared *= source_rel if bit == 1 else (1 - source_rel)

        # agent probability of being correct given shared outcome
        def prob_agent_correct(rels_unique, shared_bits, threshold):
            total = 0.0
            for unique_outcome in itertools.product([0,1], repeat=len(rels_unique)):
                prob = 1.0
                for source_rel, bit in zip(rels_unique, unique_outcome):
                    prob *= source_rel if bit == 1 else (1 - source_rel)
                if sum(shared_bits) + sum(unique_outcome) >= threshold:
                    total += prob
            return total

        p_agent1_cond = prob_agent_correct(rels_unique_1, shared_outcome, threshold_1)
        p_agent2_cond = prob_agent_correct(rels_unique_2, shared_outcome, threshold_2)

        total += p_shared * p_agent1_cond * p_agent2_cond

    return total


def normal_approximation_group_accuracy(
    team: Team,
    continuity_correction: bool = True
) -> Tuple[float, float, float]:
    """Computes the normal approximation to the probability that the team majority is correct."""
    agent_scores = [agent.score for agent in team.members]
    mu = sum(agent_scores)
    
    # Compute all pairwise joint accuracies p_ij
    cov_sum = 0.0
    for agent_i, agent_j in itertools.combinations(team.members, 2):
        p_ij = joint_accuracy(agent_i, agent_j)
        cov_sum += p_ij - agent_i.score*agent_j.score

    # Compute total variance
    var = sum(score_i*(1-score_i) for score_i in agent_scores) + 2 * cov_sum
    sigma = math.sqrt(var)

    threshold = math.floor(team.size / 2) + 1
    # If degenerate variance
    if var <= 0:
        approx = 1.0 if mu >= threshold else 0.0
        return mu, var, approx

    # Normal approximation
    if continuity_correction:
        z = (threshold - 0.5 - mu) / sigma
    else:
        z = (threshold - mu) / sigma

    approx = 1 - norm.cdf(z)

    return mu, var, float(approx)


In [5]:
def generate_normal_approximation_df(team_type) -> pd.DataFrame:
    data = []
    for n_sources in [13, 17, 21]:
        for mean in [0.55, 0.6, 0.65, 0.7, 0.75]:
            # Retrieve exact accuracies from simulation data
            files = [
                file
                for file in os.listdir("data")
                if file.split("_")[0] == "simulation"
            ]
            for file in files:
                df = pd.read_csv(f"data/{file}")
                if (
                    5 in df.heuristic_size.values
                    and 9 in df.team_size.values
                    and 0.2 in df.reliability_range.values
                    and n_sources in df.n_sources.values
                    and mean in df.reliability_mean.values
                ):
                    if team_type in df.team_type.values:
                        df_team_type = df[df["team_type"] == team_type]
                        team_accuracy = df_team_type["accuracy_opinion"].median()
            
            # Generate team and compute normal approximation
            sources = Sources(n_sources, ("equi", mean, 0.2))
            if team_type == "diverse":
                team = generate_diverse_team(sources, 5, 9)
            elif team_type == "expert":
                team = generate_expert_team(sources, 5, 9)
            else:
                raise ValueError("team_type must be 'diverse' or 'expert'")
            mu, sigma2, approx = normal_approximation_group_accuracy(team)
            deviation = approx - team_accuracy
            
            data.append([
                n_sources,
                mean,
                deviation,
                approx,
                mu, 
                sigma2,
                team_accuracy,
            ])
    df_normal = pd.DataFrame(data, columns=[
        "n_sources",
        "rel_mean",
        "deviation",
        "approx",
        "mu",
        "sigma2",
        "exact",
    ])
    return df_normal

### Results

Check if the normal approximation is useful in the considered parameter space.

Conclusion:
- The normal approximation is not useful for approximating the diversity-expertise tradeoff: the expert team’s approximated accuracy always exceeds the diverse team’s approximated accuracy in the considered parameter space. (Compare columns `DTE_exact` and `DTE_approx` below.)
    - `DTE_exact` shows the diversity-expertise tradeoff in the simulations. (`True` means that diversity trumped expertise.)
    - `DTE_approx` shows the approximated diversity-expertise tradeoff, using normal approximation. 

In [8]:
df_normal_e = generate_normal_approximation_df("expert")
df_normal_d = generate_normal_approximation_df("diverse")

df_normal = df_normal_e.copy()
df_normal.rename(columns={
    "deviation": "e_deviation",
    "approx": "e_approx",
    "mu": "e_mu",
    "sigma2": "e_sigma2",
    "exact": "e_exact",
}, inplace=True)
df_normal.loc[:, ["deviation", "approx", "mu", "sigma2", "exact"]] = df_normal_d[["deviation", "approx", "mu", "sigma2", "exact"]]
df_normal.rename(columns={
    "deviation": "d_deviation",
    "approx": "d_approx",
    "mu": "d_mu",
    "sigma2": "d_sigma2",
    "exact": "d_exact",
}, inplace=True)
df_normal.loc[:, "DTE_exact"] = df_normal["d_exact"] > df_normal["e_exact"]
df_normal.loc[:, "DTE_approx"] = df_normal["d_approx"] > df_normal["e_approx"]
df_normal[["n_sources", "rel_mean", "e_approx", "e_exact", "d_approx", "d_exact", "DTE_exact", "DTE_approx"]]

Unnamed: 0,n_sources,rel_mean,e_approx,e_exact,d_approx,d_exact,DTE_exact,DTE_approx
0,13,0.55,0.712611,0.717723,0.646868,0.637119,False,False
1,13,0.6,0.812087,0.803017,0.768289,0.759892,False,False
2,13,0.65,0.904629,0.873727,0.879008,0.858494,False,False
3,13,0.7,0.972467,0.927802,0.964573,0.927863,True,False
4,13,0.75,0.99825,0.965042,0.99593,0.969692,True,False
5,17,0.55,0.732942,0.735792,0.646554,0.652415,False,False
6,17,0.6,0.831585,0.818156,0.7943,0.785029,False,False
7,17,0.65,0.920609,0.885408,0.904435,0.885042,False,False
8,17,0.7,0.980748,0.935926,0.971905,0.948688,True,False
9,17,0.75,0.999267,0.96994,0.998235,0.981976,True,False


Full results

In [9]:
df_normal

Unnamed: 0,n_sources,rel_mean,e_deviation,e_approx,e_mu,e_sigma2,e_exact,d_deviation,d_approx,d_mu,d_sigma2,d_exact,DTE_exact,DTE_approx
0,13,0.55,-0.005113,0.712611,6.278812,10.052908,0.717723,0.009749,0.646868,5.410198,5.832717,0.637119,False,False
1,13,0.6,0.00907,0.812087,7.00695,8.01316,0.803017,0.008397,0.768289,6.177629,5.23504,0.759892,False,False
2,13,0.65,0.030902,0.904629,7.63899,5.755802,0.873727,0.020514,0.879008,6.904719,4.224023,0.858494,False,False
3,13,0.7,0.044665,0.972467,8.154956,3.630023,0.927802,0.036711,0.964573,7.583333,2.913434,0.927863,True,False
4,13,0.75,0.033207,0.99825,8.543316,1.917447,0.965042,0.026238,0.99593,8.104795,1.85567,0.969692,True,False
5,17,0.55,-0.00285,0.732942,6.434481,9.680978,0.735792,-0.005861,0.646554,5.322257,4.781472,0.652415,False,False
6,17,0.6,0.013429,0.831585,7.144037,7.57859,0.818156,0.009271,0.7943,6.180189,4.183817,0.785029,False,False
7,17,0.65,0.035201,0.920609,7.75294,5.328655,0.885408,0.019393,0.904435,6.897453,3.363471,0.885042,False,False
8,17,0.7,0.044822,0.980748,8.242849,3.271114,0.935926,0.023217,0.971905,7.5092,2.483348,0.948688,True,False
9,17,0.75,0.029326,0.999267,8.604291,1.66457,0.96994,0.016259,0.998235,8.094483,1.517997,0.981976,True,False
