In [1]:
from dataclasses import dataclass
from abc import abstractmethod
from typing import Iterable
import numpy as np
import sklearn.preprocessing
import csv
import random

In [2]:
@dataclass(frozen=True)
class Annotation:
    annotator: str
    task: str
    value: str


In [3]:
@dataclass(frozen=True)
class Estimation:
    task: str
    value: str


In [4]:
class TruthInference:
    @abstractmethod
    def fit(self, annotations: Iterable[Annotation]):
        pass
    
    @abstractmethod
    def estimate(self) -> Iterable[Estimation]:
        pass

In [5]:
class DawidSkene(TruthInference):

    def __init__(self) -> None:
        super().__init__()
        self.predictions_ = {}

    def estimate(self) -> Iterable[Estimation]:
        return [Estimation(task, val) for task, val in self.predictions_.items()]

    def fit(self, annotations: Iterable[Annotation], max_iter=100):
        tasks = set(a.task for a in annotations)
        task_to_id = {task: i for i, task in enumerate(tasks)}

        workers = list(set(a.annotator for a in annotations))
        worker_to_id = {worker: i for i, worker in enumerate(workers)}
        
        values = list(set(a.value for a in annotations))
        value_to_id = {value: i for i, value in enumerate(values)}

        worker_annotations_values = [[] for _ in workers]
        worker_annotations_tasks = [[] for _ in workers]

        prediction_distr = np.zeros((len(tasks), len(values)))
        for a in annotations:
            a_id = worker_to_id[a.annotator]
            value_id = value_to_id[a.value]
            task_id = task_to_id[a.task]

            worker_annotations_values[a_id].append(value_id)
            worker_annotations_tasks[a_id].append(task_id)
            prediction_distr[task_id, value_id] += 1
        prediction_distr = sklearn.preprocessing.normalize(prediction_distr, axis=1, norm='l1')

        for i in range(len(worker_to_id)):
            worker_annotations_values[i] = np.array(worker_annotations_values[i])
            worker_annotations_tasks[i] = np.array(worker_annotations_tasks[i])


        prior = np.zeros(len(values))
        old_conf_mx = [np.zeros((len(values), len(values))) for _ in workers]

        for iter in range(max_iter):
            conf_mx = [np.zeros((len(values), len(values))) for _ in workers]
            for k in range(len(workers)):
                for j in range(len(values)):
                    np.add.at(conf_mx[k][:, j], worker_annotations_values[k], prediction_distr[worker_annotations_tasks[k], j])
                conf_mx[k] = np.transpose(conf_mx[k])
                conf_mx[k] = sklearn.preprocessing.normalize(conf_mx[k], axis=1, norm='l1')

            for j in range(len(values)):
                prior[j] = np.sum(prediction_distr[:, j]) / len(tasks)
            likelihood = np.ones((len(values), len(tasks)))

            for k in range(len(workers)):
                for j in range(len(values)):
                    np.multiply.at(likelihood[j, :], worker_annotations_tasks[k], conf_mx[k][j, worker_annotations_values[k]])
            likelihood = np.transpose(likelihood)

            logit = 1
            for i in range(len(tasks)):
                s = 0
                for j in range(len(values)):
                    prediction_distr[i, j] = prior[j] * likelihood[i, j]
                    s += prediction_distr[i, j]
                logit += np.log(s)
            print(f'Iter {iter:02}, logit: {logit / len(tasks):.6f}')

            prediction_distr = sklearn.preprocessing.normalize(prediction_distr, axis=1, norm='l1')

            converged = True
            for old, new in zip(old_conf_mx, conf_mx):
                if np.linalg.norm(old - new) > 0.0001:
                    converged = False

            if converged:
                break

            old_conf_mx = conf_mx
        self.predictions_ = {t: values[np.argmax(prediction_distr[i, :])] for t, i in task_to_id.items()}


In [6]:
class DataProvider:
    @abstractmethod
    def labels(self) -> Iterable[Annotation]:
        pass

    @abstractmethod
    def gold(self) -> Iterable[Estimation]:
        pass

In [7]:
class RelDataProvider(DataProvider):
    def __init__(self, path: str):
        self._rel_labels = []
        self._rel_gold = set()
        with open(path, newline='') as csvfile:
            file_reader = csv.reader(csvfile, delimiter='\t')
            next(file_reader)
            for row in file_reader:
                self._rel_labels.append(Annotation(row[1], row[0] + '#' + row[2], row[4]))
                gold = row[3]
                if gold != '-1':
                    self._rel_gold.add(Estimation(row[0] + '#' + row[2], gold))

    def labels(self) -> Iterable[Annotation]:
        return self._rel_labels

    def gold(self) -> Iterable[Estimation]:
        return list(self._rel_gold)

# Задание: 
- Реализовать majority vote
- Сравнить MV с DS
- Выявить "надежных"/"ненадежных" экспертов
- Скольки "надежным" экспертам мы не заплатим, если будем платить по совпадению с MV?

In [8]:
class MajorityVote(TruthInference):
    def __init__(self):
        super().__init__()
        self.predictions_ = {}
        
        
    def estimate(self) -> Iterable[Estimation]:
        return [Estimation(task, val) for task, val in self.predictions_.items()]
        
    
    def fit(self, annotations: Iterable[Annotation], max_iter=100):
        tasks = set(a.task for a in annotations)
        task_to_id = {task: i for i, task in enumerate(tasks)}

        workers = list(set(a.annotator for a in annotations))
        worker_to_id = {worker: i for i, worker in enumerate(workers)}
        
        values = list(set(a.value for a in annotations))
        value_to_id = {value: i for i, value in enumerate(values)}

        task_values = [[] for _ in tasks]
        for a in annotations:
            task_id = task_to_id[a.task]
            task_values[task_id].append(a.value)
        self.predictions_ = {t: self.most_freq(task_values[i]) for t, i in task_to_id.items()}
    
    def most_freq(self, lst):
        l = set(lst)
        m_fr = max([lst.count(x) for x in l])
        candids = [x for x in l if lst.count(x) == m_fr]
        if len(candids) == 1:
            return candids[0]
        else:
            return random.choice(candids)

        
def median(lst):
    lst.sort()
    return lst[len(lst) // 2]

def arith_mean(lst):
    return sum(lst) / len(lst)
        

In [9]:
import random 

W_CONF_BOUND = 1.
W_SCORE_BOUND = 0.5

class ConfidentMajorityVote(MajorityVote):
    def __init__(self, scores):
        super().__init__()
        self.scores = scores
        
    # scores == {worker : (worker score, our confidence in score)}
    def fit(self, annotations: Iterable[Annotation], max_iter=100):
        tasks = set(a.task for a in annotations)
        task_to_id = {task: i for i, task in enumerate(tasks)}
        print("Tasks: " + str(len(tasks)))

        workers = list(set(a.annotator for a in annotations))
        worker_to_id = {worker: i for i, worker in enumerate(workers)}
        
        values = list(set(a.value for a in annotations))
        value_to_id = {value: i for i, value in enumerate(values)}

        conf_task_values = [[] for _ in tasks]
        rest_task_values = [[] for _ in tasks]
        for a in annotations:
            w_score, w_conf = self.scores[a.annotator]
            task_id = task_to_id[a.task]
            if w_conf < W_CONF_BOUND or w_score < W_SCORE_BOUND:
                rest_task_values[task_id].append((a.value, w_score, w_conf))
            else:
                conf_task_values[task_id].append((a.value, w_score, w_conf))

        counter = 0
        for t, i in task_to_id.items():
            if len(conf_task_values[i]) != 0:
                self.predictions_[t] = self.most_freq(conf_task_values[i])
            elif len(rest_task_values[i]) != 0:
                counter += 1
                self.predictions_[t] = self.most_freq(rest_task_values[i])
            else:
                self.predictions_[t] = random.choice(values)
        print("No top: " + str(counter))
    
    def most_freq(self, lst):
        setlst = set([x[0] for x in lst])
        weighted_votes = {x : 0. for x in setlst}
        for val, sc, conf in lst:
            weighted_votes[val] += sc * conf
        m_fr = max(weighted_votes.values())
        candids = [val for val in weighted_votes if weighted_votes[val] == m_fr]
        if len(candids) == 1:
            return candids[0]
        else:
            return random.choice(candids)

In [10]:
class ConfusionMatrix:
    def __init__(self, worker):
        self.worker_ = worker
        self.dim_ = 0
        self.matrix_ = None
        self.gold_numbers_ = []
        self.gold_total_ = 0
    
    def get_by_matrix_normalized(self):
        res = self.matrix_.copy()
        s = sum(self.gold_numbers_)
        if s == 0:
            return res
        for i in range(self.dim_):
            for j in range(self.dim_):
                res[i][j] /= s
        return res
    
    def get_by_rows_normalized(self):
        if self.matrix_ is None:
            return None
        res = self.matrix_.copy()
        for i in range(self.dim_):
            if self.gold_numbers_[i] == 0:
                continue
            for j in range(self.dim_):
                res[i][j] /= self.gold_numbers_[i]
        return res
        
        
    def fit(self, annotations: Iterable[Annotation], golds: Iterable[Estimation]):
        values = list(set(a.value for a in annotations))
        value_to_id_ = {value: i for i, value in enumerate(values)}
        self.dim_ = len(values)
        self.matrix_ = [[0] * self.dim_ for _ in range(self.dim_)]
        self.gold_numbers_ = [0 for _ in range(self.dim_)]
        
        task_to_gold = {g.task: g.value for g in golds}
        
        for a in annotations:
            if a.annotator != self.worker_ or a.task not in task_to_gold:
                continue
            v = value_to_id_[a.value]
            g = value_to_id_[task_to_gold[a.task]]
            self.matrix_[g][v] += 1
            self.gold_numbers_[g] += 1
            self.gold_total_ += 1
        
    

In [11]:
provider = RelDataProvider('trec-rf10-data.txt')

In [12]:
from scipy.stats.mstats import gmean

def max_golds_evaluated_by_workers(provider):
    workers = list(set(a.annotator for a in provider.labels()))
    worker_to_id = {worker: i for i, worker in enumerate(workers)}
    task_to_gold = {g.task: g.value for g in provider.gold()}
    golds_evaluated = [0 for _ in workers]

    for a in provider.labels():
        if a.task not in task_to_gold:
            continue
        golds_evaluated[worker_to_id[a.annotator]] += 1
    return golds_evaluated

golds_evaluated = max_golds_evaluated_by_workers(provider)
non_zeros = [x for x in golds_evaluated if x != 0 ]
print(min(golds_evaluated), max(golds_evaluated))
print(gmean([x for x in golds_evaluated if x != 0 ]))


0 2208
6.8563825662320665


In [13]:
class WorkersEvaluator:
    
    def __init__(self, provider):
        self.provider_ = provider
        self.workers_ = list(set(a.annotator for a in provider.labels()))

        self.matrices_ = [ConfusionMatrix(w) for w in self.workers_]
        self.golds_evaluated = []
        for m in self.matrices_:
            m.fit(provider.labels(), provider.gold())
            self.golds_evaluated.append(m.gold_numbers_)
        
        golds_total = [sum(gs) for gs in self.golds_evaluated]
        self.max_golds_ = max(golds_total) # highest confidence score
        self.geom_mean_golds = int(gmean([x for x in golds_total if x != 0 ])) # geometric mean among non zeros
        
        
       
    # Returns (worker score, our confidence in score) per worker
    # confidence in [0., 1.]
    # normalize == 'M' => whole confusion matrix is devided by golds numeber
    # normalize == 'R' => confusion matrix rows are devided by golds per class
    def evaluate_by_trace_sum(self, normalize='M'):
        scores = {w : (0., 0.) for w in self.workers_}
        
        for wid, w in enumerate(self.workers_):
            matrix = ConfusionMatrix(w)
            matrix.fit(self.provider_.labels(), self.provider_.gold())
            m = None
            if (normalize == 'M'):
                m = matrix.get_by_matrix_normalized()
            elif (normalize == 'R'):
                m = matrix.get_by_rows_normalized()
            sc = 0.
            for i in range(len(m)):
                sc += m[i][i]
            scores[w] = (sc, min(1., sum(self.golds_evaluated[wid]) / self.geom_mean_golds))
        return scores

        

In [14]:
def accuracy(provider: DataProvider, inference: TruthInference) -> int:
    accepted = 0
    all_points = 0
    estimates = {}

    inference.fit(provider.labels())
    for estimate in inference.estimate():
        estimates[estimate.task] = estimate.value
    for point in provider.gold():
        if point.task in estimates:
            estimate = estimates[point.task]
            all_points += 1
            if point.value == estimate:
                accepted += 1

    accuracy = accepted / all_points
    return accuracy

In [15]:
accuracy(provider, DawidSkene())

Iter 00, logit: -3.789689
Iter 01, logit: -3.672793
Iter 02, logit: -3.635591
Iter 03, logit: -3.619169
Iter 04, logit: -3.610236
Iter 05, logit: -3.604747
Iter 06, logit: -3.601037
Iter 07, logit: -3.598405
Iter 08, logit: -3.596427
Iter 09, logit: -3.594798
Iter 10, logit: -3.593363
Iter 11, logit: -3.592099
Iter 12, logit: -3.591016
Iter 13, logit: -3.590134
Iter 14, logit: -3.589404
Iter 15, logit: -3.588847
Iter 16, logit: -3.588411
Iter 17, logit: -3.588044
Iter 18, logit: -3.587721
Iter 19, logit: -3.587436
Iter 20, logit: -3.587194
Iter 21, logit: -3.586989
Iter 22, logit: -3.586807
Iter 23, logit: -3.586638
Iter 24, logit: -3.586475
Iter 25, logit: -3.586321
Iter 26, logit: -3.586186
Iter 27, logit: -3.586064
Iter 28, logit: -3.585954
Iter 29, logit: -3.585861
Iter 30, logit: -3.585782
Iter 31, logit: -3.585711
Iter 32, logit: -3.585643
Iter 33, logit: -3.585578
Iter 34, logit: -3.585517
Iter 35, logit: -3.585460
Iter 36, logit: -3.585403
Iter 37, logit: -3.585336
Iter 38, log

0.6143497757847534

In [16]:
accuracy(provider, MajorityVote())

0.5385650224215247

In [17]:
evaluator = WorkersEvaluator(provider)

In [18]:
scores_trace_sum = evaluator.evaluate_by_trace_sum()

In [22]:
W_CONF_BOUND = 0.
W_SCORE_BOUND = 0.4
accuracy(provider, ConfidentMajorityVote(scores_trace_sum))

Tasks: 20232
No top: 945


0.6468609865470852