<a href="https://colab.research.google.com/github/Buitragox/Crowdsourcing-Thesis/blob/main/notebooks/gcce_cl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generalized Categorical Cross-Entropy and CrowdLayer

Training of the models with GCCE and CL

We perform two grid searches for each model:
- One with the entire dataset
- One with the reduced dataset that uses at least 2 annotations per sample

In [None]:
import numpy as np
import zipfile

from matplotlib import pyplot as plt

from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.losses import Loss

In [None]:
# Mount Google Drive: the feature archive is read from it and the result paths
# (json_path) point into it further down in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Download the necessary utilities from the repo
!wget --no-check-certificate 'https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/utils.py' -O utils.py
!wget --no-check-certificate 'https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/grid_search.py' -O grid_search.py

--2024-06-04 16:19:48--  https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5405 (5.3K) [text/plain]
Saving to: ‘utils.py’


2024-06-04 16:19:48 (51.6 MB/s) - ‘utils.py’ saved [5405/5405]

--2024-06-04 16:19:48--  https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/grid_search.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5486 (5.4K) [text/plain]
Saving to: ‘grid_search.py’


2024-06-04 16:19:49 (52.4 MB/s) - ‘grid_search.py’ saved [

In [None]:
from utils import load_ma_data
from grid_search import grid_search, show_results

In [None]:
!wget --no-check-certificate 'https://github.com/Buitragox/Crowdsourcing-Thesis/raw/main/data/pkl/train_crowdsourced_labels.pkl' -O train_crowdsourced_labels.pkl

--2024-06-04 16:19:49--  https://github.com/Buitragox/Crowdsourcing-Thesis/raw/main/data/pkl/train_crowdsourced_labels.pkl
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/data/pkl/train_crowdsourced_labels.pkl [following]
--2024-06-04 16:19:49--  https://raw.githubusercontent.com/Buitragox/Crowdsourcing-Thesis/main/data/pkl/train_crowdsourced_labels.pkl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10426477 (9.9M) [application/octet-stream]
Saving to: ‘train_crowdsourced_labels.pkl’


2024-06-04 16:19:50 (90.0 MB/s) - ‘train_crowdsourced_labels.pkl’ saved [1

In [None]:
# Download TrainTestNpyInt.zip from google drive
!pip install gdown
!gdown 1XeVC0FOmv_V8jY31JP73yXqa4q27EWJS -O TrainTestNpyInt.zip

#!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XeVC0FOmv_V8jY31JP73yXqa4q27EWJS' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XeVC0FOmv_V8jY31JP73yXqa4q27EWJS" -O TrainTestNpyInt.zip && rm -rf /tmp/cookies.txt

Downloading...
From (original): https://drive.google.com/uc?id=1XeVC0FOmv_V8jY31JP73yXqa4q27EWJS
From (redirected): https://drive.google.com/uc?id=1XeVC0FOmv_V8jY31JP73yXqa4q27EWJS&confirm=t&uuid=9bf8e715-ec6c-42f4-874b-43d78130ed9a
To: /content/TrainTestNpyInt.zip
100% 96.7M/96.7M [00:01<00:00, 60.2MB/s]


In [None]:
# Unzip the feature archive.
# Prefer the copy downloaded by gdown in the previous cell; if it is missing or is not a
# valid zip file (is_zipfile returns False in both cases), fall back to the copy on Google Drive.
local_zip = "./TrainTestNpyInt.zip"
drive_zip = "./drive/MyDrive/npy/TrainTestNpyInt.zip"
zip_path = local_zip if zipfile.is_zipfile(local_zip) else drive_zip

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("./TrainTestNpyInt")

In [None]:
# Shared configuration for both experiments (GCCE and Crowd Layer).
pkl_path = "./train_crowdsourced_labels.pkl"  # crowdsourced labels downloaded above
data_path = "./TrainTestNpyInt"  # folder the feature archive is extracted into
json_path = "./drive/MyDrive/Experiment results/ma"  # Google Drive folder used to build the history/report JSON paths
batch_size = 8  # NOTE(review): not passed to grid_search in the cells of this notebook -- confirm where it is used
epochs = 20  # forwarded to grid_search through its epochs argument
R = 20 # Annotators
K = 3 # Classes

## GCCE

In [None]:
class GCCELoss(Loss):
    """
    Generalized Categorical Cross Entropy (GCCE) loss for multiple annotators.

    The prediction tensor is split column-wise: the first R columns form the
    annotator head and the remaining columns are the class probabilities.
    The target tensor holds one integer label per annotator; tf.one_hot maps
    labels outside [0, K) (e.g. -1 for a missing annotation) to an all-zero
    vector, so those entries contribute nothing to the loss.

    R = Number of annotators
    K = Number of classes
    q = Value in (0, 1] that controls the noise resistance:
        the higher the value, the more robust the loss becomes to noise.
    """
    def __init__(self, R=20, K=3, q=0.1):
        super().__init__()
        self.R, self.K, self.q = R, K, q

    def call(self, y_true, y_pred):
        # Split the concatenated network output into its two heads.
        annotator_out = y_pred[:, :self.R]  # N * R
        class_probs = tf.clip_by_value(y_pred[:, self.R:],
                                       clip_value_min=1e-9, clip_value_max=1-1e-9)  # N * K

        # One-hot encode every annotator's label along a new class axis.
        labels_onehot = tf.one_hot(tf.cast(y_true, dtype=tf.int32),
                                   depth=self.K, axis=1)  # N * K * R

        # Copy the class probabilities once per annotator so they line up with the labels.
        probs_per_annotator = tf.tile(class_probs[:, :, tf.newaxis], [1, 1, self.R])  # N * K * R

        # Per-annotator GCCE term, weighted by the annotator head.
        gcce = labels_onehot * (1 - probs_per_annotator**self.q) / self.q
        weighted_gcce = annotator_out * tf.reduce_sum(gcce, axis=1)

        # Complementary term based on a uniform prediction (1/K); the one-hot sum is
        # 1 where a label exists and 0 otherwise, which masks out missing annotations.
        uniform_term = ((1 - annotator_out) * (1-(1/self.K)**self.q) / self.q
                        * tf.reduce_sum(labels_onehot, axis=1))

        return tf.reduce_sum(weighted_gcce + uniform_term)

In [None]:
def build_model(q: float):
    """Build and compile the two-headed network trained with GCCELoss.

    The network takes 512-dimensional feature vectors, passes them through two
    ReLU layers with dropout, and concatenates a sigmoid head with R units
    (one per annotator) and a softmax head with K units (one per class).

    q: noise-resistance parameter forwarded to GCCELoss.
    Returns the compiled Keras model.
    """
    features = Input(shape=(512, ))

    hidden = Dense(128, activation='relu')(features)
    hidden = Dropout(0.25)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Dropout(0.25)(hidden)

    # Both heads branch off the same hidden representation.
    annotator_head = Dense(R, activation="sigmoid", name='output_R')(hidden)
    class_head = Dense(K, activation="softmax", name='output_K')(hidden)

    # Column order matters: GCCELoss and evaluate() expect the R annotator columns first.
    outputs = tf.keras.layers.concatenate([annotator_head, class_head])

    network = Model(features, outputs, name="VGG16_GCCE")
    network.compile(optimizer="adam", loss=GCCELoss(R, K, q))
    return network

In [None]:
def evaluate(model, X_test, Y_test):
    """Score a GCCE model on the test set.

    Only the columns after the first R (the class head) are used for the
    prediction. Y_test is one-hot encoded, so both sides are reduced to class
    indices with argmax before building the report.

    Returns the scikit-learn classification report as a dict.
    """
    class_probs = model.predict(X_test)[:, R:]
    predicted = np.argmax(class_probs, axis=1)
    expected = np.argmax(Y_test, axis=1)
    return classification_report(expected, predicted, output_dict=True)

In [None]:
# Single switch for this experiment:
#   True  -> only samples with a minimum of 2 annotators
#   False -> the entire dataset
# The output file names are derived from the same flag, so the data that is loaded
# and the files the results are stored in cannot get out of sync.
gcce_min_two_ann = True
gcce_tag = 'mintwo' if gcce_min_two_ann else 'all'

X_train, labels, X_test, Y_test = load_ma_data(data_path, pkl_path, R, min_two_ann=gcce_min_two_ann)
history_path = json_path + f'/history_gcce_int_{gcce_tag}.json'
report_path = json_path + f'/report_gcce_int_{gcce_tag}.json'

# Values of q (GCCE noise-resistance parameter) to explore.
q_grid = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
history, report = grid_search(X_train, labels, X_test, Y_test, build_model, evaluate,
                              repeat=10, epochs=epochs, report_path=report_path, history_path=history_path,
                              q=q_grid)

Finished experiments: {(0.7,), (0.2,), (0.3,), (0.01,), (0.8,), (0.4,), (0.9,), (0.5,), (0.6,), (0.1,)}


In [None]:
# Print, for every value of q, the mean/std of the per-class F1 scores and of the accuracy.
show_results(history, report, K)

parameters = {'q': 0.01}
	mean f1 scores: [0.7813864360264391, 0.7027975292662456, 0.6496363120424922]
	std f1 scores: [0.260754002827133, 0.09146930217900186, 0.22198055602479397]
	mean accuracy: 0.7610219981668195
	std accuracy: 0.16274603348180577
parameters = {'q': 0.7}
	mean f1 scores: [0.6688943750163967, 0.6347422660354678, 0.4324576954001734]
	std f1 scores: [0.33770420429232556, 0.11813420132843233, 0.21890814984319967]
	mean accuracy: 0.6734188817598533
	std accuracy: 0.20429484084820218
parameters = {'q': 0.2}
	mean f1 scores: [0.8634061963084815, 0.7355372327614298, 0.7271215440684401]
	std f1 scores: [0.010738229378024218, 0.008202453436022935, 0.03642506761572092]
	mean accuracy: 0.8122593950504123
	std accuracy: 0.011248812017504117
parameters = {'q': 0.3}
	mean f1 scores: [0.8593133067699407, 0.7350182900685867, 0.7058340774409577]
	std f1 scores: [0.00718156808499821, 0.009806162109582222, 0.03811725158153601]
	mean accuracy: 0.8075847846012831
	std accuracy: 0.0080051

## Crowd Layer

Article: [Deep Learning from Crowds](https://arxiv.org/abs/1709.01779)

Code: https://github.com/fmpr/CrowdLayer


In [None]:
!git clone 'https://github.com/fmpr/CrowdLayer'

Cloning into 'CrowdLayer'...
remote: Enumerating objects: 63, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 63 (delta 1), reused 4 (delta 1), pack-reused 54[K
Receiving objects: 100% (63/63), 285.05 KiB | 1.23 MiB/s, done.
Resolving deltas: 100% (23/23), done.


In [None]:
import sys
# Make the cloned repository importable.
# NOTE(review): the absolute path assumes the repository was cloned into /content
# (the working directory on Colab) -- confirm when running elsewhere.
sys.path.append('/content/CrowdLayer')
from crowd_layer.crowd_layers import CrowdsClassification

In [None]:
# Set min_two_ann to choose between the entire dataset (False) and the subset with a minimum
# of 2 annotators per sample (True).
# Note: this replaces the X_train / labels / X_test / Y_test loaded for the GCCE experiment.
X_train, labels, X_test, Y_test = load_ma_data(data_path, pkl_path, R, min_two_ann=False)
print(*(split.shape for split in (X_train, labels, X_test, Y_test)))

(75243, 512) (75243, 20) (4364, 512) (4364, 3)


In [None]:
def onehot_with_missings(labels, K):
    """Apply onehot encoding to labels and mark missing answers with -1 instead of 0.

    labels: 2-D array-like of shape (N, R) with one class index per annotator;
            a value of -1 marks a missing annotation.
    K:      number of classes.

    Returns a float32 array of shape (N, K, R). For an annotated entry the
    class axis is the usual one-hot vector; for a missing entry the whole class
    axis is filled with -1. The number of annotators is taken from
    labels.shape[1], so the function does not depend on any global.
    """
    labels = np.asarray(labels)

    # Compare every label against every class index: (N, 1, R) == (1, K, 1) -> (N, K, R).
    # Labels outside [0, K) match no class and therefore encode to all zeros.
    class_ids = np.arange(K).reshape(1, K, 1)
    one_hot = (labels.astype(np.int32)[:, np.newaxis, :] == class_ids).astype(np.float32)

    # Broadcast the missing mask along the class axis and overwrite those columns with -1.
    missing = (labels == -1)[:, np.newaxis, :]
    return np.where(missing, np.float32(-1), one_hot)

In [None]:
# Encode the crowd labels for the Crowd Layer loss and inspect the result
# (shape first, then the array itself).
labels_onehot = onehot_with_missings(labels, K)
print(labels_onehot.shape, labels_onehot, sep="\n")

(75243, 3, 20)
[[[-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]]

 [[-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]]

 [[-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]]

 ...

 [[-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]]

 [[ 1.  1.  1. ...  0.  1.  1.]
  [ 0.  0.  0. ...  1.  0.  0.]
  [ 0.  0.  0. ...  0.  0.  0.]]

 [[-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]
  [-1. -1. -1. ... -1. -1. -1.]]]


In [None]:
# Adapted from https://github.com/fmpr/CrowdLayer/blob/master/crowd_layer/crowd_layers.py#L142
# The upstream version passes `dim=1` to tf.nn.softmax_cross_entropy_with_logits, which raises
# an error here; `axis=1` is used instead. Other people have encountered the same issue:
# https://github.com/fmpr/CrowdLayer/pull/3
# tf.nn.softmax_cross_entropy_with_logits applies the softmax itself, so it should not be fed
# by a softmax activation layer; the network must output raw scores.
class LossMaskedMultiCrossEntropy(Loss):
    """Softmax cross-entropy summed over all annotators, skipping missing annotations.

    Targets are one-hot encoded along axis 1, and a missing annotation is marked
    by -1 in the target (only class row 0 is inspected to detect it).
    """
    def __init__(self):
        super().__init__()

    def call(self, y_true, y_pred):
        # y_true and y_pred both have shape (None, 3, 20); the class axis is axis 1.
        per_annotator = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred, axis=1)

        # Replace the loss of missing annotations with 0 so they do not contribute.
        is_missing = tf.equal(y_true[:, 0, :], -1)
        masked = tf.where(is_missing, x=tf.zeros_like(per_annotator), y=per_annotator)

        return tf.reduce_sum(masked)

In [None]:
def build_base_model():
    """Build the classifier that sits underneath the crowd layer.

    The network maps 512-dimensional feature vectors to K raw class scores
    (logits). The last layer is deliberately linear instead of softmax because
    the crowd-layer loss applies the softmax itself.

    Returns the compiled Sequential model. Because the output is logits, the
    cross-entropy it is compiled with uses from_logits=True; the plain
    'categorical_crossentropy' string would treat the scores as probabilities.
    """
    base_model = Sequential()
    base_model.add(Input(shape=(512, )))
    base_model.add(Dense(128, activation='relu'))
    base_model.add(Dropout(0.25))
    base_model.add(Dense(64, activation='relu'))
    base_model.add(Dropout(0.25))
    # Linear output (logits), not softmax: see the docstring.
    base_model.add(Dense(K, activation="linear"))
    base_model.compile(optimizer='adam',
                       loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True))

    return base_model

In [None]:
def build_crowd_layer_model(conn_type: str):
    """Stack a CrowdsClassification layer on top of the base classifier.

    conn_type: connection type forwarded to CrowdsClassification.
    Returns the model compiled with LossMaskedMultiCrossEntropy.
    """
    crowd_model = build_base_model()
    crowd_layer = CrowdsClassification(K, R, conn_type=conn_type)
    crowd_model.add(crowd_layer)
    # Re-compile: the loss of the base model is replaced by the masked multi-annotator loss.
    crowd_model.compile(optimizer='adam', loss=LossMaskedMultiCrossEntropy())
    return crowd_model

def evaluate_crowd_layer(model, X_test, Y_test) -> dict:
    """Score the classifier underneath the crowd layer on the test set.

    The last layer of the model is removed and replaced by a softmax
    activation, so the model is modified in place. Y_test is one-hot encoded;
    both predictions and targets are reduced to class indices with argmax.

    Returns the scikit-learn classification report as a dict.
    """
    # Swap the last layer for a softmax over the remaining model's output.
    model.pop()
    softmax_head = tf.keras.layers.Activation('softmax')
    model.add(softmax_head)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    class_probs = model.predict(X_test)
    predicted = np.argmax(class_probs, axis=1)
    expected = np.argmax(Y_test, axis=1)
    return classification_report(expected, predicted, output_dict=True)

In [None]:
# Crowd-layer connection types explored by the grid search.
conn_types = ['MW', 'VW', 'VB', 'VW+B', 'SW']

# Result files for the entire dataset; use the "mintwo" pair below instead when the data
# was loaded with min_two_ann=True.
history_path = f'{json_path}/history_crowdlayer_int_all.json'
report_path = f'{json_path}/report_crowdlayer_int_all.json'
# history_path = json_path + '/history_crowdlayer_int_mintwo.json'
# report_path = json_path + '/report_crowdlayer_int_mintwo.json'

exp_histories, exp_reports = grid_search(
    X_train, labels_onehot, X_test, Y_test,
    build_crowd_layer_model, evaluate_crowd_layer,
    repeat=10, epochs=epochs,
    history_path=history_path, report_path=report_path,
    conn_type=conn_types,
)

Finished experiments: {('MW',), ('SW',), ('VW',), ('VW+B',), ('VB',)}


In [None]:
# Print, for every connection type, the mean/std of the per-class F1 scores and of the accuracy.
show_results(exp_histories, exp_reports, K)

parameters = {'conn_type': 'MW'}
	mean f1 scores: [0.8281740389619902, 0.6754308351223931, 0.49897393681573676]
	std f1 scores: [0.025071457815442913, 0.03981614682343137, 0.16133288086139694]
	mean accuracy: 0.7514436296975253
	std accuracy: 0.03455816599576129
parameters = {'conn_type': 'VW'}
	mean f1 scores: [0.8087314474307427, 0.7004645198143715, 0.7342476292138482]
	std f1 scores: [0.04012628307889424, 0.031251079447517954, 0.02350992561693734]
	mean accuracy: 0.7654445462878094
	std accuracy: 0.034738378769642766
parameters = {'conn_type': 'SW'}
	mean f1 scores: [0.8671268122798879, 0.7429945710270405, 0.7428033987972154]
	std f1 scores: [0.009287778106990642, 0.010448851922485937, 0.018839198362629997]
	mean accuracy: 0.818423464711274
	std accuracy: 0.008986675773921458
parameters = {'conn_type': 'VW+B'}
	mean f1 scores: [0.8523026936703741, 0.7343477516278278, 0.7214740814548858]
	std f1 scores: [0.023763360303493207, 0.020489004230371966, 0.025264043009676638]
	mean accuracy