# Tsallis Entropy labelling

In [1]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression as LR

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action = 'ignore', category = FutureWarning)

In [13]:
def tsallis_label(q, probas, s_cls):
    elements = np.power(probas, q - 1)
    # thrshld in tsallis entropy model
    ts_thrshld = np.sum(np.power(probas, q))
    if q < 1:
        labels = np.array([s_cls[i] for i, e in enumerate(elements) if e < ts_thrshld])
    else:
        labels = np.array([s_cls[i] for i, e in enumerate(elements) if e > ts_thrshld])
    return labels

#### ラベルづけからその正解率の評価まで行う関数

In [14]:
# labelling and evaluating them
def tsallis_ent_eval(q, orig_A, lim_A, dataset):

    # generate an annotator
    a1_model = LR().fit(train_imgs[:orig_A], train_labels[:orig_A])
    acc = accuracy_score(test_labels, a1_model.predict(test_imgs))
    print(f"annotator's ability: {acc*100}[%]")
    
    a1_proba = a1_model.predict_proba(train_imgs[orig_A:orig_A + lim_A])
    # entropy labelling
    mul_labels = [tsallis_label(q, probas, [i for i in range(10)]) for probas in a1_proba]
    
    # labels score evaluation
    score = 0
    for labels, t_label in zip(mul_labels, train_labels[orig_A:orig_A + lim_A]):
        for l in labels:
            if (l == t_label):
                score += 1

    m_labels = []
    for labels in mul_labels:
         [m_labels.append(l) for l in labels]                
                    
    return (len(m_labels)/lim_A, score*100/len(m_labels), score*100/lim_A)

## MNIST

In [4]:
from keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_imgs = np.array([x.ravel() for x in train_images])
test_imgs = np.array([y.ravel() for y in test_images])

img_SIZE = train_images.shape[1]*train_images.shape[2]

Using TensorFlow backend.


### labels evaluation

In [18]:
orig_A1, lim_A1 = 50, 2000
dataset = "mnist"
q = 1.5
(ave_l_num, labels_qual, labels_qual2) = tsallis_ent_eval(q, orig_A1, lim_A1, dataset)
print(f"quality of labels generated: {labels_qual}[%], {labels_qual2}[%](as groups)")
print(f"average number of labels per image: {ave_l_num}")

annotator's ability: 62.71[%]
quality of labels generated: 64.3756050338819[%], 66.5[%](as groups)
average number of labels per image: 1.033


## CIFAR 10

In [28]:
from keras.datasets import cifar10

(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# number of data samples
train_N, test_N = train_images.shape[0], test_images.shape[0]
# data normalisation and vecotrise
train_imgs, test_imgs = train_images.reshape(train_N, -1)/255, test_images.reshape(test_N, -1)/255
# labels ravel
train_labels, test_labels = train_labels.ravel(), test_labels.ravel()

img_SIZE = train_images.shape[1]*train_images.shape[2]*train_images.shape[3]

# rgb画像なので，3つ組1つが各pixelの色(rgb値)を決める この3の一つ一つをチャンネルとよぶらしい
# データの一つ一つは32x32x3の画像 -> ravelすることで32*32*3がフラットに
# https://keras.io/datasets/
print(f"images shape transformation; {train_images.shape} -> {train_imgs.shape}\n")

images shape transformation; (50000, 32, 32, 3) -> (50000, 3072)



### labels evaluation

In [29]:
orig_A2, lim_A2 = 2000, 2000
dataset = "cifar10"
q = 3/2
(ave_l_num, labels_qual, labels_qual2) = tsallis_ent_eval(q, orig_A2, lim_A2, dataset)
print(f"quality of labels generated: {labels_qual}[%], {labels_qual2}[%](as groups)")
print(f"average number of labels per image: {ave_l_num}")

annotator's ability: 28.92[%]
quality of labels generated: 26.071555083244775[%], 36.8[%](as groups)
average number of labels per image: 1.4115


## CIFAR100

In [30]:
from keras.datasets import cifar100

(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode = "fine")

# number of data samples
train_N, test_N = train_images.shape[0], test_images.shape[0]
# data normalisation and vecotrise
train_imgs, test_imgs = train_images.reshape(train_N, -1)/255, test_images.reshape(test_N, -1)/255
# labels ravel
train_labels, test_labels = train_labels.ravel(), test_labels.ravel()

img_SIZE = train_images.shape[1]*train_images.shape[2]*train_images.shape[3]

### labels evaluation

In [14]:
orig_A3, lim_A3 = 1000, 1000
dataset = "cifar100"
(ave_l_num, labels_qual, labels_qual2) = entropy_eval(orig_A3, lim_A3, dataset)
print(f"quality of labels generated: {labels_qual}[%], {labels_qual2}[%](as groups)")
print(f"average number of labels per image: {ave_l_num}")

annotator's ability: 7.04[%]
quality of labels generated: 3.8848631239935587[%], 19.3[%](as groups)
average number of labels per image: 4.968
