## You need to install numpy and scikit-learn for this task

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, balanced_accuracy_score

In [2]:
# Directory holding one pre-computed feature file per image.
main_folder = Path('imageclef2011_feats/')

x_all = []
y_all = []
with open('trainset_gt_annotations.txt') as annotations:
    for record in annotations.readlines():
        fields = record.split()
        # Every token after the image name is parsed as an integer concept flag.
        y_all.append([int(flag) for flag in fields[1:]])
        # Keep the path up to its first '.', then add the feature-file suffix.
        stem = str(main_folder / fields[0]).split('.')[0]
        x_all.append(np.load(stem + '.jpg_ft.npy'))

with open('concepts_2011.txt') as concepts:
    # The first line is skipped; the second whitespace-separated token of
    # every remaining line is kept as the concept name.
    labels_names = [row.split()[1] for row in concepts.readlines()[1:]]

## Generate the dataset

In [3]:
# Positions of the four season concepts inside every annotation vector.
tuple(labels_names.index(season) for season in ('Spring', 'Summer', 'Autumn', 'Winter'))

(9, 10, 11, 12)

In [4]:
# Look the season columns up by name instead of hard-coding 9..12, so the
# bucketing stays correct if the concept file is ever reordered.
season_columns = [labels_names.index(season)
                  for season in ('Spring', 'Summer', 'Autumn', 'Winter')]

X_spring, X_summer, X_autumn, X_winter = [], [], [], []
season_buckets = (X_spring, X_summer, X_autumn, X_winter)

for xo, yo in zip(x_all, y_all):
    # An image goes into the bucket of the FIRST season flag that is set
    # (Spring > Summer > Autumn > Winter); images without any season flag
    # are dropped.
    for column, bucket in zip(season_columns, season_buckets):
        if yo[column] == 1:
            bucket.append(xo)
            break

## Split each season into train / validation / test sets (first tested on spring)

In [5]:
train_size = 0.6
val_size = 0.15
test_size = 0.25
# test_size is implicit (whatever is left after train + val); make sure the
# three documented fractions actually describe a full partition.
assert abs(train_size + val_size + test_size - 1.0) < 1e-9, \
    "train/val/test fractions must sum to 1"


def _split_train_val_test(samples, train_frac, val_frac):
    """Cut ``samples`` into consecutive train / val / test parts.

    The first ``int(train_frac * n)`` items form the training part, the items
    up to ``int((train_frac + val_frac) * n)`` form the validation part and
    the remainder forms the test part, where ``n = len(samples)``.

    NOTE(review): the samples are split in their existing order; nothing is
    shuffled here.

    Returns
    -------
    tuple of three numpy arrays ``(train, val, test)``.
    """
    n_samples = len(samples)
    train_end = int(train_frac * n_samples)
    val_end = int((train_frac + val_frac) * n_samples)
    train_part, val_part, test_part = np.split(samples, [train_end, val_end])
    return train_part, val_part, test_part


# Spring
spring_train_x, spring_val_x, spring_test_x = _split_train_val_test(X_spring, train_size, val_size)
# Summer
summer_train_x, summer_val_x, summer_test_x = _split_train_val_test(X_summer, train_size, val_size)
# Autumn
autumn_train_x, autumn_val_x, autumn_test_x = _split_train_val_test(X_autumn, train_size, val_size)
# Winter
winter_train_x, winter_val_x, winter_test_x = _split_train_val_test(X_winter, train_size, val_size)


In [6]:
def _labels_for(parts, values):
    """Return a flat list holding ``values[k]`` once per sample of ``parts[k]``."""
    return [value for part, value in zip(parts, values) for _ in range(len(part))]


# Season order below fixes the class ids: 0=spring, 1=summer, 2=autumn, 3=winter.
train_parts = [spring_train_x, summer_train_x, autumn_train_x, winter_train_x]
train_x = np.concatenate(train_parts)
# One-vs-rest targets (1 for the named season, 0 otherwise) ...
spring_train_y = _labels_for(train_parts, [1, 0, 0, 0])
summer_train_y = _labels_for(train_parts, [0, 1, 0, 0])
autumn_train_y = _labels_for(train_parts, [0, 0, 1, 0])
winter_train_y = _labels_for(train_parts, [0, 0, 0, 1])
# ... and the multi-class target with the class id of every sample.
train_y = _labels_for(train_parts, [0, 1, 2, 3])


val_parts = [spring_val_x, summer_val_x, autumn_val_x, winter_val_x]
val_x = np.concatenate(val_parts)
val_y = _labels_for(val_parts, [0, 1, 2, 3])

test_parts = [spring_test_x, summer_test_x, autumn_test_x, winter_test_x]
test_x = np.concatenate(test_parts)
test_y = _labels_for(test_parts, [0, 1, 2, 3])

In [7]:
# Persist every split next to the notebook, features first and labels second.
for filename, array in (
    ('train_x.npy', train_x),
    ('train_y.npy', train_y),
    ('val_x.npy', val_x),
    ('val_y.npy', val_y),
    ('test_x.npy', test_x),
    ('test_y.npy', test_y),
):
    np.save(filename, array)

## Using sklearn.svm.SVC with the 'RBF' kernel (default)

In [8]:
def classwise_accuracy(y_true, y_pred):
    """Compute the overall accuracy and the mean of the per-class accuracies.

    Parameters
    ----------
    y_true : sequence
        Ground-truth class label of every sample.
    y_pred : sequence
        Predicted class label of every sample; same length as ``y_true``.

    Returns
    -------
    acc : float
        Fraction of samples whose prediction equals the ground truth.
    class_acc : float
        Unweighted mean, over the classes that occur in ``y_true``, of the
        fraction of that class's samples that were predicted correctly.

    Raises
    ------
    ValueError
        If the inputs differ in length or are empty.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same length, got %d and %d"
            % (y_true.size, y_pred.size)
        )
    if y_true.size == 0:
        raise ValueError("cannot compute accuracy on empty input")

    hits = y_true == y_pred
    # Derive the classes from the labels that are actually present instead of
    # assuming they are exactly 0..k-1: sizing the counters with
    # len(set(y_true)) broke as soon as a class id was missing from y_true.
    classes = np.unique(y_true)
    per_class = np.array([hits[y_true == cls].mean() for cls in classes])

    acc = hits.mean()
    class_acc = per_class.mean()
    return acc, class_acc
            
            

In [9]:
# Candidate values for the SVC regularisation parameter C.
Cs = [1e-2, 1e-1, 1e-1**0.5, 1, 1e1**0.5, 1e1, 1e3**0.5]
balanced_accs = []
accs = []

# One binary (season vs. rest) target per class; the position in this list is
# the class id used in val_y (0=spring, 1=summer, 2=autumn, 3=winter).
binary_targets = [spring_train_y, summer_train_y, autumn_train_y, winter_train_y]

for c in Cs:
    # Training: one probabilistic RBF-SVM per season. random_state is fixed
    # because probability=True relies on an internally shuffled
    # cross-validation; without a seed the scores change from run to run.
    models = [
        SVC(C=c, probability=True, gamma='auto', random_state=0).fit(train_x, target)
        for target in binary_targets
    ]
    # Validation: column k holds P(sample belongs to season k); the season
    # with the highest probability is the prediction.
    val_prob = np.vstack([model.predict_proba(val_x)[:, 1] for model in models]).T
    val_predict = val_prob.argmax(axis=1)
    acc, balanced_acc = classwise_accuracy(val_y, val_predict)
    balanced_accs.append(balanced_acc)
    accs.append(acc)

In [10]:
# The winner is the C whose validation run reached the highest classwise accuracy.
best_idx = int(np.argmax(balanced_accs))
print("The best C is :", Cs[best_idx])

The best C is : 1


In [11]:
# One line per candidate C, every number rounded to two decimals.
for scores in zip(Cs, balanced_accs, accs):
    c_rounded, classwise_rounded, plain_rounded = (round(value, 2) for value in scores)
    print("C: ", c_rounded, "    classwise accuracy: ", classwise_rounded, "  accuracy: ", plain_rounded)

C:  0.01     classwise accuracy:  0.59   accuracy:  0.76
C:  0.1     classwise accuracy:  0.58   accuracy:  0.76
C:  0.32     classwise accuracy:  0.6   accuracy:  0.77
C:  1     classwise accuracy:  0.61   accuracy:  0.78
C:  3.16     classwise accuracy:  0.61   accuracy:  0.79
C:  10.0     classwise accuracy:  0.58   accuracy:  0.78
C:  31.62     classwise accuracy:  0.55   accuracy:  0.77


In [12]:
# test on the test set

In [13]:
# Merge the validation data into the training data for the final fit.
# Everything is rebuilt from the per-season splits instead of extending
# train_x / *_train_y in place, so re-running this cell can no longer append
# the validation samples a second time.
train_parts = [spring_train_x, summer_train_x, autumn_train_x, winter_train_x]
val_parts = [spring_val_x, summer_val_x, autumn_val_x, winter_val_x]


def _binary_targets(parts, positive):
    """Return a list with 1 per sample of ``parts[positive]`` and 0 per sample of every other part."""
    return [int(k == positive) for k, part in enumerate(parts) for _ in range(len(part))]


# Training samples of all seasons first, validation samples after them.
train_x = np.concatenate(train_parts + val_parts)

spring_val_y = _binary_targets(val_parts, 0)
summer_val_y = _binary_targets(val_parts, 1)
autumn_val_y = _binary_targets(val_parts, 2)
winter_val_y = _binary_targets(val_parts, 3)

# Label order mirrors train_x: training part first, validation part second.
spring_train_y = np.concatenate([_binary_targets(train_parts, 0), spring_val_y])
summer_train_y = np.concatenate([_binary_targets(train_parts, 1), summer_val_y])
autumn_train_y = np.concatenate([_binary_targets(train_parts, 2), autumn_val_y])
winter_train_y = np.concatenate([_binary_targets(train_parts, 3), winter_val_y])

In [14]:
# Pick the C with the best validation score. Only the first len(Cs) entries
# are considered so that a list that was extended after the sweep can never
# yield an index outside of Cs.
c = Cs[np.argmax(balanced_accs[:len(Cs)])]

# One binary (season vs. rest) target per class; the position in this list is
# the class id used in test_y (0=spring, 1=summer, 2=autumn, 3=winter).
binary_targets = [spring_train_y, summer_train_y, autumn_train_y, winter_train_y]

# random_state is fixed because probability=True relies on an internally
# shuffled cross-validation; without a seed the scores change between runs.
models = [
    SVC(C=c, probability=True, gamma='auto', random_state=0).fit(train_x, target)
    for target in binary_targets
]
clf_spring, clf_summer, clf_autumn, clf_winter = models

# Test: column k holds P(sample belongs to season k); the most probable
# season is the prediction.
test_prob = np.vstack([model.predict_proba(test_x)[:, 1] for model in models]).T
test_predict = test_prob.argmax(axis=1)
acc = accuracy_score(test_y, test_predict)
balanced_acc = balanced_accuracy_score(test_y, test_predict)
# The test score is deliberately NOT appended to balanced_accs: that list
# holds the validation scores used for choosing C.

In [15]:
# Report both metrics of the final model on the held-out test set.
for caption, score in (
    ("classwise accuracy on test set: ", balanced_acc),
    ("vanilla accuracy on test set: ", acc),
):
    print(caption, score)

classwise accuracy on test set:  0.5502473627473627
vanilla accuracy on test set:  0.7558823529411764
