In [18]:
from warnings import filterwarnings
filterwarnings('ignore')

In [19]:
import tqdm
import time
import random
import numpy as np
from scipy import spatial
from scipy.stats import pearsonr
from skmultilearn.dataset import load_from_arff

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from skmultilearn.problem_transform import BinaryRelevance

In [20]:
X, y = load_from_arff("Image.arff", label_count = 5)
X = X.toarray()
y = y.toarray()

index = np.arange(len(X))
np.random.shuffle(index)
X, y = X[index], y[index]

print(X.shape, y.shape)

(2000, 294) (2000, 5)


In [21]:
print(X)
print(y)

[[4.31313e-01 1.50050e-02 4.46017e-01 ... 7.02000e-04 7.00200e-03
  2.41000e-04]
 [6.63477e-01 1.26127e-01 3.58374e-01 ... 7.33730e-02 3.48700e-02
  3.71200e-02]
 [3.69259e-01 6.10000e-05 3.66326e-01 ... 5.95290e-02 3.39900e-02
  1.13380e-02]
 ...
 [4.49200e-01 2.74000e-03 4.56098e-01 ... 1.45110e-02 3.95250e-02
  7.22840e-02]
 [5.24545e-01 1.58420e-02 5.23251e-01 ... 2.05401e-01 4.34746e-01
  2.32819e-01]
 [3.83019e-01 7.30000e-04 3.79321e-01 ... 1.47600e-03 2.52030e-02
  8.32000e-04]]
[[0 0 1 0 0]
 [0 1 0 0 1]
 [0 1 0 0 0]
 ...
 [0 1 0 0 1]
 [1 0 0 0 0]
 [0 0 1 0 0]]


In [22]:
train_samples = int(X.shape[0] * 0.6)

X_train = X[:train_samples, :]
y_train = y[:train_samples]
X_test = X[train_samples:, :]
y_test = y[train_samples:]

print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)

X_train: (1200, 294) y_train: (1200, 5)
X_test: (800, 294) y_test: (800, 5)


In [23]:
d = X_train.shape[1] 
l = y_train.shape[1]
print("d:", d, "l:", l)

d: 294 l: 5


In [24]:
def score(y, pred):
    pred = pred.toarray()
    score = 0
    for i in range(y.shape[0]):
        score += np.sum(y[i] * pred[i]) / (np.sum(y[i]) + np.sum(pred[i]) - np.sum(y[i] * pred[i]))
    score /= y.shape[0]
    return score

## Without FS, taking the whole dataset

In [25]:
X_train_subset = X_train[:, :]
X_test_subset = X_test[:, :]

print("X_train_subset:", X_train_subset.shape, "y_train:", y_train.shape)
print("X_test_subset:", X_test_subset.shape, "y_test:", y_test.shape)

lr = BinaryRelevance(classifier = LogisticRegression())
lr.fit(X_train_subset, y_train)

lr_train_score = score(y_train, lr.predict(X_train_subset))
lr_test_score = score(y_test, lr.predict(X_test_subset))

print("lr train accu:", round(lr_train_score, 4))
print("lr test_accu:", round(lr_test_score, 4))

nb = BinaryRelevance(classifier = GaussianNB())
nb.fit(X_train_subset, y_train)

nb_train_score = score(y_train, nb.predict(X_train_subset))
nb_test_score = score(y_test, nb.predict(X_test_subset))

print("nb train accu:", round(nb_train_score, 4))
print("nb test_accu:", round(nb_test_score, 4))

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)

knn_train_score = score(y_train, knn.predict(X_train_subset))
knn_test_score = score(y_test, knn.predict(X_test_subset))

print("knn train accu:", round(knn_train_score, 4))
print("knn test_accu:", round(knn_test_score, 4))

avg_train_score = (lr_train_score + nb_train_score + knn_train_score) / 3
avg_test_score = (lr_test_score + nb_test_score + knn_test_score) / 3

print("avg train accu:", round(avg_train_score, 4))
print("avg test_accu:", round(avg_test_score, 4))

X_train_subset: (1200, 294) y_train: (1200, 5)
X_test_subset: (800, 294) y_test: (800, 5)
lr train accu: 0.5369
lr test_accu: 0.4694
nb train accu: 0.3634
nb test_accu: 0.3548
knn train accu: 0.619
knn test_accu: 0.5025
avg train accu: 0.5065
avg test_accu: 0.4422


In [26]:
beg = time.time()
fcorr = np.corrcoef(X_train.T)
end = time.time()
fcorr = np.absolute(fcorr)
print(fcorr.shape)
print(fcorr)

(294, 294)
[[1.         0.22891059 0.95314891 ... 0.11398277 0.18863438 0.11820044]
 [0.22891059 1.         0.30307195 ... 0.02696605 0.08730909 0.04609204]
 [0.95314891 0.30307195 1.         ... 0.12484053 0.20397726 0.12536069]
 ...
 [0.11398277 0.02696605 0.12484053 ... 1.         0.39400124 0.80938837]
 [0.18863438 0.08730909 0.20397726 ... 0.39400124 1.         0.41953227]
 [0.11820044 0.04609204 0.12536069 ... 0.80938837 0.41953227 1.        ]]


In [27]:
#his one varies between -1 and +1 with 0 implying no correlation. Correlations of -1 or +1 imply an exact linear relationship.
#  Positive correlations imply that as x increases, so does y. Negative correlations imply that as x increases, y decreases.

flcorr = np.zeros((d, l))
for i in tqdm.tqdm(range(d)):
    for j in range(l):
        flcorr[i][j] = pearsonr(X_train[:, i], y_train[:, j])[0] + 0.001
flcorr = np.absolute(flcorr)
flcorr = np.max(flcorr, axis = 1)
print(flcorr.shape)
print(flcorr)

100%|██████████| 294/294 [00:00<00:00, 1772.10it/s]

(294,)
[0.47703882 0.0939037  0.49292623 0.10317575 0.50034056 0.12145456
 0.48946087 0.09404729 0.4985689  0.08868053 0.49701336 0.08640842
 0.48042703 0.08070738 0.51282126 0.12322825 0.51550533 0.12423064
 0.49839703 0.12948849 0.49252793 0.14891923 0.50157757 0.1719923
 0.50870536 0.11245849 0.49921652 0.09010143 0.49942004 0.10337162
 0.49939671 0.16143983 0.49323035 0.19382865 0.49333024 0.19917648
 0.49704112 0.17343188 0.50617479 0.12320216 0.49665435 0.09829377
 0.46315408 0.18292615 0.45534031 0.19998417 0.45079731 0.23019689
 0.47817078 0.28650122 0.47832467 0.23539103 0.47151    0.18645821
 0.46909967 0.18409831 0.38937433 0.25510671 0.40736215 0.23609585
 0.41703716 0.28653355 0.41799597 0.25352365 0.41876493 0.23089895
 0.41364471 0.21094751 0.39696926 0.20735679 0.27485245 0.27294937
 0.31785502 0.28987158 0.34910163 0.27691071 0.3473091  0.25500481
 0.32949507 0.26250502 0.30168887 0.2697432  0.25554844 0.22733422
 0.27648922 0.23742009 0.27036489 0.2510751  0.26680843 




## Base paper ka ACO, 10 features ka subset

In [28]:
tou = []
for i in tqdm.tqdm(range(d)):
    tou0 = 0
    for j in range(l):
        tou0 = max(tou0, 1 - spatial.distance.cosine(X_train[:, i], y_train[:, j]))
    tou.append(tou0)
tou = np.array(tou)
tou = (tou - np.min(tou)) / (np.max(tou) - np.min(tou)) #min max normalisation

q0 = 0.7
rho = 0.1
ants = 25
iterations = 40

for it in tqdm.tqdm(range(iterations)):
    fc = [0] * d
    for ant in range(ants):

        visited = set()
        unvisited = set(range(d))
        i = random.sample(list(unvisited), 1)[0]
        visited.add(i)
        unvisited.remove(i)

        while len(visited) < 10:
            ni = list(unvisited)
            p = []
            for j in ni:
                p.append(tou[j] * flcorr[j] / fcorr[i][j])
            p = np.array(p)
            p /= np.sum(p)
            q = random.random()
            j = -1

            if q >= q0:
                j = np.random.choice(ni, p = p)   #The probabilities associated with each entry in a. If not given, the sample assumes a uniform distribution over all entries in a.
            else:
                j = ni[np.argmax(p)]  #Returns the indices of the maximum values along an axis.

            unvisited.remove(j)
            visited.add(j)
            i = j


        for i in visited:
            fc[i]+=1

#global update
    for i in range(d):
        tou[i] = (1 - rho) * tou[i] + fc[i] / sum(fc)

fs_ind = []
for i in range(d):
    fs_ind.append([tou[i], i])
fs_ind.sort(reverse = True)
fs_ind = np.array(fs_ind)
fs_ind = fs_ind[:10, 1].astype(int)

print(fs_ind)

X_train_subset = X_train[:, fs_ind]
X_test_subset = X_test[:, fs_ind]

print("X_train_subset:", X_train_subset.shape, "y_train:", y_train.shape)
print("X_test_subset:", X_test_subset.shape, "y_test:", y_test.shape)

lr = BinaryRelevance(classifier = LogisticRegression())
lr.fit(X_train_subset, y_train)

lr_train_score = score(y_train, lr.predict(X_train_subset))
lr_test_score = score(y_test, lr.predict(X_test_subset))

print("lr train accu:", round(lr_train_score, 4))
print("lr test_accu:", round(lr_test_score, 4))

nb = BinaryRelevance(classifier = GaussianNB())
nb.fit(X_train_subset, y_train)

nb_train_score = score(y_train, nb.predict(X_train_subset))
nb_test_score = score(y_test, nb.predict(X_test_subset))

print("nb train accu:", round(nb_train_score, 4))
print("nb test_accu:", round(nb_test_score, 4))

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)

knn_train_score = score(y_train, knn.predict(X_train_subset))
knn_test_score = score(y_test, knn.predict(X_test_subset))

print("knn train accu:", round(knn_train_score, 4))
print("knn test_accu:", round(knn_test_score, 4))

avg_train_score = (lr_train_score + nb_train_score + knn_train_score) / 3
avg_test_score = (lr_test_score + nb_test_score + knn_test_score) / 3

print("avg train accu:", round(avg_train_score, 4))
print("avg test_accu:", round(avg_test_score, 4))

100%|██████████| 294/294 [00:00<00:00, 2788.43it/s]
100%|██████████| 40/40 [00:02<00:00, 14.15it/s]


[188  28 206  14 190  44 186   2 228 237]
X_train_subset: (1200, 10) y_train: (1200, 5)
X_test_subset: (800, 10) y_test: (800, 5)
lr train accu: 0.1064
lr test_accu: 0.1223
nb train accu: 0.3752
nb test_accu: 0.3697
knn train accu: 0.5175
knn test_accu: 0.3863
avg train accu: 0.333
avg test_accu: 0.2927


## Base paper ke ACO ka solution further optimized with random restructure LS with KNN wrapper

In [29]:
tmp = fs_ind[:]

In [30]:
X_train_subset = X_train[:, fs_ind]

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)
best_acc = score(y_train, knn.predict(X_train_subset))

ants = 40

for ant in tqdm.tqdm(range(ants)):

    visited = set(random.sample(fs_ind.tolist(), 5))
    unvisited = set(range(d)) - visited
    i = random.sample(list(visited), 1)[0]

    while len(visited) < 10:
        ni = list(unvisited)
        p = []
        for j in ni:
            p.append(tou[j] * flcorr[j] / fcorr[i][j])
        p = np.array(p)
        p /= np.sum(p)
        j = ni[np.argmax(p)]
        unvisited.remove(j)
        visited.add(j)
        i = j

    ind = np.array(list(visited))
    X_train_subset = X_train[:, ind]

    knn = BinaryRelevance(classifier = KNeighborsClassifier())
    knn.fit(X_train_subset, y_train)
    acc = score(y_train, knn.predict(X_train_subset))

    if acc > best_acc:
        best_acc = acc
        fs_ind = ind[:]

print(fs_ind)

X_train_subset = X_train[:, fs_ind]
X_test_subset = X_test[:, fs_ind]

print("X_train_subset:", X_train_subset.shape, "y_train:", y_train.shape)
print("X_test_subset:", X_test_subset.shape, "y_test:", y_test.shape)

lr = BinaryRelevance(classifier = LogisticRegression())
lr.fit(X_train_subset, y_train)

lr_train_score = score(y_train, lr.predict(X_train_subset))
lr_test_score = score(y_test, lr.predict(X_test_subset))

print("lr train accu:", round(lr_train_score, 4))
print("lr test_accu:", round(lr_test_score, 4))

nb = BinaryRelevance(classifier = GaussianNB())
nb.fit(X_train_subset, y_train)

nb_train_score = score(y_train, nb.predict(X_train_subset))
nb_test_score = score(y_test, nb.predict(X_test_subset))

print("nb train accu:", round(nb_train_score, 4))
print("nb test_accu:", round(nb_test_score, 4))

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)

knn_train_score = score(y_train, knn.predict(X_train_subset))
knn_test_score = score(y_test, knn.predict(X_test_subset))

print("knn train accu:", round(knn_train_score, 4))
print("knn test_accu:", round(knn_test_score, 4))

avg_train_score = (lr_train_score + nb_train_score + knn_train_score) / 3
avg_test_score = (lr_test_score + nb_test_score + knn_test_score) / 3

print("avg train accu:", round(avg_train_score, 4))
print("avg test_accu:", round(avg_test_score, 4))

100%|██████████| 40/40 [00:12<00:00,  3.27it/s]


[  2 199  44  14  48 188  52 216  28 190]
X_train_subset: (1200, 10) y_train: (1200, 5)
X_test_subset: (800, 10) y_test: (800, 5)
lr train accu: 0.1444
lr test_accu: 0.1565
nb train accu: 0.3947
nb test_accu: 0.3775
knn train accu: 0.5685
knn test_accu: 0.4194
avg train accu: 0.3692
avg test_accu: 0.3178


## Base paper ke ACO ka solution further optimized with random restructure LS with KNN + NB + LR wrapper

In [31]:
fs_ind = tmp[:]

In [32]:
X_train_subset = X_train[:, fs_ind]

lr = BinaryRelevance(classifier = LogisticRegression())
lr.fit(X_train_subset, y_train)
lr_train_score = score(y_train, lr.predict(X_train_subset))

nb = BinaryRelevance(classifier = GaussianNB())
nb.fit(X_train_subset, y_train)
nb_train_score = score(y_train, nb.predict(X_train_subset))

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)
knn_train_score = score(y_train, knn.predict(X_train_subset))

best_acc = (lr_train_score + nb_train_score + knn_train_score) / 3

ants = 40

for ant in tqdm.tqdm(range(ants)):

    visited = set(random.sample(fs_ind.tolist(), 5))
    unvisited = set(range(d)) - visited
    i = random.sample(list(visited), 1)[0]

    while len(visited) < 10:
        ni = list(unvisited)
        p = []
        for j in ni:
            p.append(tou[j] * flcorr[j] / fcorr[i][j])
        p = np.array(p)
        p /= np.sum(p)
        j = ni[np.argmax(p)]
        unvisited.remove(j)
        visited.add(j)
        i = j

    ind = np.array(list(visited))
    X_train_subset = X_train[:, ind]

    lr = BinaryRelevance(classifier = LogisticRegression())
    lr.fit(X_train_subset, y_train)
    lr_train_score = score(y_train, lr.predict(X_train_subset))

    nb = BinaryRelevance(classifier = GaussianNB())
    nb.fit(X_train_subset, y_train)
    nb_train_score = score(y_train, nb.predict(X_train_subset))

    knn = BinaryRelevance(classifier = KNeighborsClassifier())
    knn.fit(X_train_subset, y_train)
    knn_train_score = score(y_train, knn.predict(X_train_subset))

    acc = (lr_train_score + nb_train_score + knn_train_score) / 3

    if acc > best_acc:
        best_acc = acc
        fs_ind = ind[:]

print(fs_ind)

X_train_subset = X_train[:, fs_ind]
X_test_subset = X_test[:, fs_ind]

print("X_train_subset:", X_train_subset.shape, "y_train:", y_train.shape)
print("X_test_subset:", X_test_subset.shape, "y_test:", y_test.shape)

lr = BinaryRelevance(classifier = LogisticRegression())
lr.fit(X_train_subset, y_train)

lr_train_score = score(y_train, lr.predict(X_train_subset))
lr_test_score = score(y_test, lr.predict(X_test_subset))

print("lr train accu:", round(lr_train_score, 4))
print("lr test_accu:", round(lr_test_score, 4))

nb = BinaryRelevance(classifier = GaussianNB())
nb.fit(X_train_subset, y_train)

nb_train_score = score(y_train, nb.predict(X_train_subset))
nb_test_score = score(y_test, nb.predict(X_test_subset))

print("nb train accu:", round(nb_train_score, 4))
print("nb test_accu:", round(nb_test_score, 4))

knn = BinaryRelevance(classifier = KNeighborsClassifier())
knn.fit(X_train_subset, y_train)

knn_train_score = score(y_train, knn.predict(X_train_subset))
knn_test_score = score(y_test, knn.predict(X_test_subset))

print("knn train accu:", round(knn_train_score, 4))
print("knn test_accu:", round(knn_test_score, 4))

avg_train_score = (lr_train_score + nb_train_score + knn_train_score) / 3
avg_test_score = (lr_test_score + nb_test_score + knn_test_score) / 3

print("avg train accu:", round(avg_train_score, 4))
print("avg test_accu:", round(avg_test_score, 4))

100%|██████████| 40/40 [00:21<00:00,  1.87it/s]


[ 64 258 199  72 218  14  60 248 186  28]
X_train_subset: (1200, 10) y_train: (1200, 5)
X_test_subset: (800, 10) y_test: (800, 5)
lr train accu: 0.1472
lr test_accu: 0.1665
nb train accu: 0.4106
nb test_accu: 0.4141
knn train accu: 0.5604
knn test_accu: 0.4281
avg train accu: 0.3727
avg test_accu: 0.3362
