In [1]:
%matplotlib notebook
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
import pandas as pd
import math as m
import itertools
import os

In [2]:
# Starting (qa, qf) quality scores of each dataset with the full feature set

iris_base_qa, iris_base_qf = 0.11, 0.16
cancer_base_qa, cancer_base_qf = 0.2, 0.25
adult_base_qa, adult_base_qf = 0.5, 0.52
heart_base_qa, heart_base_qf = 0.42, 0.41
statlog_base_qa, statlog_base_qf = 0.48, 0.71
abalone_base_qa, abalone_base_qf = 0.32, 0.37
spambase_base_qa, spambase_base_qf = 0.29, 0.35
bean_base_qa, bean_base_qf = 0.36, 0.4

In [3]:
# Row count of each dataset

n_iris, n_cancer, n_adult = 150, 569, 2000
n_heart, n_statlog, n_spambase = 297, 959, 4601
n_abalone, n_bean = 4177, 1400

In [4]:
# Full feature count of each dataset

f_iris, f_cancer, f_adult = 4, 30, 10

f_heart, f_statlog, f_abalone = 13, 24, 8
f_spambase, f_bean = 57, 16

In [4]:
# Feature-subset sizes evaluated for each dataset (one result file per size)

feat_iris     = [3, 2, 1]
feat_cancer   = [23, 12, 5, 4, 3, 2]
feat_adult    = [8, 4, 2]

feat_heart    = [12, 10, 8, 6, 4, 3, 2, 1]
feat_statlog  = [20, 18, 12, 10, 8, 6, 4, 3, 2, 1]
feat_abalone  = [6, 4, 3, 2, 1]
feat_spambase = [52, 43, 37, 28, 19, 10, 6, 4, 3, 2, 1]
feat_bean     = [12, 11, 8, 6, 4, 3, 2, 1]

# All per-dataset lists gathered in one place (same list objects, not copies)
feats = [
    feat_iris,
    feat_cancer,
    feat_adult,
    feat_heart,
    feat_statlog,
    feat_abalone,
    feat_spambase,
    feat_bean,
]

In [5]:
def load_data(dataset, feat_list, n, init_f, init_qa, init_qf, data_dir="./features"):
    """Collect retained-feature fractions and quality scores for one dataset.

    The first entry of every returned list is the baseline (all features
    kept); one further entry is appended per value in ``feat_list``, read from
    ``<data_dir>/<dataset>_<feat>_features_(x,qa,qf,time).npy``.

    Parameters
    ----------
    dataset : str
        Dataset name used as the file-name prefix.
    feat_list : iterable
        Feature-subset sizes; one result file is loaded per entry.
    n : any
        Not used by this function; kept so existing call sites keep working.
    init_f : number
        Total number of features; each subset size is divided by it.
        Must be non-zero whenever ``feat_list`` is non-empty.
    init_qa, init_qf : number
        Baseline scores placed at the front of the returned score lists.
    data_dir : str, optional
        Directory holding the ``.npy`` result files (default ``"./features"``).

    Returns
    -------
    (ds, qas, qfs) : tuple of lists
        ``ds`` holds ``feat / init_f`` fractions (starting with 1.0), ``qas``
        and ``qfs`` hold the first element of the second and third items
        stored in each file (qa and qf, going by the file name).
    """
    # Baseline point: every feature kept, so the retained fraction is 1.
    ds = [1.0]
    qas = [init_qa]
    qfs = [init_qf]
    for feat in feat_list:
        file_name = "{}_{}_features_(x,qa,qf,time).npy".format(dataset, feat)
        path = os.path.join(data_dir, file_name)
        # NOTE(review): allow_pickle=True unpickles file contents, which can
        # execute arbitrary code; only load result files from a trusted source.
        # Each file unpacks into four items; the first and last are ignored.
        _, qa, qf, _ = np.load(path, allow_pickle=True)
        qas.append(qa[0])
        qfs.append(qf[0])
        ds.append(feat / init_f)
    return ds, qas, qfs
        

In [6]:
# Load (fraction, qa, qf) series for every dataset via load_data
iris_ds, iris_qas, iris_qfs = load_data(
    'iris', feat_iris, n_iris, f_iris, iris_base_qa, iris_base_qf
)
cancer_ds, cancer_qas, cancer_qfs = load_data(
    'cancer', feat_cancer, n_cancer, f_cancer, cancer_base_qa, cancer_base_qf
)
adult_ds, adult_qas, adult_qfs = load_data(
    'adult', feat_adult, n_adult, f_adult, adult_base_qa, adult_base_qf
)

heart_ds, heart_qas, heart_qfs = load_data(
    'heart', feat_heart, n_heart, f_heart, heart_base_qa, heart_base_qf
)
statlog_ds, statlog_qas, statlog_qfs = load_data(
    'statlog', feat_statlog, n_statlog, f_statlog, statlog_base_qa, statlog_base_qf
)
abalone_ds, abalone_qas, abalone_qfs = load_data(
    'abalone', feat_abalone, n_abalone, f_abalone, abalone_base_qa, abalone_base_qf
)
spambase_ds, spambase_qas, spambase_qfs = load_data(
    'spambase', feat_spambase, n_spambase, f_spambase, spambase_base_qa, spambase_base_qf
)
bean_ds, bean_qas, bean_qfs = load_data(
    'bean', feat_bean, n_bean, f_bean, bean_base_qa, bean_base_qf
)


NameError: name 'n_iris' is not defined

In [None]:
# Per-point score plotted later: the larger of qa and qf at each feature fraction
iris_dqs = np.array(list(map(max, iris_qas, iris_qfs)))
cancer_dqs = np.array(list(map(max, cancer_qas, cancer_qfs)))
adult_dqs = np.array(list(map(max, adult_qas, adult_qfs)))

heart_dqs = np.array(list(map(max, heart_qas, heart_qfs)))
statlog_dqs = np.array(list(map(max, statlog_qas, statlog_qfs)))
spambase_dqs = np.array(list(map(max, spambase_qas, spambase_qfs)))
abalone_dqs = np.array(list(map(max, abalone_qas, abalone_qfs)))
bean_dqs = np.array(list(map(max, bean_qas, bean_qfs)))

In [None]:
# Background bands: three stacked rectangles spanning y = 0-0.3, 0.3-0.6 and 0.6-1.0
ys = [0, 0.3, 0.6]
heights = [0.3, 0.3, 0.4]
colors = cm.rainbow([0.5, 0.75, 1])
zones = [
    patches.Rectangle((0, band_y), 55, band_h, color=band_color, alpha=0.33)
    for band_y, band_h, band_color in zip(ys, heights, colors)
]

fig, ax = plt.subplots()

# One curve per dataset: x = feat / init_f fraction, y = max(qa, qf)
line1a, = ax.plot(iris_ds, iris_dqs, color='C0', label='Iris')
line2a, = ax.plot(cancer_ds, cancer_dqs, color='C1', label='Breast Cancer')
line3a, = ax.plot(adult_ds, adult_dqs, color='C2', label='Adult')

ax.set_ylim([0, 1])
ax.set_xlim([0, 1])

# Major ticks every 0.1 on both axes, minor y ticks every 0.05
ymajor_ticks = np.arange(11) / 10
yminor_ticks = np.arange(20) / 20
ax.set_yticks(ymajor_ticks)
ax.set_yticks(yminor_ticks, minor=True)
ax.set_xticks(ymajor_ticks)
ax.grid(which='minor', linestyle='--')

ax.legend(loc='upper right')

# match_original keeps each rectangle's own colour and alpha
pc = PatchCollection(zones, match_original=True)
ax.add_collection(pc)
ax.grid(alpha=0.5)
fig.tight_layout()
plt.show()

In [None]:
# Background bands: three stacked rectangles spanning y = 0-0.3, 0.3-0.6 and 0.6-1.0
ys = [0, 0.3, 0.6]
heights = [0.3, 0.3, 0.4]
colors = cm.rainbow([0.5, 0.75, 1])
zones = [
    patches.Rectangle((0, band_y), 55, band_h, color=band_color, alpha=0.33)
    for band_y, band_h, band_color in zip(ys, heights, colors)
]

fig, ax = plt.subplots()

# One curve per dataset: x = feat / init_f fraction, y = max(qa, qf)
line1a, = ax.plot(heart_ds, heart_dqs, color='c', label='Heart Disease')
line2a, = ax.plot(statlog_ds, statlog_dqs, color='m', label='Statlog')
line3a, = ax.plot(abalone_ds, abalone_dqs, color='y', label='Abalone')
line4a, = ax.plot(spambase_ds, spambase_dqs, color='k', label='Spambase')
line5a, = ax.plot(bean_ds, bean_dqs, color='0.5', label='Dry Beans')


ax.set_ylim([0, 1])
ax.set_xlim([0, 1])

# Major ticks every 0.1 on both axes, minor y ticks every 0.05
ymajor_ticks = np.arange(11) / 10
yminor_ticks = np.arange(20) / 20
ax.set_yticks(ymajor_ticks)
ax.set_xticks(ymajor_ticks)
ax.set_yticks(yminor_ticks, minor=True)
ax.grid(which='minor', linestyle='--')

ax.legend(loc='lower right')

# match_original keeps each rectangle's own colour and alpha
pc = PatchCollection(zones, match_original=True)
ax.add_collection(pc)
ax.grid(alpha=0.5)
fig.tight_layout()
plt.show()