# Validation notebook
Here the FIRES implementation for multiclass classification and regression is validated and compared to other online feature selection algorithms.


In [1]:
# import the functions needed for validation and comparison

from timeit import default_timer as timer

import numpy as np
import cupy as cp
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.linear_model import SGDRegressor
from skmultiflow.data import FileStream
from skmultiflow.neural_networks import PerceptronMask

# using plotly for plots
#import plotly.graph_objects as go
import plotly.express as px



In [2]:
# paint mnist
def paint_digit(digit_values):
    """Display a flat array of 784 values as a 28x28 image.

    Args:
        digit_values: array that can be reshaped to (28, 28), e.g. one MNIST
            row or a 0/1 mask of selected pixels.

    The figure is shown immediately; the colour scale and the tick labels of
    both axes are hidden.
    """
    image = digit_values.reshape(28,28)
    fig = px.imshow(image, binary_string=True)
    # Each update_* call returns the figure it was called on, so they chain.
    (
        fig.update_layout(coloraxis_showscale=False)
        .update_xaxes(showticklabels=False)
        .update_yaxes(showticklabels=False)
        .show()
    )


In [3]:
# stability measurement as proposed in "Measuring the Stability of Feature Selection"

def pearson_stability_ij(arr1,arr2):
    """Pearson correlation between two feature-selection masks.

    Args:
        arr1: 1-D 0/1 mask (array or list); 1 marks a selected feature.
        arr2: second mask of the same length.

    Returns:
        The correlation coefficient in [-1, 1]. When at least one mask has
        zero variance (no feature or every feature selected) the correlation
        is undefined; by convention 1.0 is returned if both masks are
        identical and 0.0 otherwise, instead of NaN.

    Raises:
        ValueError: if the masks are empty.
    """
    # Accept plain lists as well; subtraction below needs float arrays.
    arr1 = np.asarray(arr1, dtype=float)
    arr2 = np.asarray(arr2, dtype=float)
    d = len(arr1)
    if d == 0:
        raise ValueError("feature masks must not be empty")

    # Centre each mask around its selection frequency k / d.
    centered_i = arr1 - np.sum(arr1) / d
    centered_j = arr2 - np.sum(arr2) / d
    dividend = 1/d * np.sum(centered_i*centered_j)
    divisor = np.sqrt(1/d*np.sum(centered_i**2))*np.sqrt(1/d*np.sum(centered_j**2))
    if divisor == 0:
        # Constant mask: avoid 0/0 which would poison every averaged stability.
        return 1.0 if np.array_equal(arr1, arr2) else 0.0
    return dividend/divisor

def stability_factor(selected_ftrs):
    """Average pairwise Pearson stability over a list of selection masks.

    Args:
        selected_ftrs: sequence of M equally long 0/1 masks, one per
            feature-selection run.

    Returns:
        The mean of pearson_stability_ij over all M*(M-1)/2 unordered pairs.

    Raises:
        ValueError: if fewer than two masks are given (no pair to compare).
    """
    M = len(selected_ftrs)
    if M < 2:
        raise ValueError(
            "stability_factor needs at least two feature selections, got {}".format(M)
        )
    sum_stabilities = 0
    for i in range(M):
        for j in range(i+1, M):
            sum_stabilities += pearson_stability_ij(selected_ftrs[i], selected_ftrs[j])
    # Each unordered pair was visited once, hence the factor 2 / (M * (M - 1)).
    return 2 * sum_stabilities / (M * (M - 1))

In [4]:
# import algorithms
from fires import FIRES
from fires_cuda import FIRES as FC
from ofs import OFS, MC_OFS
from ofssgr import OFSSGD, MC_OFSSGD
from fsds import StreamFeatWeight

## Multiclass Data

Here the FIRES softmax implementation is compared to FSDS, OFS and OFSSGD on multiclass data.


### Load Datasets as Streaming Data

In [5]:
# MNIST data: the training file is streamed, the label sits in column 0
dataset_name = "mnist"
n_selected_ftr = 100
stream = FileStream('datasets/Multiclass/mnist_train_normalized.csv', target_idx=0)
stream.prepare_for_use()

# load test data (file has no header row): first column = label, rest = features
test_data = pd.read_csv('datasets/Multiclass/mnist_test_normalized.csv', header=None)
test_y = test_data.iloc[:, 0].to_numpy()
test_x = test_data.iloc[:, 1:].to_numpy()


In [None]:
# Human Activity Recognition
# labels changed from [1,...,6] to [0,...,5]
# rows shuffled
# split into train set with 7352 instances and test set with 2948
# The training file is streamed; the label is read from column index 561.
# NOTE(review): n_selected_ftr is not set in this cell, so the value left by a
# previously executed cell is reused - confirm the intended number for HAR.
stream = FileStream('datasets/Multiclass/har_train.csv', target_idx = 561)
stream.prepare_for_use()
dataset_name = "har"

# load test data (label column is named "Class", every other column is a feature)
test_data = pd.read_csv('datasets/Multiclass/har_test.csv')
test_y = test_data["Class"].to_numpy()
test_x = test_data.drop(columns="Class").to_numpy()


In [None]:
# Covtype scaled to 0,1
# https://archive.ics.uci.edu/ml/datasets/covertype

# rows shuffled
# split into train set with 400000 instances and test set with 180000
# The training split feeds the stream and the test split is held out for the
# final evaluation (the two file names were swapped before).
# NOTE(review): n_selected_ftr is not set in this cell; the value from a
# previously executed cell is reused.
stream = FileStream('datasets/Multiclass/covtype.scale01.train.csv', target_idx = 0)
stream.prepare_for_use()
dataset_name = "covtype"

# load test data (no header row): first column = label, rest = features
test_data = pd.read_csv('datasets/Multiclass/covtype.scale01.test.csv', header=None)
test_y = test_data[0].to_numpy()
test_x = test_data.drop(columns=0).to_numpy()

### Test Perceptron
The same model is used for all algorithms.


In [None]:
# Fresh perceptron, pre-trained on the first 100 stream samples;
# stream.target_values is passed as the list of classes.
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

### Test without feature selection


In [None]:
# Baseline without feature selection: for every batch of 100 samples, first
# predict and record the accuracy, then train the predictor on that batch.
accuarcy_pure = []
while stream.has_more_samples():
    x, y = stream.next_sample(batch_size=100)
    y_pred = predictor.predict(x)
    accuarcy_pure.append(accuracy_score(y, y_pred))
    predictor.partial_fit(x,y)

    


In [None]:
# Per-batch accuracy of the baseline run (one point per batch).
fig = px.line(y = accuarcy_pure, title="Accuracy without ftr selection", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [None]:
# Evaluate the trained baseline predictor on the held-out test set.
# The predictions must be compared with the labels (test_y), not the features.
y_pred = predictor.predict(test_x)
accuracy = accuracy_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

In [None]:
# Restart the FileStream so the next experiment reads it from the beginning.
stream.restart()

### FIRES Framework

In [6]:
# Fresh perceptron for the FIRES run, pre-trained on the first 100 stream
# samples; stream.target_values is passed as the list of classes.
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
               fit_intercept=True, max_iter=1000, n_iter_no_change=5,
               n_jobs=None, penalty=None, random_state=0, shuffle=True,
               tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)

In [7]:
# still without d, regularizer set 0.1 without being in the derivatives
# NOTE(review): the remark above refers to internals of fires_cuda that are not
# visible in this notebook - confirm it is still accurate.
# CUDA variant of FIRES with the softmax model, sized by the stream's feature
# count and class values.
fires_model = FC(n_total_ftr=stream.n_features,
                    target_values=stream.target_values,
                    mu_init=0,
                    sigma_init=1,
                    model='softmax')

In [9]:
# Test-then-train evaluation of the FIRES (CUDA) feature selection:
# per batch the features are weighted, the top n_selected_ftr are kept, the
# predictor is tested on the reduced batch and then trained on it.
fires_cuda_accuracy = []
fires_cuda_times = []

fires_cuda_selected_ftrs = []
fires_cuda_stability = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features (only this step is included in the per-batch timing)
    start_time = timer()
    ftr_weights = fires_model.weigh_features(x,y)
    ftr_selection = np.argsort(ftr_weights)[::-1][:n_selected_ftr]
    fires_cuda_times.append(timer()-start_time)

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, ftr_selection] = x[:, ftr_selection]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[ftr_selection] = 1
    fires_cuda_selected_ftrs.append(ftr_array)

    if len(fires_cuda_selected_ftrs) >= 10:
        stability = stability_factor(fires_cuda_selected_ftrs[-10:])
        fires_cuda_stability.append(stability)

    # Test
    y_pred = predictor.predict(x_reduced)
    fires_cuda_accuracy.append(accuracy_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
fires_cuda_run_time = end_time_all - start_time_all
print("The whole fires_cuda run took {}".format(fires_cuda_run_time))

# Restart the FileStream
stream.restart()

The whole fires_cuda run took 53.439564456999506


In [10]:
# Per-batch accuracy recorded during the FIRES (CUDA) run.
fig = px.line(y = fires_cuda_accuracy, title="Accuracy for Fires algorithm", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [11]:
# Stability of the FIRES selection over a sliding window of 10 batches.
# The y axis shows the stability factor, so it is labelled accordingly.
fig = px.line(y = fires_cuda_stability, title="Stability for Fires algorithm", labels={"x":"batches", "y":"stability"})
fig.show()

In [12]:
# For MNIST only: draw the features selected on the last processed batch as a
# 784-element 0/1 mask (ftr_selection still holds the last loop iteration).
if dataset_name == "mnist":
    img = np.zeros((784))
    img[ftr_selection] = 1
    paint_digit(img)


In [13]:
# Accuracy of the predictor trained during the FIRES run on the held-out test set.
# Note: test_x is passed with all features, not the reduced feature set.
y_pred = predictor.predict(test_x)
accuracy = accuracy_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

For the test dataset the previous trained predictor reached: 0.7159


### FIRES without cuda

Only one batch because of long calculation time

In [None]:
# Rewind the stream and build a fresh perceptron, pre-trained on the first
# 100 samples with stream.target_values as the list of classes.
stream.restart()
print("Choosen dataset: {}".format(dataset_name))
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

In [None]:
# Non-CUDA FIRES with the same settings as the CUDA model above.
fires_model = FIRES(n_total_ftr=stream.n_features,
                    target_values=stream.target_values,
                    mu_init=0,
                    sigma_init=1,
                    model='softmax')
print(fires_model.n_mc_samples)


# Time the feature weighting and top-n_selected_ftr selection for one batch of 100 samples.
x, y = stream.next_sample(batch_size=100)
# Select features
start_time = timer()
ftr_weights = fires_model.weigh_features(x, y)  # Get feature weights with FIRES
ftr_selection = np.argsort(ftr_weights)[::-1][:n_selected_ftr]
print(timer()-start_time)

### FSDS algorithm


In [14]:
# Rewind the stream and build a fresh perceptron for the FSDS run, pre-trained
# on the first 100 samples with stream.target_values as the list of classes.
# The batch x loaded here is also used by the next cell to pre-train FSDS.
stream.restart()
print("Choosen dataset: {}".format(dataset_name))
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

Choosen dataset: mnist


PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
               fit_intercept=True, max_iter=1000, n_iter_no_change=5,
               n_jobs=None, penalty=None, random_state=0, shuffle=True,
               tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)

In [15]:
# Test-then-train evaluation of the FSDS feature selection.
fsds_model = StreamFeatWeight(m=stream.n_features, k=stream.n_classes)
# needs some pretraining in the first run; x is the batch loaded by the
# predictor-initialisation cell, transposed before being passed in
fsds_model.low_rank_approximation(x.T)

fsds_selected_ftrs = []
fsds_stability = []

fsds_accuracy = []
fsds_times = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features (only this step is included in the per-batch timing)
    start_time = timer()
    ftr_weights = fsds_model.low_rank_approximation(x.T)
    ftr_selection = np.argsort(ftr_weights)[::-1][:n_selected_ftr]
    fsds_times.append(timer()-start_time)

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, ftr_selection] = x[:, ftr_selection]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[ftr_selection] = 1
    fsds_selected_ftrs.append(ftr_array)

    if len(fsds_selected_ftrs) >= 10:
        stability = stability_factor(fsds_selected_ftrs[-10:])
        fsds_stability.append(stability)

    # Test
    y_pred = predictor.predict(x_reduced)
    fsds_accuracy.append(accuracy_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
fsds_run_time = end_time_all - start_time_all
print("The whole fsds run took {}".format(fsds_run_time))

# Restart the FileStream
stream.restart()

The whole fsds run took 7.819808810999348


In [16]:
# Per-batch accuracy recorded during the FSDS run.
fig = px.line(y = fsds_accuracy, title="Accuracy for FSDS algorithm", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [17]:
# Stability of the FSDS selection over a sliding window of 10 batches.
# The y axis shows the stability factor, so it is labelled accordingly.
fig = px.line(y = fsds_stability, title="Stability for FSDS algorithm", labels={"x":"batches", "y":"stability"})
fig.show()

In [18]:
# For MNIST only: draw the features selected on the last processed batch as a
# 784-element 0/1 mask (ftr_selection still holds the last loop iteration).
if dataset_name == "mnist":
    img = np.zeros((784))
    img[ftr_selection] = 1
    paint_digit(img)


In [19]:
# Accuracy of the predictor trained during the FSDS run on the held-out test set.
# Note: test_x is passed with all features, not the reduced feature set.
y_pred = predictor.predict(test_x)
accuracy = accuracy_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

For the test dataset the previous trained predictor reached: 0.7148


### OFS algorithm

In [20]:
# Rewind the stream and build a fresh perceptron for the OFS run, pre-trained
# on the first 100 samples with stream.target_values as the list of classes.
stream.restart()
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
               fit_intercept=True, max_iter=1000, n_iter_no_change=5,
               n_jobs=None, penalty=None, random_state=0, shuffle=True,
               tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)

In [21]:
# Test-then-train evaluation of the multiclass OFS feature selection.
ofs = MC_OFS(regularization_param = 0.01, step_size = 0.1, n_selected_ftr=n_selected_ftr, n_total_ftr=stream.n_num_features, n_classes=stream.n_classes)

ofs_accuracy = []
ofs_selected_ftrs = []
ofs_stability = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features: OFS is trained one instance at a time
    for idx, label in enumerate(y):
        ofs.train(x[idx],label)

    selected_ftr = ofs.get_feature_indices()

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, selected_ftr] = x[:, selected_ftr]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[selected_ftr] = 1
    ofs_selected_ftrs.append(ftr_array)

    if len(ofs_selected_ftrs) >= 10:
        stability = stability_factor(ofs_selected_ftrs[-10:])
        ofs_stability.append(stability)

    # Test
    y_pred = predictor.predict(x_reduced)
    ofs_accuracy.append(accuracy_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
ofs_run_time = end_time_all - start_time_all
print("The whole ofs run took {}".format(ofs_run_time))

# Restart the FileStream
stream.restart()

The whole ofs run took 7.0733578070003205


In [22]:
# Per-batch accuracy recorded during the OFS run.
fig = px.line(y = ofs_accuracy, title="Accuracy for OFS algorithm", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [23]:
# Stability of the OFS selection; one value per batch once 10 batches have been seen.
fig = px.line(y = ofs_stability, title="Stability for OFS algorithm", labels={"x":"batches", "y":"stability"})
fig.show()

In [24]:
# For MNIST only: draw the features selected after the last processed batch as
# a 784-element 0/1 mask (selected_ftr still holds the last loop iteration).
if dataset_name == "mnist":
    img = np.zeros((784))
    img[selected_ftr] = 1
    paint_digit(img)


In [25]:
# Accuracy of the predictor trained during the OFS run on the held-out test set.
# Note: test_x is passed with all features, not the reduced feature set.
y_pred = predictor.predict(test_x)
accuracy = accuracy_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

For the test dataset the previous trained predictor reached: 0.7718


### OFSSGD algorithm

In [26]:
# Rewind the stream and build a fresh perceptron for the OFSSGD run, pre-trained
# on the first 100 samples with stream.target_values as the list of classes.
stream.restart()
predictor = PerceptronMask()
x,y = stream.next_sample(batch_size=100)
predictor.partial_fit(x,y, stream.target_values)

PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
               fit_intercept=True, max_iter=1000, n_iter_no_change=5,
               n_jobs=None, penalty=None, random_state=0, shuffle=True,
               tol=0.001, validation_fraction=0.1, verbose=0, warm_start=False)

In [27]:
# Test-then-train evaluation of the multiclass OFSSGD feature selection.
ofssgd_model = MC_OFSSGD(reduction_threshold=0.4, reduction_value=0.1, regularization_param=0.01, step_size=0.2, n_total_ftrs=stream.n_num_features, n_classes=stream.n_classes)

ofssgd_accuracy = []
ofssgd_selected_ftrs = []
ofssgd_stability = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features: OFSSGD is trained one instance at a time
    for idx, label in enumerate(y):
        ofssgd_model.train(x[idx],label)

    selected_ftr = ofssgd_model.get_feature_indices()

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, selected_ftr] = x[:, selected_ftr]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[selected_ftr] = 1
    ofssgd_selected_ftrs.append(ftr_array)

    if len(ofssgd_selected_ftrs) >= 10:
        stability = stability_factor(ofssgd_selected_ftrs[-10:])
        ofssgd_stability.append(stability)

    # Test
    y_pred = predictor.predict(x_reduced)
    ofssgd_accuracy.append(accuracy_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
ofssgd_run_time = end_time_all - start_time_all
print("The whole ofssgd run took {}".format(ofssgd_run_time))

# Restart the FileStream
stream.restart()

The whole ofssgd run took 113.74397321499964


In [28]:
# Per-batch accuracy recorded during the OFSSGD run.
fig = px.line(y = ofssgd_accuracy, title="Accuracy for OFSSGD algorithm", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [29]:
# Stability of the OFSSGD selection; one value per batch once 10 batches have been seen.
fig = px.line(y = ofssgd_stability, title="Stability for OFSSGD algorithm", labels={"x":"batches", "y":"stability"})
fig.show()

In [30]:
# For MNIST only: draw the features selected after the last processed batch as
# a 784-element 0/1 mask (selected_ftr still holds the last loop iteration).
if dataset_name == "mnist":
    img = np.zeros((784))
    img[selected_ftr] = 1
    paint_digit(img)


In [32]:
# Accuracy of the predictor trained during the OFSSGD run on the held-out test set.
# Note: test_x is passed with all features, not the reduced feature set.
y_pred = predictor.predict(test_x)
accuracy = accuracy_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

For the test dataset the previous trained predictor reached: 0.8328


## Regression Data
The FIRES model cannot be compared to a dedicated data-stream feature selection algorithm for regression; instead, the SGDRegressor from scikit-learn is used as a baseline.


### Load Datasets as Streaming Data

In [None]:
# Regression data: ailerons.csv is streamed with the target read from column 0;
# 10 features are kept per batch.
stream = FileStream('datasets/Regression/ailerons.csv', target_idx=0)
stream.prepare_for_use()
dataset_name = "ailerons"
n_selected_ftr = 10

# load test data
# NOTE(review): this reads the MNIST test file although the stream is ailerons -
# looks like a copy-paste leftover; confirm the intended regression test file.
# NOTE(review): the target is read with the int key 40 but dropped with the str
# key "40", and the stream uses target_idx=0 - the column handling is
# inconsistent and needs to be verified against the actual file layout.
test_data = pd.read_csv('datasets/Multiclass/mnist_test_normalized.csv')
test_y = test_data[40].to_numpy()
test_x = test_data.drop(columns="40").to_numpy()

In [None]:
# use SGDRegressor as predictor, pre-trained on the first 100 stream samples
predictor = SGDRegressor()
X, y = stream.next_sample(batch_size=100)
predictor.partial_fit(X,y)

### FIRES for Regression

In [None]:
# CUDA variant of FIRES with the regression model; no target_values are passed here.
fires_model = FC(n_total_ftr=stream.n_features,
                 mu_init=0,
                 sigma_init=1,
                 model='regression')

In [None]:
# Test-then-train evaluation of FIRES (CUDA) feature selection for regression.
# accuracy_score is a classification metric and rejects continuous targets, so
# the per-batch quality is measured with the R^2 score instead. The list keeps
# its name because the plotting cell below reads it.
fires_cuda_accuracy = []
fires_cuda_times = []

fires_cuda_selected_ftrs = []
fires_cuda_stability = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features (only this step is included in the per-batch timing)
    start_time = timer()
    ftr_weights = fires_model.weigh_features(x,y)
    ftr_selection = np.argsort(ftr_weights)[::-1][:n_selected_ftr]
    fires_cuda_times.append(timer()-start_time)

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, ftr_selection] = x[:, ftr_selection]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[ftr_selection] = 1
    fires_cuda_selected_ftrs.append(ftr_array)

    if len(fires_cuda_selected_ftrs) >= 10:
        stability = stability_factor(fires_cuda_selected_ftrs[-10:])
        fires_cuda_stability.append(stability)

    # Test (R^2 of the predictions on this batch)
    y_pred = predictor.predict(x_reduced)
    fires_cuda_accuracy.append(r2_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
fires_cuda_run_time = end_time_all - start_time_all
print("The whole fires_cuda run took {}".format(fires_cuda_run_time))

# Restart the FileStream
stream.restart()

In [None]:
# Per-batch score recorded during the FIRES regression run.
# NOTE(review): title and axis label say "accuracy" although this is a
# regression task - confirm which metric the list actually holds.
fig = px.line(y = fires_cuda_accuracy, title="Accuracy for Fires algorithm", labels={"x":"batches", "y":"accuracy"})
fig.show()

In [None]:
# Stability of the FIRES selection over a sliding window of 10 batches.
# The y axis shows the stability factor, so it is labelled accordingly.
fig = px.line(y = fires_cuda_stability, title="Stability for Fires algorithm", labels={"x":"batches", "y":"stability"})
fig.show()

In [None]:
# Evaluate the trained regressor on the held-out test set.
# accuracy_score only supports classification targets, so the R^2 score is
# reported instead; the variable name is kept for compatibility.
y_pred = predictor.predict(test_x)
accuracy = r2_score(test_y, y_pred)
print("For the test dataset the previous trained predictor reached: {}".format(accuracy))

### Feature selection via SGDRegressor

In [None]:
# L1-penalised SGDRegressor whose coefficients serve as feature weights.
sgdr_model = SGDRegressor(penalty="l1") #penalty could be elasticnet as well
# open question: n_selected_ftr?

In [None]:
# Test-then-train evaluation of feature selection via the L1-penalised
# SGDRegressor coefficients.

# Start from a fresh predictor (pre-trained on the first 100 samples) so the
# state learned during the FIRES run does not leak into this comparison.
predictor = SGDRegressor()
x, y = stream.next_sample(batch_size=100)
predictor.partial_fit(x, y)

# Holds the per-batch R^2 score; accuracy_score cannot handle continuous
# targets. The list keeps its original name for compatibility.
sgdr_accuracy = []
sgdr_times = []

sgdr_selected_ftrs = []
sgdr_stability = []

start_time_all = timer()
while stream.has_more_samples():
    # Load a new sample
    x, y = stream.next_sample(batch_size=100)

    # Select features (only this step is included in the per-batch timing)
    start_time = timer()
    sgdr_model.partial_fit(x,y)
    # Rank by coefficient magnitude: a large negative weight is as relevant
    # as a large positive one.
    ftr_weights = np.abs(sgdr_model.coef_)
    ftr_selection = np.argsort(ftr_weights)[::-1][:n_selected_ftr]
    sgdr_times.append(timer()-start_time)

    # Truncate x (retain only selected features, 'remove' all others, e.g. by replacing them with 0)
    x_reduced = np.zeros(x.shape)
    x_reduced[:, ftr_selection] = x[:, ftr_selection]

    # stability test: 0/1 mask of the current selection, compared over the last 10 batches
    ftr_array = np.zeros(stream.n_features)
    ftr_array[ftr_selection] = 1
    sgdr_selected_ftrs.append(ftr_array)

    if len(sgdr_selected_ftrs) >= 10:
        stability = stability_factor(sgdr_selected_ftrs[-10:])
        sgdr_stability.append(stability)

    # Test (R^2 of the predictions on this batch)
    y_pred = predictor.predict(x_reduced)
    sgdr_accuracy.append(r2_score(y, y_pred))

    # Train
    predictor.partial_fit(x_reduced, y)

# Total run time, taken from the single end timestamp
end_time_all = timer()
sgdr_run_time = end_time_all - start_time_all
print("The whole sgdr run took {}".format(sgdr_run_time))

# Restart the FileStream
stream.restart()