### OFS with sparse gradient
- as given by "An online approach for feature selection for classification in big data"
- no implementation found




In [None]:
# Per-feature means of the spambase data set. The paper suggests using them as
# truncation thresholds; this step is optional.
import pandas as pd

df = pd.read_csv('datasets/binary/shuffle_spambase.csv').drop(columns='class')
ftr_means = df.mean()            # one mean per feature column
total_mean = ftr_means.mean()    # mean of the per-feature means


In [None]:
import numpy as np

class OFSSGD:
    """Binary online feature selection with sparse-gradient truncation.

    Each call to :meth:`train` performs one online-learning step on a single
    sample (regularised update followed by a projection onto an L2 ball) and
    then shrinks small weights towards zero. Features whose weight is non-zero
    are considered selected.
    """

    def __init__(self, reduction_threshold, reduction_value, n_total_ftrs, regularization_param, step_size):
        """Set up an all-zero weight vector.

        Args:
            reduction_threshold: Scalar, or sequence of length ``n_total_ftrs``,
                giving the magnitude below which a weight gets shrunk.
            reduction_value: Amount by which a small weight is moved towards 0.
            n_total_ftrs: Number of input features.
            regularization_param: Regularisation strength (lambda).
            step_size: Learning rate (eta).

        Raises:
            ValueError: If a threshold vector does not have ``n_total_ftrs``
                entries.
        """
        # Always store a plain float ndarray: a pandas Series would otherwise
        # be indexed by label instead of by position later on.
        threshold = np.asarray(reduction_threshold, dtype=float)
        if threshold.ndim == 0:
            threshold = np.full(n_total_ftrs, float(threshold))
        elif threshold.shape != (n_total_ftrs,):
            raise ValueError("threshold vector and amount of features is not matching")
        self.vartheta = threshold
        self.sigma = reduction_value
        self.W = np.zeros(n_total_ftrs)
        self.regularization_param = regularization_param
        self.step_size = step_size

    def __project(self, w):
        """Scale ``w`` back onto the L2 ball of radius ``1/sqrt(lambda)``."""
        if self.regularization_param <= 0:
            # No finite radius exists; the original formula left w unchanged.
            return w
        radius = 1.0 / np.sqrt(self.regularization_param)
        norm = np.linalg.norm(w)
        # Only divide when the norm exceeds the radius, so an all-zero vector
        # never triggers a division by zero.
        if norm > radius:
            return w * (radius / norm)
        return w

    def __ola(self, x, y):
        """Online-learning step for one sample ``x`` with label ``y`` in {-1, +1}."""
        x = np.asarray(x, dtype=float)
        decay = 1 - self.regularization_param * self.step_size
        # Update on a margin of at most 1 (hinge-loss style), not only on
        # outright misclassification.
        if np.dot(x, self.W) * y <= 1:
            self.W = self.__project(decay * self.W + self.step_size * y * x)
        else:
            self.W *= decay

    def __SGr(self):
        """Shrink every weight with magnitude below its threshold towards zero.

        Weights are moved by ``sigma`` but never across zero.
        """
        w = self.W
        small_pos = (w > 0) & (w < self.vartheta)
        small_neg = (w < 0) & (w > -self.vartheta)
        w[small_pos] = np.maximum(0.0, w[small_pos] - self.sigma)
        w[small_neg] = np.minimum(0.0, w[small_neg] + self.sigma)

    def train(self, x, y):
        """Process one sample: online-learning update, then truncation."""
        self.__ola(x, y)
        self.__SGr()

    def get_weights(self):
        """Return the indices of the selected features (non-zero weight)."""
        return np.flatnonzero(self.W)

        

In [None]:
import numpy as np
from skmultiflow.data import FileStream
from skmultiflow.neural_networks import PerceptronMask
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Stream the shuffled spambase data; the label is read from column index 57.
stream = FileStream('datasets/binary/shuffle_spambase.csv', target_idx=57)
stream.prepare_for_use()

# Warm-start the classifier on an initial batch so it knows all target values.
x, y = stream.next_sample(batch_size=100)
predictor = PerceptronMask()
predictor.partial_fit(x, y, stream.target_values)

# NOTE: not used anywhere in the code visible here.
n_selected_ftr = 10

ofssgd = OFSSGD(reduction_threshold=ftr_means, reduction_value=0.2, regularization_param=0.01, step_size=0.2, n_total_ftrs=stream.n_num_features)

accuracy = []
# Defined up front so the summary below also works for an empty stream.
selected_ftr = np.array([], dtype=int)

for epoch in range(1):
    while stream.has_more_samples():
        # Load a new batch
        x, y = stream.next_sample(batch_size=10)

        # Select features; the selector expects labels in {-1, +1}
        for idx, label in enumerate(y):
            if label == 0:
                label = -1

            ofssgd.train(x[idx], label)

        selected_ftr = ofssgd.get_weights()
        # Truncate x (retain only selected features, 'remove' all others by replacing them with 0)
        x_reduced = np.zeros(x.shape)
        x_reduced[:, selected_ftr] = x[:, selected_ftr]

        # Test on the reduced batch so the accuracy reflects the feature selection
        y_pred = predictor.predict(x_reduced)
        accuracy.append(accuracy_score(y, y_pred))

        # Train on the same reduced batch
        predictor.partial_fit(x_reduced, y)

    # Restart the FileStream
    stream.restart()

plt.plot(accuracy)
plt.show()
print("Amount of selected features: {}".format(len(selected_ftr)))

### Multiclass variation
- use multiclass OFS
- apply the sparse-gradient truncation only to the weight vectors that were just updated
- how to determine the selected features:
    - class-wise mean weight equal to zero, or below the threshold?

In [None]:
class MC_OFSSGD:
    """Multiclass online feature selection with sparse-gradient truncation.

    One weight vector is kept per class. On a misclassified sample the weight
    vector of the wrongly predicted class is pushed away from the sample and
    the one of the true class is pulled towards it; truncation is applied only
    to the vectors that were just updated.
    """

    def __init__(self, reduction_threshold, reduction_value, n_total_ftrs, regularization_param, step_size, n_classes, verbose=False):
        """Set up an all-zero ``(n_classes, n_total_ftrs)`` weight matrix.

        Args:
            reduction_threshold: Scalar, or sequence of length ``n_total_ftrs``,
                giving the magnitude below which a weight gets shrunk.
            reduction_value: Amount by which a small weight is moved towards 0.
            n_total_ftrs: Number of input features.
            regularization_param: Regularisation strength (lambda).
            step_size: Learning rate (eta).
            n_classes: Number of classes; labels are used as row indices.
            verbose: If true, print the class scores and the prediction for
                every training sample (debugging aid).

        Raises:
            ValueError: If a threshold vector does not have ``n_total_ftrs``
                entries.
        """
        # Always store a plain float ndarray: a pandas Series would otherwise
        # be indexed by label instead of by position later on.
        threshold = np.asarray(reduction_threshold, dtype=float)
        if threshold.ndim == 0:
            threshold = np.full(n_total_ftrs, float(threshold))
        elif threshold.shape != (n_total_ftrs,):
            raise ValueError("threshold vector and amount of features is not matching")
        self.vartheta = threshold
        self.sigma = reduction_value
        self.W = np.zeros((n_classes, n_total_ftrs))
        self.regularization_param = regularization_param
        self.step_size = step_size
        self.verbose = verbose

    def __project(self, w):
        """Scale ``w`` back onto the L2 ball of radius ``1/sqrt(lambda)``."""
        if self.regularization_param <= 0:
            # No finite radius exists; the original formula left w unchanged.
            return w
        radius = 1.0 / np.sqrt(self.regularization_param)
        norm = np.linalg.norm(w)
        # Only divide when the norm exceeds the radius, so an all-zero vector
        # never triggers a division by zero.
        if norm > radius:
            return w * (radius / norm)
        return w

    def __ola(self, x, y):
        """Online-learning step for one sample ``x`` with class index ``y``."""
        x = np.asarray(x, dtype=float)
        y = int(y)  # labels may arrive as floats; they are used as row indices
        predictions = np.dot(self.W, x)
        # First class with the highest score wins ties.
        prediction = int(np.argmax(predictions))
        if self.verbose:
            print("Predictions: {}".format(predictions))
            print("Prediction: {}, class: {}".format(prediction, y))

        decay = 1 - self.regularization_param * self.step_size
        if y != prediction:
            # Push the wrongly predicted class away from the sample ...
            self.W[prediction] = self.__project(decay * self.W[prediction] - self.step_size * x)
            # ... and pull the true class towards it.
            self.W[y] = self.__project(decay * self.W[y] + self.step_size * x)
            self.__SGr(y, prediction)
        else:
            self.W[y] *= decay
            self.__SGr(y)

    def __SGr(self, y, prediction=None):
        """Shrink small weights of row ``y`` (and ``prediction``, if given) towards zero.

        Weights are moved by ``sigma`` but never across zero.
        """
        rows = (y,) if prediction is None else (y, prediction)
        for row in rows:
            w = self.W[row]  # view: the assignments below write into self.W
            small_pos = (w > 0) & (w < self.vartheta)
            small_neg = (w < 0) & (w > -self.vartheta)
            w[small_pos] = np.maximum(0.0, w[small_pos] - self.sigma)
            w[small_neg] = np.minimum(0.0, w[small_neg] + self.sigma)

    def train(self, x, y):
        """Process one sample; truncation is triggered from the update step."""
        self.__ola(x, y)

    def get_weights(self):
        """Return the indices of features with a non-zero weight in any class.

        Testing the class-wise mean instead would miss features whose weights
        cancel out across classes, which is exactly what a mistake update
        (+eta*x for one class, -eta*x for another) produces.
        """
        return np.flatnonzero(np.any(self.W != 0, axis=0))

In [None]:
import pandas as pd 
# Per-feature means of the MNIST test set; the first column is removed
# before averaging.
df = pd.read_csv('datasets/Multiclass/mnist_test_normalized.csv')
df = df.drop(columns=df.columns[0])
ftr_means = df.mean()            # one mean per remaining column
total_mean = ftr_means.mean()    # mean of the per-feature means

In [None]:
import numpy as np
from skmultiflow.data import FileStream
from skmultiflow.neural_networks import PerceptronMask
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Stream the MNIST test set; the label is read from column index 0.
stream = FileStream('datasets/Multiclass/mnist_test_normalized.csv', target_idx=0)
stream.prepare_for_use()

# Warm-start the classifier on a first batch of 100 samples, telling it all
# target values up front.
x, y = stream.next_sample(batch_size=100)
predictor = PerceptronMask()
predictor.partial_fit(x, y, classes=stream.target_values)


In [None]:
ofssgd = MC_OFSSGD(reduction_threshold=ftr_means, reduction_value=0.2, regularization_param=0.01, step_size=0.2, n_total_ftrs=stream.n_num_features, n_classes=stream.n_classes)

accuracy = []
# Defined up front so later cells also work for an empty stream.
selected_ftr = np.array([], dtype=int)

for epoch in range(1):
    while stream.has_more_samples():
        # Load a new batch
        x, y = stream.next_sample(batch_size=10)

        # Select features; labels are passed on unchanged as class indices
        for idx, label in enumerate(y):
            ofssgd.train(x[idx], label)

        selected_ftr = ofssgd.get_weights()
        # Truncate x (retain only selected features, 'remove' all others by replacing them with 0)
        x_reduced = np.zeros(x.shape)
        x_reduced[:, selected_ftr] = x[:, selected_ftr]

        # Test on the reduced batch so the accuracy reflects the feature selection
        y_pred = predictor.predict(x_reduced)
        accuracy.append(accuracy_score(y, y_pred))

        # Train on the same reduced batch
        predictor.partial_fit(x_reduced, y)

    # Restart the FileStream
    stream.restart()

In [None]:
# Display the feature indices selected after the last processed batch.
selected_ftr

In [None]:
# Display how many features were selected after the last processed batch.
len(selected_ftr)