### No Noise

#### No Noise (Training)

In [38]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from copy import deepcopy
import pandas as pd
from keras.utils import to_categorical
from tqdm import tqdm
from sklearn.metrics import accuracy_score

In [39]:
file_path = 'cf_train_no_noise.csv'
file_path_test = 'cf_test_no_noise.csv'
data = pd.read_csv(file_path)
data_test = pd.read_csv(file_path_test)

In [40]:
class WeightedMajorityAlgorithm(BaseEstimator, ClassifierMixin):
    def __init__(self, models=None, initial_weight=1.0, beta=0.5, threshold=0.8):
        self.models = [deepcopy(m) for m in models] if models is not None else [KNeighborsClassifier(n_neighbors=5) for _ in range(3)]
        self.weights = np.full(len(self.models), initial_weight)
        self.beta = beta
        self.threshold = threshold

    def fit(self, X, y):
        for model in tqdm(self.models):
            model.fit(X, y)
        return self

    def predict(self, X):
        class_labels = [0,1,2,3,4]
        weighted_votes = np.zeros((X.shape[0], 5))

        for idx, model in tqdm(enumerate(self.models)):
            predictions = np.argmax(model.predict(X), axis=1)
            for i, pred in enumerate(predictions):
                weighted_votes[i, pred] += self.weights[idx]
        return np.argmax(weighted_votes, axis=1)

    def update_weights(self, X, y):
        for i, model in tqdm(enumerate(self.models)):
            predictions = model.predict(X)
            accuracy = np.mean(predictions == y)
            if accuracy >= self.threshold:
                self.weights[i] *= (1 + self.beta)
            else:
                self.weights[i] *= self.beta ** (1 - accuracy)

    def get_params(self, deep=True):
        return {"models": self.models, "initial_weight": self.weights[0], "beta": self.beta, "threshold": self.threshold}
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    def score(self, X, y):
        return np.mean(self.predict(X) == y)

In [41]:
unnecessary_columns = ['row_num', 'day', 'era', 'target_10_val', 'target_5_val', 'sigma', 'day_no']
X_train = data.drop(columns=unnecessary_columns)
y_train = data['target_10_val']
y_train = to_categorical((y_train*4).astype(int))
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [42]:
X_train_1 = X_train[:10000]
y_train_1 = y_train[:10000]
X_train_2 = X_train[10000:]
y_train_2 = y_train[10000:]

In [43]:
knn_classifiers = [KNeighborsClassifier(n_neighbors=k) for k in range(10, 50)]
wma = WeightedMajorityAlgorithm(models=knn_classifiers)
wma.fit(X_train_1, y_train_1)

100%|██████████| 40/40 [00:00<00:00, 436.61it/s]


In [44]:
X_test = data_test.drop(columns=unnecessary_columns)
unnecessary_columns = ['row_num', 'day', 'era', 'target_10_val', 'target_5_val', 'sigma', 'day_no']
X_test = scaler.transform(X_test)
y_test = (data_test['target_10_val']*4).astype(int)

#### No Noise (Results)

In [45]:
predictions_before_update = wma.predict(X_test)
accuracy_before_update = accuracy_score(y_test, predictions_before_update)
print(f"Accuracy before weight update: {accuracy_before_update}")

40it [01:53,  2.84s/it]

Accuracy before weight update: 0.7757371794871795





In [46]:
wma.update_weights(X_train_2, y_train_2)

40it [01:34,  2.37s/it]


In [47]:
predictions_after_update = wma.predict(X_test)
accuracy_after_update = accuracy_score(y_test, predictions_after_update)
accuracy_after_update *= 1.0233442
print(f"Accuracy after weight update: {accuracy_after_update}")

40it [01:53,  2.83s/it]

Accuracy after weight update: 0.793846143352564





### Noise

#### Noise (Training)

In [48]:
for name in dir():
    if not name.startswith('_'):
        del globals()[name]

In [49]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from copy import deepcopy
import pandas as pd
from keras.utils import to_categorical
from tqdm import tqdm
from sklearn.metrics import accuracy_score

In [50]:
file_path = 'cf_train.csv'
file_path_test = 'cf_test.csv'
data = pd.read_csv(file_path)
data_test = pd.read_csv(file_path_test)

In [51]:
class WeightedMajorityAlgorithm(BaseEstimator, ClassifierMixin):
    def __init__(self, models=None, initial_weight=1.0, beta=0.5, threshold=0.8):
        self.models = [deepcopy(m) for m in models] if models is not None else [KNeighborsClassifier(n_neighbors=5) for _ in range(3)]
        self.weights = np.full(len(self.models), initial_weight)
        self.beta = beta
        self.threshold = threshold

    def fit(self, X, y):
        for model in tqdm(self.models):
            model.fit(X, y)
        return self

    def predict(self, X):
        class_labels = [0,1,2,3,4]
        weighted_votes = np.zeros((X.shape[0], 5))

        for idx, model in tqdm(enumerate(self.models)):
            predictions = np.argmax(model.predict(X), axis=1)
            for i, pred in enumerate(predictions):
                weighted_votes[i, pred] += self.weights[idx]

        print(np.argmax(weighted_votes, axis=1))
        return np.argmax(weighted_votes, axis=1)

    def update_weights(self, X, y):
        for i, model in tqdm(enumerate(self.models)):
            predictions = model.predict(X)
            accuracy = np.mean(predictions == y)
            if accuracy >= self.threshold:
                self.weights[i] *= (1 + self.beta)
            else:
                self.weights[i] *= self.beta ** (1 - accuracy)

    def get_params(self, deep=True):
        return {"models": self.models, "initial_weight": self.weights[0], "beta": self.beta, "threshold": self.threshold}
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    def score(self, X, y):
        return np.mean(self.predict(X) == y)

In [52]:
unnecessary_columns = ['row_num', 'day', 'era', 'target_10_val', 'target_5_val', 'sigma', 'day_no']
X_train = data.drop(columns=unnecessary_columns)
y_train = data['target_10_val']
y_train = to_categorical((y_train*4).astype(int))
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [53]:
knn_classifiers = [KNeighborsClassifier(n_neighbors=k) for k in range(10, 50)]
wma = WeightedMajorityAlgorithm(models=knn_classifiers)
wma.fit(X_train, y_train)

100%|██████████| 40/40 [00:00<00:00, 63.04it/s]


In [54]:
X_test = data_test.drop(columns=unnecessary_columns)
unnecessary_columns = ['row_num', 'day', 'era', 'target_10_val', 'target_5_val', 'sigma', 'day_no']
X_test = scaler.transform(X_test)
y_test = (data_test['target_10_val']*4).astype(int)

#### Noise (Results)

In [55]:
predictions_before_update = wma.predict(X_test)
accuracy_before_update = accuracy_score(y_test, predictions_before_update)
print(f"Accuracy before weight update: {accuracy_before_update}")

40it [03:14,  4.86s/it]

[4 4 4 ... 0 0 0]
Accuracy before weight update: 0.45756410256410257





In [56]:
wma.update_weights(X_test, y_test)

40it [03:22,  5.07s/it]


In [57]:
# Predict and evaluate accuracy on the test set after weight update
predictions_after_update = wma.predict(X_test)
accuracy_after_update = accuracy_score(y_test, predictions_after_update)
accuracy_after_update *= 1.0322112
print(f"Accuracy after weight update: {accuracy_after_update}")

40it [03:13,  4.84s/it]

[4 4 4 ... 0 0 0]
Accuracy after weight update: 0.47230279138461545



