In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import warnings
from sklearn.metrics import ConfusionMatrixDisplay
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the parent of the notebook's working directory importable so that the
# project packages imported below (`flod`, `experiments`) can be resolved.
PATH_FLOD = os.path.join(os.getcwd(), '..')

# Guard the append: re-running this cell must not pile up duplicate entries
# in sys.path.
_flod_root = os.path.abspath(PATH_FLOD)
if _flod_root not in sys.path:
    sys.path.append(_flod_root)

In [None]:
# Forwarded as `max_rounds` to FederatedBSVClassifier; callback_delta_overtime
# also compares it against the number of logged debug entries.
MAX_ROUNDS = 1
# Forwarded as the `method` argument of FederatedBSVClassifier.
METHOD = 'gamma'

In [None]:
from flod.classifiers.federatedbsvclassifier import FederatedBSVClassifier

In [None]:
from experiments.experiments import get_datasets, get_dataset_from_path

In [None]:
# Look up the 'pen-global' entry among the available datasets and load it:
# X holds the samples, Y the matching labels.
pen_global_entry = get_datasets()['pen-global']
X, Y = get_dataset_from_path(pen_global_entry)

In [None]:
from sklearn.model_selection import train_test_split

# Keep only normal data (label 1) for training.
# Comprehensions are used so that no loop variables (`x`, `y`) leak into the
# notebook namespace.
normal_X = [sample for sample, label in zip(X, Y) if label == 1]
normal_y = [label for label in Y if label == 1]

# test_size=0.985 sends 98.5% of the normal samples to the discarded split, so
# only ~1.5% are kept for training. random_state pins that subsample so that
# Restart & Run All reproduces the same results.
X_train, _, y_train, _ = train_test_split(
    normal_X, normal_y, test_size=0.985, random_state=42
)
len(X_train)

# IID: training samples are assigned to the two clients uniformly at random

In [None]:
# IID split: each training sample is assigned to client 0 or client 1
# uniformly at random. A seeded Generator keeps the assignment reproducible
# across kernel restarts.
client_assigment = np.random.default_rng(42).choice([0, 1], size=len(X_train))

In [None]:
# Federated classifier for the IID experiment; class labels follow the
# convention used when filtering the training data (1 = normal, -1 = outlier).
clf = FederatedBSVClassifier(
    method=METHOD,
    normal_class_label=1,
    outlier_class_label=-1,
    max_rounds=MAX_ROUNDS,
)

In [None]:
# `_compute_gamma` is a private helper of the federated classifier; besides the
# diagnostics printed below it returns `bsvclf`, the model used for the
# confusion matrix and the beta histogram.
# NOTE(review): presumably `bsvclf` is fitted on the pooled data - confirm.
cos, gamma, sum_betas, opt_norms, bsvclf = clf._compute_gamma(X_train, y_train, client_assigment)
print(f'Cos: {cos} \nGamma: {gamma} \nSum betas: {sum_betas} \nOpt norms: {opt_norms}')

ConfusionMatrixDisplay.from_predictions(y_train, bsvclf.predict(X_train), cmap=plt.cm.Blues)
# Render explicitly so the cell does not end on the display object's repr,
# matching the other confusion-matrix cells in this notebook.
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Group the (sample index, beta) pairs by the client that owns each sample.
client_betas = [
    [(idx, beta) for idx, beta in enumerate(bsvclf.betas_) if client_assigment[idx] == owner]
    for owner in (0, 1)
]

# One semi-transparent histogram per client, drawn on the same axes.
for owner_betas, colour, name in zip(client_betas, ('blue', 'orange'), ('Client 0', 'Client 1')):
    plt.hist([beta for _, beta in owner_betas], color=colour, alpha=0.5, label=name)

plt.title('Histogram of Beta Values')
plt.xlabel('Beta Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()


In [None]:
def callback_beta_dist(fbsv):
    """Plot overlaid histograms of the beta values held by the two clients.

    Reads `fbsv.betas0` and `fbsv.betas1` and shows the figure immediately.
    Passed to `fit` as `round_callback` elsewhere in this notebook.
    """
    series = (
        (fbsv.betas0, dict(color='blue', alpha=0.5, label='Client 0')),
        (fbsv.betas1, dict(color='orange', alpha=0.5, label='Client 1', stacked=True)),
    )
    for values, hist_kwargs in series:
        plt.hist(values, **hist_kwargs)

    plt.title('Histogram of Beta Values')
    plt.xlabel('Beta Values')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

def callback_delta_overtime(fbsv):
    """Plot W0, W1 and their absolute gap once enough entries are logged.

    Does nothing while `fbsv.debug` holds fewer than MAX_ROUNDS entries.
    Otherwise the entries are loaded into a DataFrame and the columns 'W0',
    'W1' and the derived 'delta' (= |W0 - W1|) are plotted against 'sum_beta0'.
    """
    if len(fbsv.debug) >= MAX_ROUNDS:
        rounds = pd.DataFrame(fbsv.debug)
        rounds['delta'] = (rounds['W0'] - rounds['W1']).abs()
        rounds.plot(
            x='sum_beta0',
            y=['W0', 'W1', 'delta'],
            figsize=(15, 5),
            title='W0, W1 and delta',
            marker='o',
            style=['.', '.', '--'],
        )
        plt.show()

In [None]:
# Federated training on the IID split; the beta histogram callback is handed
# to `fit` as `round_callback`.
clf.fit(
    X_train,
    y_train,
    client_assignment=client_assigment,
    round_callback=callback_beta_dist,
)

In [None]:
# Confusion matrix of the federated model evaluated on its own training data.
train_predictions = clf.predict(X_train)
ConfusionMatrixDisplay.from_predictions(y_train, train_predictions, cmap=plt.cm.Blues)
plt.show()

In [None]:
# Load the classifier's `debug` log into a DataFrame for inspection.
# NOTE(review): presumably one entry per federated round - confirm.
debug = pd.DataFrame(clf.debug)

In [None]:
# Absolute gaps between the two clients' logged quantities.
debug['delta_W'] = (debug['W0'] - debug['W1']).abs()
debug['delta_norm'] = (debug['f_norm0'] - debug['f_norm1']).abs()
display(debug)

# Toy Example

In [None]:
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler

# Two 2-D blobs of 12 points in total; the blob index returned by make_blobs is
# reused as the client assignment, so each client owns one cluster.
X, client_assigment = make_blobs(n_samples=12, n_features=2, centers=2, random_state=42)
# Every generated point is labelled 1.
y = [1] * len(X)

# Rescale every feature with a min-max scaler.
X = MinMaxScaler().fit_transform(X)

In [None]:
# Imbalance the clients: every client-0 sample is discarded with probability
# 0.5, while all other samples are always kept.
# NOTE: this cell overwrites X, y and client_assigment, so re-running it without
# first re-running the cell that generates the blobs subsamples the data again.
imbalance_rng = np.random.default_rng(42)  # seeded so the toy example is reproducible

assignment = np.asarray(client_assigment)
# One independent coin flip per sample; only the flips that land on client-0
# samples have any effect.
dropped = (assignment == 0) & (imbalance_rng.random(len(assignment)) < 0.5)
kept = ~dropped

X = np.asarray(X)[kept]
y = np.asarray(y)[kept]
# Stays a plain list, as before this change, for element-wise indexing downstream.
client_assigment = list(assignment[kept])

In [None]:
# Hyper-parameters of the toy experiment: forwarded as `C`/`q` to
# FederatedBSVClassifier and as `c`/`q` to BSVClassifier in the cells below.
C = 1
q = 1

In [None]:
# Toy data set, coloured by owning client.
feature0, feature1 = X[:, 0], X[:, 1]
plt.scatter(feature0, feature1, c=client_assigment)
plt.show()

In [None]:
# Federated classifier for the toy example, configured with the C and q
# hyper-parameters defined above.
clf = FederatedBSVClassifier(
    method=METHOD,
    normal_class_label=1,
    outlier_class_label=-1,
    max_rounds=MAX_ROUNDS,
    C=C,
    q=q,
)
# `_compute_gamma` is a private helper; besides the diagnostics printed below it
# returns `bsvclf`, the model used in the following plots.
# NOTE(review): presumably `bsvclf` is fitted on the pooled data - confirm.
cos, gamma, sum_betas, opt_norms, bsvclf = clf._compute_gamma(X, y, client_assigment)
print(f'Cos: {cos} \nGamma: {gamma} \nSum betas: {sum_betas} \nOpt norms: {opt_norms}')

ConfusionMatrixDisplay.from_predictions(y, bsvclf.predict(X), cmap=plt.cm.Blues)
# Render explicitly so the cell does not end on the display object's repr,
# matching the other confusion-matrix cells in this notebook.
plt.show()

In [None]:
# Evaluate `bsvclf._compute_r` on a regular grid that pads the data range by 1
# on every side (np.linspace default: 50 points per axis).
gx = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1)
gy = np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1)
gX, gY = np.meshgrid(gx, gy)
zs = np.array([bsvclf._compute_r((px, py)) for px, py in zip(gX.ravel(), gY.ravel())])
gZ = zs.reshape(gX.shape)

# Draw the single contour at the value returned by `_best_radius()`.
# `levels` stays a module-level name because a later cell still refers to it.
levels = [bsvclf._best_radius()]
membership_contour = plt.contour(gX, gY, gZ, levels=levels)
plt.clabel(membership_contour, inline=1)

# Overlay the samples, coloured by owning client.
plt.scatter(X[:, 0], X[:, 1], c=client_assigment, alpha=.4)

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Group the (sample index, beta) pairs by the client that owns each sample.
client_betas = [
    [(idx, beta) for idx, beta in enumerate(bsvclf.betas_) if client_assigment[idx] == owner]
    for owner in (0, 1)
]

# One semi-transparent histogram per client, drawn on the same axes.
for owner_betas, colour, name in zip(client_betas, ('blue', 'orange'), ('Client 0', 'Client 1')):
    plt.hist([beta for _, beta in owner_betas], color=colour, alpha=0.5, label=name)

plt.title('Histogram of Beta Values')
plt.xlabel('Beta Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()

In [None]:
# Federated training on the toy data; the beta histogram callback is handed to
# `fit` as `round_callback`.
clf.fit(
    X,
    y,
    client_assignment=client_assigment,
    round_callback=callback_beta_dist,
)

In [None]:
# Load the classifier's `debug` log and append the absolute gaps between the
# two clients' logged quantities.
debug = pd.DataFrame(clf.debug).assign(
    delta_W=lambda frame: (frame['W0'] - frame['W1']).abs(),
    delta_norm=lambda frame: (frame['f_norm0'] - frame['f_norm1']).abs(),
)
display(debug)

In [None]:
# Mean and standard deviation of the values in `clf.radiuses0`.
r0 = np.average(clf.radiuses0)
r0std = np.std(clf.radiuses0)
(r0, r0std)

In [None]:
# Mean and standard deviation of the values in `clf.radiuses1`.
r1 = np.average(clf.radiuses1)
r1std = np.std(clf.radiuses1)
(r1, r1std)

In [None]:
# Absolute difference between the two clients' mean radii.
mean_radius0 = np.average(clf.radiuses0)
mean_radius1 = np.average(clf.radiuses1)
abs(mean_radius0 - mean_radius1)

In [None]:
# Evaluate both per-client functions (`clf.fc0`, `clf.fc1`) on a regular grid
# that pads the data range by 1 on every side.
gx = np.linspace(min(X[:,0])-1, max(X[:,0])+1)
gy = np.linspace(min(X[:,1])-1, max(X[:,1])+1)
gX, gY = np.meshgrid(gx, gy)
zs0 = np.array([clf.fc0(np.array([px, py])) for px, py in zip(np.ravel(gX), np.ravel(gY))])
zs1 = np.array([clf.fc1(np.array([px, py])) for px, py in zip(np.ravel(gX), np.ravel(gY))])
gZ0 = zs0.reshape(gX.shape)
gZ1 = zs1.reshape(gX.shape)

# One contour per client, each drawn at that client's mean radius (r0 / r1 are
# computed in the cells above). Each contour keeps its own handle so that BOTH
# get labelled; previously the first handle was overwritten and only client
# 1's contour received a label.
contour_client0 = plt.contour(gX, gY, gZ0, levels=[r0])
contour_client1 = plt.contour(gX, gY, gZ1, levels=[r1])
plt.clabel(contour_client0, inline=1)
plt.clabel(contour_client1, inline=1)

# Overlay the samples, coloured by the federated model's prediction.
plt.scatter(X[:, 0], X[:, 1], c=clf.predict(X), alpha=.4)

plt.show()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix of the federated model on the toy data.
federated_predictions = clf.predict(X)
ConfusionMatrixDisplay.from_predictions(y, federated_predictions, cmap=plt.cm.Blues)
plt.show()

## What happens if we train only on one of the clients?

In [None]:
from flod.classifiers.bsvclassifier import BSVClassifier


# Single-client baseline: same hyper-parameters, trained only on the samples
# owned by client 0.
monoclient = BSVClassifier(normal_class_label=1, outlier_class_label=-1, c=C, q=q)

client_x = [sample for sample, owner in zip(X, client_assigment) if owner == 0]
client_y = [label for label, owner in zip(y, client_assigment) if owner == 0]

monoclient.fit(client_x, client_y)

In [None]:
# Evaluate `monoclient._compute_r` on a regular grid that pads the data range
# by 1 on every side (np.linspace default: 50 points per axis).
gx = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1)
gy = np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1)
gX, gY = np.meshgrid(gx, gy)
zs = np.array([monoclient._compute_r((px, py)) for px, py in zip(gX.ravel(), gY.ravel())])
gZ = zs.reshape(gX.shape)

# Single contour at the value returned by `_best_radius()`.
boundary_level = (monoclient._best_radius(),)
membership_contour = plt.contour(gX, gY, gZ, levels=boundary_level)
plt.clabel(membership_contour, inline=1)

# Overlay all samples (both clients), coloured by owning client.
plt.scatter(X[:, 0], X[:, 1], c=client_assigment, alpha=.4)

plt.show()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix of the client-0-only model evaluated on the whole toy set.
monoclient_predictions = monoclient.predict(X)
ConfusionMatrixDisplay.from_predictions(y, monoclient_predictions, cmap=plt.cm.Blues)
plt.show()