In [25]:
# Mount Google Drive at /content/drive; the extraction cell below reads the
# dataset archive from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [26]:
!mkdir dataset

In [None]:
# Unpack the gzip-compressed archive from Drive into /content/dataset.
# --checkpoint=.100 asks tar to print a progress dot every 100 records.
!tar -xzf "/content/drive/MyDrive/Wireless/diabetes.tar.gz" --directory "/content/dataset" --checkpoint=.100

In [None]:
!pip install tensorflow==1.15
!pip install keras==2.2.4 

In [39]:
import numpy as np
import random
import copy

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
import tensorflow as tf
import matplotlib.pyplot as plt

In [40]:
class SVM:
  """Linear soft-margin SVM trained with per-sample sub-gradient descent.

  The decision value of a sample is ``x . w - b``. Training labels are mapped
  to {-1, +1} internally (every label <= 0 becomes -1) and predictions are
  reported as 0 / 1.

  ``X_train`` and ``y_train`` are indexed with ``[0]`` and ``[1]`` and the two
  parts are concatenated along axis 0 before training (the callers in this
  notebook pass one array per class).
  """

  def __init__(self, X_train, y_train, X_test, y_test, val=True, val_type='k_fold', val_distribution='balanced', k=5, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
    """Store the data and hyper-parameters; no training happens here.

    Args:
      X_train, y_train: two-part training features / labels (see class doc).
      X_test, y_test: evaluation data used by ``predict`` and ``accuracy``.
      val: whether a validation split drives early stopping.
      val_type: validation scheme; only ``'cross_val'`` is implemented.
      val_distribution: ``'balanced'`` (split each part separately) or
        ``'unbalanced'`` (split the pooled data).
      k: stored on the instance; not read by any method of this class.
      learning_rate: step size of every update.
      lambda_param: L2 regularisation strength.
      n_iters: maximum number of passes over the training data.
    """
    self.lr = learning_rate
    self.lambda_param = lambda_param
    self.n_iters = n_iters

    self.X_train = X_train
    self.y_train = y_train

    self.X_test = X_test
    self.y_test = y_test

    self.val = val
    self.val_type = val_type
    self.val_distribution = val_distribution
    self.k = k

    # An empty weight vector together with b=None marks an untrained model;
    # Gradient_update replaces both with zeros on first use.
    self.w = np.array([])
    self.b = None

  @staticmethod
  def _labels(X, w, b):
    """Return 0/1 labels for ``X``: positive decision value -> 1, otherwise 0."""
    approx = np.sign(np.dot(X, w) - b)
    return np.where(approx<0, 0, approx)

  def Gradient_update(self, X_train, y_train, X_val=None, y_val=None):
    """Run up to ``n_iters`` passes of per-sample hinge-loss updates.

    Training continues from the current ``self.w`` / ``self.b`` when they are
    already set. When ``self.val`` is true, validation accuracy is checked
    every 10th pass (starting with the first): an improvement is remembered
    as the best model, anything else restores the best model and stops.

    Raises:
      ValueError: validation is enabled but ``X_val`` / ``y_val`` is missing.
    """
    if self.val and (X_val is None or y_val is None):
      raise ValueError("val=True requires X_val and y_val")

    _, n_features = X_train.shape
    y_ = np.where(y_train <= 0, -1, 1)

    if self.w.size == 0 and self.b is None :
      self.w = np.zeros(n_features)
      self.b = 0

    # NOTE(review): the reference model starts at zeros even on a warm start,
    # so a warm-started model that does not beat the all-zero model at the
    # first check is reset to zeros.
    w_best = np.zeros(n_features)
    b_best = 0

    for i in range(0,self.n_iters):
      for idx, x_i in enumerate(X_train):
        margin_ok = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
        if margin_ok:
          # Only the regulariser contributes to the gradient.
          self.w -= self.lr * (2 * self.lambda_param * self.w)
        else:
          self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
          self.b -= self.lr * y_[idx]

      if i%10 == 0 and self.val:
        acc_now = accuracy_score(y_val, self._labels(X_val, self.w, self.b))
        acc_best = accuracy_score(y_val, self._labels(X_val, w_best, b_best))

        if acc_best < acc_now:
          w_best = copy.deepcopy(self.w)
          b_best = copy.deepcopy(self.b)
        else:
          self.w = copy.deepcopy(w_best)
          self.b = copy.deepcopy(b_best)
          break

  def Cross_validation(self, val_split):
    """Hold out ``val_split`` of the training data and train with early stopping.

    Raises:
      ValueError: ``val_distribution`` is neither 'balanced' nor 'unbalanced'.
    """
    if (self.val_distribution == 'balanced'):
      # Split each part on its own so both appear in the validation set.
      X_train0, X_val0, y_train0, y_val0 = train_test_split(self.X_train[0], self.y_train[0], test_size=val_split)
      X_train1, X_val1, y_train1, y_val1 = train_test_split(self.X_train[1], self.y_train[1], test_size=val_split)

      X_train = np.concatenate((X_train0,X_train1),axis=0)
      y_train = np.concatenate((y_train0,y_train1),axis=0)

      X_val = np.concatenate((X_val0,X_val1),axis=0)
      y_val = np.concatenate((y_val0,y_val1),axis=0)

    elif (self.val_distribution == 'unbalanced'):
      X_train = np.concatenate((self.X_train[0],self.X_train[1]),axis=0)
      y_train = np.concatenate((self.y_train[0],self.y_train[1]),axis=0)

      X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_split)

    else:
      raise ValueError("unknown val_distribution {!r}; expected 'balanced' or 'unbalanced'".format(self.val_distribution))

    X_train, y_train = self.random_shuffle(X_train, y_train)
    self.Gradient_update(X_train, y_train, X_val, y_val)

  def fit(self):
    """Train on ``self.X_train`` / ``self.y_train``.

    Raises:
      ValueError: validation is enabled with a ``val_type`` other than
        'cross_val'. Previously this case returned without training and the
        failure only surfaced later inside ``predict``.
    """
    if self.val_type == 'cross_val' and self.val:
      self.Cross_validation(0.2)

    elif not self.val:
      X_train = np.concatenate((self.X_train[0],self.X_train[1]),axis=0)
      y_train = np.concatenate((self.y_train[0],self.y_train[1]),axis=0)
      X_train, y_train = self.random_shuffle(X_train, y_train)
      self.Gradient_update(X_train, y_train)

    else:
      raise ValueError("val_type {!r} is not implemented; use val_type='cross_val' or val=False".format(self.val_type))

  def random_shuffle(self, X_train, y_train):
    """Return the samples in random order (features and labels kept aligned).

    The shuffle is done by splitting in half with ``train_test_split`` and
    re-joining; the halves stay available as ``x_tr``/``x_te``/``y_tr``/``y_te``.
    """
    self.x_tr, self.x_te, self.y_tr, self.y_te = train_test_split(X_train,y_train,test_size=0.5)
    return np.concatenate((self.x_tr, self.x_te),axis=0), np.concatenate((self.y_tr, self.y_te),axis=0)

  def predict(self):
     """Return 0/1 labels for ``self.X_test`` (a decision value of exactly 0 gives 0)."""
     return self._labels(self.X_test, self.w, self.b)

  def accuracy(self):
    """Return the accuracy on the stored test set as a percentage."""
    return accuracy_score(self.y_test, self.predict())*100


In [41]:
class Federated_SVM:
  """Federated training of linear SVM clients with server-side aggregation.

  Each round every client trains locally, the server aggregates the client
  parameters, and the aggregate becomes the starting point of the next round.
  In the final round an extra client trained on zeroed data is injected to
  show the effect of an anomalous participant.
  """

  def __init__(self, n_clients=4, val=True, val_type='k_fold', val_distribution='balanced', k=5, learning_rate=0.001, lambda_param=0.01, n_iters=100):
    """Store the configuration that ``create_clients`` forwards to every SVM."""
    self.n_clients = n_clients
    self.learning_rate = learning_rate
    self.lambda_param = lambda_param
    self.n_iters = n_iters
    self.val = val
    self.val_type = val_type
    self.val_distribution = val_distribution
    self.client_distribution = []  # training-set size per client
    self.k = k
    self.X_test = None
    self.y_test = None
    self.noise = None
    # Best global model found by fit(); None until fit() has run.
    self.w_best = None
    self.b_best = None

  def create_clients(self, X_train, y_train, X_test, y_test):
    """Build one SVM per client from the per-client two-part training data.

    ``X_train[i]`` / ``y_train[i]`` hold client i's data as ``[part0, part1]``.
    All clients share the same test set.
    """
    self.clients=[]
    # Reset so that calling this method again does not accumulate stale sizes.
    self.client_distribution = []
    for i in range(self.n_clients):
      self.client_distribution.append(X_train[i][0].shape[0] + X_train[i][1].shape[0])
      self.clients.append(SVM(X_train[i],y_train[i], X_test, y_test, self.val, self.val_type, self.val_distribution, self.k, self.learning_rate, self.lambda_param, self.n_iters))
    self.X_test = copy.deepcopy(X_test)
    self.y_test = copy.deepcopy(y_test)

  def find_outliers(self, weights, threshold=100, increment_size = False):
    """Return the 1-based indices of clients flagged as anomalous.

    For array parameters the z-score is computed across the entries of that
    client's own vector and the client is flagged only if every entry exceeds
    ``threshold``. When ``increment_size`` is true the last client is always
    flagged. Scalar parameters are compared against the mean / standard
    deviation of all ``weights``.

    NOTE(review): the array branch never compares clients with each other;
    confirm whether a cross-client statistic was intended.
    """
    outliers = []
    for i in range(self.n_clients):
        param = weights[i]
        if isinstance(param, np.ndarray):
            spread = np.std(param)
            # A constant vector has zero spread: treat it as not flagged
            # instead of dividing by zero.
            flagged = False
            if spread > 0:
                z_score = np.abs((param - np.mean(param)) / spread)
                flagged = bool(np.all(z_score > threshold))
            if flagged or (increment_size and i+1 == self.n_clients):
                outliers.append(i+1)
        elif isinstance(param, (float, int)):
            if np.abs(param - np.mean(weights)) > threshold * np.std(weights):
                outliers.append(i+1)
    return outliers

  def fed_averaging(self, parameter_list):
    """Aggregate ``[w_0, b_0, w_1, b_1, ...]`` into a single ``(w, b)``.

    ``w`` is the average of the client weight vectors weighted by training-set
    size; ``b`` is the plain mean of the client biases. Only the first
    ``n_clients`` clients take part, and the size weights are normalised over
    exactly those clients so they always sum to 1.
    """
    sizes = self.client_distribution[:self.n_clients]
    total = sum(sizes)
    w = np.zeros(parameter_list[0].shape[0])
    b = 0
    for i in range(self.n_clients):
        w = np.add(w, parameter_list[2*i]*sizes[i]/total)
        b = b + parameter_list[2*i+1]
    return (w, b/self.n_clients)

  def fit(self, g_iters, aggregator, outlier):
    """Run ``g_iters`` federated rounds and keep the best global model.

    Args:
      g_iters: number of global rounds.
      aggregator: callable mapping ``[w_0, b_0, ...]`` to ``(w, b)``.
      outlier: callable ``(weights, increment_size=...)`` returning the
        indices of anomalous clients (used for reporting only).

    The best aggregate by test accuracy is stored in ``w_best`` / ``b_best``.
    """
    w_best = np.zeros(self.X_test.shape[1])
    b_best = 0
    # No aggregate exists before the first round has finished.
    w_agg, b_agg = None, None
    for i in range(0,g_iters):
      print('global round',i+1)
      print()
      for j in range(0,self.n_clients):
        print('client',j+1)
        perform_client_operation(self.clients[j].X_train[0])
        if w_agg is not None:
          # Warm-start the local model from the previous global aggregate.
          self.clients[j].w = copy.deepcopy(w_agg)
          self.clients[j].b = copy.deepcopy(b_agg)
        self.clients[j].fit()
        print('accuracy', self.clients[j].accuracy())
        print()

      increment_size = (i == g_iters - 1)
      if increment_size:
        # Final round: inject a copy of client 0 whose data is zeroed out.
        print('client', self.n_clients + 1)
        rogue = copy.deepcopy(self.clients[0])
        rogue.X_train = replace_with_zeros(rogue.X_train)
        rogue.y_train = replace_with_zeros(rogue.y_train)
        self.client_distribution.append(copy.deepcopy(self.client_distribution[0]))
        self.clients.append(rogue)
        self.n_clients = self.n_clients + 1
        perform_client_operation(rogue.X_train[0])
        if w_agg is not None:
          rogue.w = copy.deepcopy(w_agg)
          rogue.b = copy.deepcopy(b_agg)
        else:
          # Single-round run: there is no aggregate yet, so train from
          # scratch like every other first-round client.
          rogue.w = np.array([])
          rogue.b = None
        rogue.fit()
        print('accuracy', rogue.accuracy())
        print()

      parameter_list = []
      weights = []
      for client in self.clients[:self.n_clients]:
        parameter_list.append(client.w)
        weights.append(client.w)
        parameter_list.append(client.b)

      outlier_nodes = outlier(weights, increment_size=increment_size)
      print("Anomaly nodes:", outlier_nodes)
      w_agg, b_agg = aggregator(parameter_list)

      if increment_size:
        print("Accuracy before removing anomaly nodes:", self.accuracy(w_agg,b_agg))
        # Remove every trace of the injected client (parameters, size entry
        # and the client itself) before aggregating again.
        del parameter_list[-2:]
        self.clients.pop()
        self.client_distribution.pop()
        self.n_clients = self.n_clients - 1
        w_agg, b_agg = aggregator(parameter_list)
        print("Accuracy after removing anomaly nodes:", self.accuracy(w_agg,b_agg))

      if self.accuracy(w_agg,b_agg)>self.accuracy(w_best,b_best) or i==0:
        w_best=copy.deepcopy(w_agg)
        b_best=copy.deepcopy(b_agg)
      print('global test acc',self.accuracy(w_best,b_best))
      print()

    self.w_best = w_best
    self.b_best = b_best

  def predict(self,w,b):
     """Return 0/1 labels for the test set under the model ``(w, b)``."""
     approx = np.dot(self.X_test, w) - b
     approx = np.sign(approx)
     return np.where(approx<0, 0, 1)

  def accuracy(self,w,b):
    """Return the test accuracy of ``(w, b)`` as a percentage."""
    return accuracy_score(self.y_test, self.predict(w,b))*100


In [42]:
from sklearn.ensemble import IsolationForest

def replace_with_zeros(lst):
    """Blank out a (possibly nested) list of arrays.

    A NumPy array is replaced by a zero array of the same shape and dtype.
    A list is processed element by element, modified in place and returned.
    Any other value is replaced by -1.
    """
    if isinstance(lst, np.ndarray):
        return np.zeros_like(lst)
    if not isinstance(lst, list):
        return -1
    for position, item in enumerate(lst):
        lst[position] = replace_with_zeros(item)
    return lst

def perform_client_operation(client_data, threshold = 25):
    """Report how many samples an Isolation Forest marks as anomalous.

    A forest with a fixed seed is fitted on ``client_data`` and applied to the
    same data. Two lines are printed: the number of samples labelled -1 and
    the number of remaining samples. Nothing is removed from ``client_data``
    and nothing is returned. ``threshold`` is accepted but not used.
    """
    detector = IsolationForest(random_state=0)
    detector.fit(client_data)
    labels = detector.predict(client_data)
    total_anomalies = np.count_nonzero(labels == -1)
    remaining = len(client_data) - total_anomalies
    print ("anomalies found using Isolation Forest:", total_anomalies)
    print ("data left to transfer after dropping anomalies:", remaining)

In [43]:
def get_clients(class1, class2, n_clients = 3, test_size = 0.2, random_state = None):
  """Partition two classes across ``n_clients`` clients via K-Means clusters.

  Each class is clustered separately into ``n_clients`` clusters; client i
  receives cluster i of both classes. Samples of ``class1`` are labelled 0 and
  samples of ``class2`` are labelled 1. A ``test_size`` fraction of every
  cluster is held out, and all held-out parts are pooled into one test set.

  Args:
    class1, class2: feature arrays of the two classes.
    n_clients: number of clients (and clusters per class).
    test_size: held-out fraction passed to ``train_test_split``.
    random_state: seed passed to ``train_test_split``; None keeps the
      splits unseeded, an int makes them reproducible.

  Returns:
    ``(clients_X, clients_y, X_test, y_test)`` where ``clients_X[i]`` and
    ``clients_y[i]`` are ``[class-0 part, class-1 part]`` for client i.
  """
  clients_X = []
  clients_y = []

  clientsXtest = []
  clientsYtest = []

  clusters_1 = KMeans(n_clusters=n_clients, random_state=0).fit_predict(class1)
  clusters_2 = KMeans(n_clusters=n_clients, random_state=0).fit_predict(class2)

  for i in range(n_clients):
    # Select each cluster once instead of re-applying the mask per use.
    members_0 = class1[clusters_1 == i]
    members_1 = class2[clusters_2 == i]

    X_train0, X_test0, y_train0, y_test0 = train_test_split(members_0, np.zeros((members_0.shape[0],)), test_size=test_size, random_state=random_state)
    X_train1, X_test1, y_train1, y_test1 = train_test_split(members_1, np.ones((members_1.shape[0],)), test_size=test_size, random_state=random_state)

    clients_X.append([X_train0, X_train1])
    clients_y.append([y_train0, y_train1])

    clientsXtest.extend([X_test0,X_test1])
    clientsYtest.extend([y_test0,y_test1])

  X_test = np.concatenate(clientsXtest,axis=0)
  y_test = np.concatenate(clientsYtest,axis=0)

  return clients_X,clients_y,X_test,y_test

In [44]:
def get_total_from_clients(clients_X,clients_y):
  """Pool the per-client training data back into one set per part.

  Every entry of ``clients_X`` / ``clients_y`` is a two-element sequence.
  The first elements of all clients are concatenated along axis 0, and so
  are the second elements.

  Returns:
    ``([x_part0, x_part1], [y_part0, y_part1])``.
  """
  def pooled(per_client, part):
    return np.concatenate([entry[part] for entry in per_client], axis=0)

  features = [pooled(clients_X, 0), pooled(clients_X, 1)]
  labels = [pooled(clients_y, 0), pooled(clients_y, 1)]
  return (features, labels)

In [45]:
def load_and_return_data(n1, n2):
  """Load the image dataset and return the samples of two chosen labels.

  The train and test parts are merged, pixel values are scaled by 1/255 and
  every image is flattened to one feature vector.

  Args:
    n1, n2: the two label values to keep.

  Returns:
    ``[(x_n1, y_n1), (x_n2, y_n2)]``: features and labels for each label.
  """
  # NOTE(review): the previous call, tf.keras.datasets.load_dataset('/content/dataset'),
  # is not part of the tf.keras.datasets API and fails with AttributeError.
  # The /255 scaling, the 784-feature reshape and the digit labels used by
  # the caller match MNIST, so MNIST is loaded here; confirm this is the
  # intended data source.
  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
  x_total = np.concatenate((x_train, x_test), axis=0)
  y_total = np.concatenate((y_train, y_test), axis=0)

  # Normalizing and reshaping the data
  x_total = x_total/255
  # Flatten each image; -1 yields 784 features for 28x28 inputs.
  x_total = x_total.reshape(x_total.shape[0], -1)

  x_n1 = x_total[y_total == n1]
  y_n1 = y_total[y_total == n1]

  x_n2 = x_total[y_total == n2]
  y_n2 = y_total[y_total == n2]

  return [(x_n1, y_n1),(x_n2, y_n2)]

# Keep only the samples labelled 0 and 9: data[0] is (x, y) for label 0 and
# data[1] is (x, y) for label 9.
data = load_and_return_data(0, 9)

In [46]:
# Spread both classes over 10 clients; the held-out parts of all clients are
# pooled into one shared test set (X_test, y_test).
clients_X,clients_y,X_test,y_test = get_clients(data[0][0], data[1][0], n_clients = 10)



In [47]:
# Pool the training data of all clients (still grouped per class) for the
# centralised baseline model.
xtrain_gl, ytrain_gl = get_total_from_clients(clients_X,clients_y)

In [48]:
# Federated model: 10 clients, no validation split; n_iters=150 is forwarded
# to every client SVM as its number of local training passes.
f_svm = Federated_SVM(n_clients = 10, val=False, n_iters=150)
f_svm.create_clients(clients_X,clients_y,X_test,y_test)

In [49]:
# Centralised baseline: a single SVM trained on the pooled data and scored on
# the shared test set (accuracy is printed as a percentage).
clf = SVM(xtrain_gl, ytrain_gl, X_test, y_test, val=False, n_iters=1000)
clf.fit()
print(clf.accuracy())

99.42466738583244


In [50]:
# Run 5 global rounds with fed_averaging as the aggregator and find_outliers
# as the anomaly check.
f_svm.fit(5, f_svm.fed_averaging, f_svm.find_outliers)

global round 1

client 1
anomalies found using Isolation Forest: 14
data left to transfer after dropping anomalies: 659
accuracy 93.88709097446961

client 2
anomalies found using Isolation Forest: 10
data left to transfer after dropping anomalies: 697
accuracy 94.4264653002517

client 3
anomalies found using Isolation Forest: 12
data left to transfer after dropping anomalies: 539
accuracy 71.19741100323624

client 4
anomalies found using Isolation Forest: 9
data left to transfer after dropping anomalies: 399
accuracy 83.06364617044228

client 5
anomalies found using Isolation Forest: 12
data left to transfer after dropping anomalies: 360
accuracy 86.01222581805106

client 6
anomalies found using Isolation Forest: 16
data left to transfer after dropping anomalies: 612
accuracy 93.06005034160374

client 7
anomalies found using Isolation Forest: 13
data left to transfer after dropping anomalies: 535
accuracy 95.68500539374327

client 8
anomalies found using Isolation Forest: 11
data left 

## Energy Efficiency

In [None]:
# Data transfer from client to server
import pandas as pd
import time
# NOTE(review): this rebinds `data`, which an earlier cell uses for the
# per-class arrays; re-running those cells after this one needs `data` reloaded.
data = pd.read_csv("diabetes_wireless.csv") # Load diabetes dataset (in CSV format)
output_file = []
start_time = time.time()
for i in range(10):
    output_file.append(data)
end_time = time.time()
result = pd.concat(output_file)
processing_time = end_time - start_time
# Size of the combined frame in KB. The row count (len(result)) is not a byte
# count, so the in-memory size is used instead.
# NOTE(review): the serialized CSV size may be the better proxy for
# "data transferred"; confirm which one is intended.
data_size = result.memory_usage(index=True, deep=True).sum() / 1024
data_transfer_rate = 1 # Define data transfer rate of network (in Mbps)
# KB -> bits, Mbps -> bits per second, seconds -> milliseconds.
transfer_time = (data_size * 1024 * 8) / (data_transfer_rate * 1e6) * 1000 # Time it takes to transfer data (in ms)
total_energy_consumption = 10000 # Define total energy consumption of network during data transfer operation (in Wh)
total_energy_consumption_kWh = total_energy_consumption / 1000 # Convert total energy consumption from Wh to kWh
energy_efficiency = total_energy_consumption_kWh / (data_size / 1024) # Calculate energy efficiency of network (in kWh per MB of data transferred)
print("Energy efficiency of network: {:.4f} kWh per MB of data transferred".format(energy_efficiency))

In [None]:
import time
import psutil
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.datasets import make_classification
Number_of_Sensors = 10
Number_of_Rounds = 5
# Load the dataset and drop rows with missing values
X = pd.read_csv("diabetes_wireless.csv")
X = X.dropna()

# Instantiate Isolation Forest model
model = IsolationForest(n_estimators=100, contamination='auto', random_state=20)

# Sample CPU utilisation before anomaly detection.
# NOTE(review): psutil.cpu_percent reports a utilisation percentage, not
# power; treating the difference as watts below is an assumption to confirm.
energy_before = psutil.cpu_percent(interval=1)

# Start timer
start_time = time.time()

# Fit the model and detect anomalies
model.fit(X)
y_pred = model.predict(X)

# Stop the timer before the next CPU sample: cpu_percent(interval=1) blocks
# for one second and must not be counted as detection time.
elapsed_time = time.time() - start_time

# Sample CPU utilisation after anomaly detection
energy_after = psutil.cpu_percent(interval=1)

# Calculate total energy consumption in Watt-hours (Wh)
energy_consumption = (energy_after - energy_before) * elapsed_time / 3600

# Calculate energy efficiency in kWh per data point (Wh -> kWh divides by 1000)
energy_efficiency_isolation_forest = (energy_consumption / 1000 / len(X)) * Number_of_Sensors * Number_of_Rounds

# Print energy efficiency
print("Energy efficiency of Isolation Forest anomaly detection: {:.6f} kWh per data point".format(energy_efficiency_isolation_forest))


In [None]:
# Data transfer from client to server and from server to client
def _transmission_energy(data_size, transmission_power, distance, n_transmitters):
    """Return (transmission time, energy in Joules, energy in kWh) for one link.

    The energy is the transmission time multiplied by the power, scaled by
    1/1000 and by the number of transmitters.
    """
    airtime = data_size/1000*1024 * 8 / (transmission_power * distance ** 2)
    joules = n_transmitters * airtime * transmission_power / 1000
    return airtime, joules, joules / 3600000

# Client -> server: the energy is summed over all sensors.
data_size = 307080   # in bytes
transmission_power = 10/1000000  # in milliwatts
distance = 1000  # in meters
num_sensors = 10
transmission_time, energy_consumed, energy_consumed_kwh = _transmission_energy(
    data_size, transmission_power, distance, num_sensors)
energy_efficiency = data_size/1000 * num_sensors / energy_consumed_kwh # Calculate the energy efficiency
print("Energy efficiency for transmitting the data from client to the server:", energy_efficiency, "bytes/kWh")

# Server -> client: a single transmitter, so the energy is not multiplied by
# the sensor count. protocol_efficiency is defined here but not used.
data_size = 307080   # in bytes
transmission_power = 20/1000000  # in milliwatts
distance = 1000  # in meters
protocol_efficiency = 0.9
num_sensors = 10
transmission_time, energy_consumed, energy_consumed_kwh = _transmission_energy(
    data_size, transmission_power, distance, 1)
energy_efficiency = data_size/1000 * num_sensors / energy_consumed_kwh # Calculate the energy efficiency
print("Energy efficiency for transmitting the data from server to the client:", energy_efficiency, "bytes/kWh")



In [None]:
# Energy efficiency of the federated exchange: bytes sent per kWh spent by
# all clients over all rounds.

# Define the parameters
data_size = 8198416  # in bytes
num_clients = 10
num_rounds = 5
transmission_power = 20  # in milliwatts
distance = 100  # in meters
protocol_efficiency = 0.8

# Calculate the energy consumed by one client per round
# NOTE(review): transmission_time divides by transmission_power and the next
# line multiplies by it again, so the energy (and the final result) does not
# depend on transmission_power; confirm the intended link model.
transmission_time = data_size * 8 / (transmission_power * distance ** 2)
energy_per_client_per_round = transmission_time * transmission_power / 1000 / protocol_efficiency  # in Joules

# Calculate the total energy consumed by all clients
total_energy_consumed = energy_per_client_per_round * num_clients * num_rounds

# Convert Joules to kWh
total_energy_consumed_kwh = total_energy_consumed / 3600000

# Calculate the energy efficiency
data_transmitted = data_size * num_clients * num_rounds
energy_efficiency = data_transmitted / total_energy_consumed_kwh

# Print the result
print("Energy efficiency:", energy_efficiency, "bytes/kWh")
