<a href="https://colab.research.google.com/github/JeraldYik/rcp-colab-notebooks/blob/main/Objective_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Referred to article: https://medium.com/@yvettewu.dw/tutorial-kaggle-api-google-colaboratory-1a054a382de0
!pip install kaggle --upgrade

!rm -rf .kaggle
!mkdir .kaggle
import json
username = "jeraldyik"
key = "889ee4e40f578ab82c7a7f9bbf1018dc"
token = {"username":username,"key":key}
with open("/content/.kaggle/kaggle.json", "w") as file:
    json.dump(token, file)

!chmod 600 /content/.kaggle/kaggle.json

!mkdir ~/.kaggle
!cp /content/.kaggle/kaggle.json ~/.kaggle/kaggle.json
!kaggle config set -n path -v{/content}

!kaggle competitions download -c dogs-vs-cats -p /content

!unzip test1.zip
!unzip train.zip

import os, cv2, itertools
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

!pip install np_utils

from keras.utils.np_utils import to_categorical

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout

from sklearn.utils import shuffle

!pip install sklearn
import sklearn
from sklearn.model_selection import train_test_split

!pip install tensorflow-privacy
# will explore other DP Optimisers like DP Adam in the near future
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
from tensorflow_privacy.privacy.dp_query import gaussian_query

Requirement already up-to-date: kaggle in /usr/local/lib/python3.6/dist-packages (1.5.9)
mkdir: cannot create directory ‘/root/.kaggle’: File exists
- path is now set to: {/content}
sampleSubmission.csv: Skipping, found more recently modified local copy (use --force to force download)
test1.zip: Skipping, found more recently modified local copy (use --force to force download)
train.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  test1.zip
replace test1/1.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Archive:  train.zip
replace train/cat.0.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [None]:
class Cloud:
  ''' Central party of the simulated federated setup.

  Reads the training images, cuts them into one shard per client, and owns
  the global Keras model whose weights are overwritten with the aggregated
  weights after every round.
  '''

  def __init__(self, num_clients):
    ''' Store data locations and hyper-parameters.

    Args:
      num_clients: number of data shards produced by generate_data().
    '''
    self.TRAIN_DIR = './train/'
    self.TEST_DIR = './test1/'

    self.ROWS = 64
    self.COLS = 64
    self.CHANNELS = 3
    self.EPOCHS = 50
    self.BATCH_SIZE = 64
    self.LEARNING_RATE = 0.25
    # Both values are handed to GaussianSumQuery in generate_model().
    # NOTE(review): a clip norm of 1e9 with stddev 0 presumably disables
    # clipping and noise, i.e. no effective privacy yet - confirm intent.
    self.L2_NORM_CLIP = 1.0e9
    self.STDDEV = 0.0
    self.num_clients = num_clients
    self.classes = {0: 'Cats', 1: 'Dogs'}
    self.model = None

  def read_image(self, file_path):
    ''' Load one image in colour and resize it to ROWS x COLS pixels.

    Args:
      file_path: path of the image file.

    Returns:
      The resized image as returned by cv2.resize.

    Raises:
      ValueError: if OpenCV could not read the file.
    '''
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
      # cv2.imread reports an unreadable file by returning None, not by raising.
      raise ValueError("Could not read image: {}".format(file_path))
    # dsize is (width, height), i.e. (COLS, ROWS); the result has ROWS rows.
    return cv2.resize(img, (self.COLS, self.ROWS), interpolation=cv2.INTER_CUBIC)

  def prep_data(self,images):
    ''' Turn a list of image paths into an image array and a label array.

    The label comes from the file name only (not from its directories):
    1 when it contains 'dog', 0 when it contains 'cat', otherwise the leading
    numeric id of the file name (test images).

    Args:
      images: list of image file paths.

    Returns:
      (x, y): x is a uint8 array of shape (m, ROWS, COLS, CHANNELS) and y a
      float array of shape (m, 1), where m == len(images).
    '''
    m = len(images)

    x = np.empty((m,self.ROWS,self.COLS,self.CHANNELS), dtype=np.uint8)
    y = np.zeros((m,1))
    print("x.shape is {}".format(x.shape))

    for i,image_file in enumerate(images) :
      x[i] = self.read_image(image_file)

      # Look at the file name alone so that a parent directory such as
      # "dogs-vs-cats/" cannot decide the label.
      file_name = os.path.basename(image_file)
      name_lower = file_name.lower()
      if 'dog' in name_lower :
        y[i,0] = 1
      elif 'cat' in name_lower :
        y[i,0] = 0
      else : # for test data
        y[i,0] = float(file_name.split('.')[0])

      if i%5000 == 0 :
        print("Proceed {} of {}".format(i, m))

    return x,y

  def generate_data(self):
    ''' Load, shuffle and shard the training set (test data is not handled yet).

    Every client receives len(data) // num_clients consecutive samples of the
    shuffled data; samples left over by the integer division are not used.

    Returns:
      (x_train_arr, y_train_arr): two lists of length num_clients holding the
      image shard and the label shard of each client.
    '''
    train_images = [self.TRAIN_DIR+i for i in os.listdir(self.TRAIN_DIR)]

    x_train, y_train = self.prep_data(train_images)
    x_train, y_train = shuffle(x_train, y_train)

    shard_size = len(x_train)//self.num_clients
    x_train_arr = [x_train[shard_size*k:shard_size*(k+1)] for k in range(self.num_clients)]
    y_train_arr = [y_train[shard_size*k:shard_size*(k+1)] for k in range(self.num_clients)]

    return x_train_arr, y_train_arr

  def generate_model(self):
    ''' Build and compile the global model and store it in self.model.

    Architecture: four 3x3 Conv2D + MaxPooling2D stages (32, 64, 128 and 256
    filters; Dropout(0.4) after the last three), a 1x1 Conv2D with 512
    filters, Flatten, Dropout(0.4), Dense(120, relu) and a 2-unit softmax
    output.

    Loss function used - categorical_crossentropy

    Optimizer used - DPGradientDescentOptimizer (tensorflow_privacy) fed by a
    GaussianSumQuery built from L2_NORM_CLIP and STDDEV.
    '''

    model = Sequential()

    model.add(Conv2D(32, (3,3), input_shape=(self.ROWS, self.COLS, self.CHANNELS), activation='relu'))
    model.add(MaxPooling2D(pool_size = (2,2)))

    model.add(Conv2D(64, (3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size = (2,2)))
    model.add(Dropout(0.4))

    model.add(Conv2D(128, (3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size = (2,2)))
    model.add(Dropout(0.4))

    model.add(Conv2D(256, (3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size = (2,2)))
    model.add(Dropout(0.4))

    model.add(Conv2D(512, (1,1), activation='relu'))

    model.add(Flatten())
    model.add(Dropout(0.4))

    model.add(Dense(units=120, activation='relu'))
    # categorical_crossentropy expects a probability distribution over the
    # (one-hot encoded) classes, which softmax provides; independent sigmoid
    # units do not sum to 1.
    model.add(Dense(units=2, activation='softmax'))

    # DP SGD optimiser from tensorflow_privacy library
    # TODO: research on what value of l2_norm_clip to use
    # refer to: https://github.com/tensorflow/privacy/search?q=gaussian_query
    dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=self.L2_NORM_CLIP, stddev=self.STDDEV)
    dpGradientDescentOptimizer = DPGradientDescentOptimizer(dp_sum_query=dp_sum_query, learning_rate=self.LEARNING_RATE)

    model.compile(optimizer=dpGradientDescentOptimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()

    self.model = model

  def send_model(self):
    ''' Return the global model (the stored object itself, not a copy). '''
    return self.model

  def receive_and_save_weights(self, weights):
    ''' Receive new weights from the aggregator and set them on the model. '''
    print('-- Saving weights... --')
    self.model.set_weights(weights)

In [None]:
class Client:
  ''' One participant of the simulated federated setup.

  Holds its own shard of the training data and trains whatever model it was
  handed on that shard.
  '''

  def __init__(self, idx):
    ''' Create an empty client.

    Args:
      idx: position of this client in the per-client data lists.
    '''
    self.idx = idx
    self.model = None
    self.x_train = None
    self.y_train = None
    self.x_train_norm = None
    self.y_train_one_hot = None
    self.x_val_norm = None
    self.y_val_one_hot = None

  def download_model(self,model):
    ''' Keep a reference to the given model (no copy is made). '''
    self.model = model

  def retrieve_model(self):
    ''' Return the model this client currently holds. '''
    return self.model

  def download_data(self,data):
    ''' Pick this client's shard out of the per-client data.

    Args:
      data: pair (x_shards, y_shards); both are indexed with self.idx.
    '''
    self.x_train = data[0][self.idx]
    self.y_train = data[1][self.idx]

  def manipulate_data(self):
    ''' Split the shard 80/20, one-hot encode labels and scale images to [0, 1].

    Fills x_train_norm, y_train_one_hot, x_val_norm and y_val_one_hot.
    '''
    x_train, x_val, y_train, y_val = train_test_split(self.x_train, self.y_train, test_size=0.2, random_state=1)

    print("Train shape: {}".format(x_train.shape))
    print("Train label shape: {}".format(y_train.shape))
    print("Validation shape: {}".format(x_val.shape))
    print("Validation label shape: {}".format(y_val.shape))

    # Use one class count for both splits; inferring it per split gives
    # one-hot arrays of different widths when a split lacks the highest class.
    num_classes = int(max(np.max(y_train), np.max(y_val))) + 1

    self.y_train_one_hot = to_categorical(y_train, num_classes=num_classes)
    print("y_train_one_hot.shape: {}".format(self.y_train_one_hot.shape))

    print("num_classes: {}".format(num_classes))

    self.y_val_one_hot = to_categorical(y_val, num_classes=num_classes)
    print("y_val_one_hot.shape: {}".format(self.y_val_one_hot.shape))

    # Cast before dividing: uint8 / 255 would produce float64 arrays, which
    # take eight times the memory of the raw images.
    self.x_train_norm = x_train.astype(np.float32) / 255
    self.x_val_norm = x_val.astype(np.float32) / 255

  def train(self, epochs, batch_size):
    ''' Fit the held model on the prepared training data.

    Args:
      epochs: number of epochs passed to model.fit.
      batch_size: batch size passed to model.fit.
    '''
    self.model.fit(
        self.x_train_norm,
        self.y_train_one_hot,
        validation_data=(self.x_val_norm, self.y_val_one_hot),
        epochs=epochs,
        batch_size=batch_size)

  def get_weights_from_model(self):
    ''' Return the current weights of the held model. '''
    return self.model.get_weights()


In [None]:
class Aggregator:
  ''' Collects the weights of every client and averages them layer by layer. '''

  def __init__(self, num_clients):
    ''' Create an aggregator with one empty weight slot per client.

    Args:
      num_clients: number of clients expected to report in each round.
    '''
    self.weights_from_clients = [None for _ in range(num_clients)]
    self.generation = 0
    self.num_clients = num_clients

  def get_weights_from_clients(self, client_idx, weights):
    ''' Store the weights reported by one client.

    Args:
      client_idx: slot of the reporting client.
      weights: list of per-layer arrays.
    '''
    print('-- Received weights from Client {} --'.format(client_idx))
    self.weights_from_clients[client_idx] = weights

  def aggregate_weights(self):
    ''' Average the stored client weights layer by layer.

    Returns:
      A list with one array per layer holding the element-wise mean over all
      clients.

    Raises:
      ValueError: if there are no clients or a client has not reported yet.
    '''
    print('--- Aggregating weights... ---')

    if not self.weights_from_clients:
      raise ValueError('Cannot aggregate weights without any client')
    missing = [idx for idx, w in enumerate(self.weights_from_clients) if w is None]
    if missing:
      raise ValueError('No weights received from client(s): {}'.format(missing))

    # Start from zero arrays shaped like the layers of the first client.
    new_weights = [np.zeros(shape=layer.shape) for layer in self.weights_from_clients[0]]

    # Add value of weights from all clients at each layer.
    for client_weights in self.weights_from_clients:
      for i, w in enumerate(client_weights):
        new_weights[i] += w

    # Find average value of weights at each layer (in place).
    for layer in new_weights:
      layer /= self.num_clients

    return new_weights

  def send_weights_to_cloud(self, cloud):
    ''' Aggregate the current round and hand the result to the cloud.

    Args:
      cloud: object exposing receive_and_save_weights(weights).
    '''
    aggregated_weights = self.aggregate_weights()
    cloud.receive_and_save_weights(aggregated_weights)
    # Empty the slots so that a client which does not report in the next
    # round is detected instead of silently contributing stale weights.
    self.weights_from_clients = [None for _ in range(self.num_clients)]
    self.generation += 1



In [None]:
# Build the cloud: load and shard the training data, then create the global model.
print('--- Preparing Cloud ---')

NUM_CLIENTS = 2

cloud = Cloud(num_clients=NUM_CLIENTS)
data = cloud.generate_data()
cloud.generate_model()

# One placeholder per client; the slots are filled in a later cell.
clients = [None] * cloud.num_clients

--- Preparing Cloud ---
x.shape is (25000, 64, 64, 3)
Proceed 0 of 25000
Proceed 5000 of 25000
Proceed 10000 of 25000
Proceed 15000 of 25000
Proceed 20000 of 25000
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_17 (Conv2D)    

In [None]:
# The aggregator needs to know how many clients report in every round.
print('--- Preparing Aggregator ---')

aggregator = Aggregator(num_clients=cloud.num_clients)

--- Preparing Aggregator ---


In [None]:
# Populate clients array and save static data into clients
for slot, _ in enumerate(clients):
  clients[slot] = Client(slot)
  new_client = clients[slot]
  new_client.download_data(data)
  new_client.manipulate_data()


Train shape: (10000, 64, 64, 3)
Train label shape: (10000, 1)
Validation shape: (2500, 64, 64, 3)
Validation label shape: (2500, 1)
y_train_one_hot.shape: (10000, 2)
num_classes: 2
y_val_one_hot.shape: (2500, 2)
Train shape: (10000, 64, 64, 3)
Train label shape: (10000, 1)
Validation shape: (2500, 64, 64, 3)
Validation label shape: (2500, 1)
y_train_one_hot.shape: (10000, 2)
num_classes: 2
y_val_one_hot.shape: (2500, 2)


In [None]:
NUM_GENERATION = 3

for generation in range(NUM_GENERATION):
  # cloud.send_model() hands out the same model object to every client, so
  # without a reset each client would continue from the weights left behind
  # by the previous client. Snapshot the global weights once per generation
  # and restore them before each client trains, so that every client starts
  # from the same global model and the average is taken over independent
  # local updates.
  global_weights = cloud.send_model().get_weights()

  for client in clients:

    print('\n---- Generation {}. For Client {} ----'.format(generation+1,client.idx))
    client.download_model(cloud.send_model())
    client.retrieve_model().set_weights(global_weights)

    # One local epoch per generation; cloud.EPOCHS is not used here.
    client.train(epochs=1, batch_size=cloud.BATCH_SIZE//cloud.num_clients)

    # pass trained weights to aggregator
    aggregator.get_weights_from_clients(client_idx=client.idx, weights=client.get_weights_from_model())

  # AFTER CLIENTS ARE DONE TRAINING IN THIS GEN
  # aggregator averages the weights and sends them over to cloud
  aggregator.send_weights_to_cloud(cloud)


---- Generation 1. For Client 0 ----
-- Received weights from Client 0 --

---- Generation 1. For Client 1 ----
-- Received weights from Client 1 --
--- Aggregating weights... ---
-- Saving weights... --

---- Generation 2. For Client 0 ----
-- Received weights from Client 0 --

---- Generation 2. For Client 1 ----
-- Received weights from Client 1 --
--- Aggregating weights... ---
-- Saving weights... --

---- Generation 3. For Client 0 ----
-- Received weights from Client 0 --

---- Generation 3. For Client 1 ----
-- Received weights from Client 1 --
--- Aggregating weights... ---
-- Saving weights... --
