In [14]:
from tensorflow_federated import python as tff
import tensorflow_federated

import os
import collections
import json
import warnings
warnings.filterwarnings('ignore')

import numpy as np

import nest_asyncio
nest_asyncio.apply()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Flatten
from tensorflow.keras.layers import Conv1D, MaxPooling1D

from sklearn.metrics import classification_report

In [64]:
x = np.load('text.npy', allow_pickle=True)

#Convert to sklearn support form
u = []
for txt_list in x:
    u.append(' '.join(txt_list))
    
#Use the tf-idf method to extract features
v = TfidfVectorizer(max_features=260)
x = v.fit_transform(u).toarray()

#One-hot encoding for y
y = np.load('label.npy')

enc = OneHotEncoder()

y = enc.fit_transform(y.reshape(-1,1)).toarray()
y = np.argmax(y,axis=1)

#Test set proportion is 0.5
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.5)
x_val = x_train[-1000:]
y_val = y_train[-1000:]

def resize(train_X):
    train_X = train_X.reshape(train_X.shape[0],train_X.shape[1],1)
    return train_X

x_train = resize(x_train)
x_test = resize(x_test)
x_val = resize(x_val)

In [68]:
batch_size = 64

#Set up two servers
train1_dataset = tf.data.Dataset.from_tensor_slices((x_train[0:1000,:], y_train[0:1000])).batch(batch_size)
train2_dataset = tf.data.Dataset.from_tensor_slices((x_val[0:1000,:], y_val[0:1000])).batch(batch_size)

federated_train_data = [train1_dataset,train2_dataset]

In [72]:
#CNN
spec = tf.keras.layers.InputSpec(
    shape=(None, 260, 1),
    allow_last_axis_squeeze=True
)

from tensorflow.keras import losses

def form_model(filters1=10, num_class=9):   
    models = Sequential()

    models.add(Conv1D(filters1,
                     kernel_size=3,
                     input_shape=(260,1),
                     activation='tanh',
                     strides=2))
    models.add(Conv1D(filters1,
                     kernel_size=3,
                     activation='tanh',
                     strides=2))
        
    models.add(MaxPooling1D(pool_size=2))
    
    models.add(Conv1D(filters1,
                     kernel_size=3,
                     activation='tanh',
                     strides=2))
        
    models.add(MaxPooling1D(pool_size=2))
    

    models.add(Flatten())

    models.add(Dense(200))
    models.add(Activation('tanh'))
    models.add(Dense(100))
    models.add(Dense(num_class))
    models.add(Activation('softmax'))
    
    return tff.learning.from_keras_model(models,
                                        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                                         input_spec = train1_dataset.element_spec,
                                         metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
                                        )

In [70]:
iterative_process = tff.learning.build_federated_averaging_process(form_model,
                                                                 client_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.1))

print(iterative_process.initialize.type_signature.formatted_representation())

( -> <
  model=<
    trainable=<
      float32[3,1,10],
      float32[10],
      float32[640,200],
      float32[200],
      float32[200,100],
      float32[100],
      float32[100,9],
      float32[9]
    >,
    non_trainable=<>
  >,
  optimizer_state=<
    int64
  >,
  delta_aggregate_state=<
    value_sum_process=<>,
    weight_sum_process=<>
  >,
  model_broadcast_state=<>
>@SERVER)


In [71]:
state = iterative_process.initialize()
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

round  2, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('sparse_categorical_accuracy', 0.1695), ('loss', 2.1838093), ('num_examples', 2000), ('num_batches', 32)]))])
round  3, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('sparse_categorical_accuracy', 0.2225), ('loss', 2.162688), ('num_examples', 2000), ('num_batches', 32)]))])
round  4, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('sparse_categorical_accuracy', 0.297), ('loss', 2.1378682), ('num_examples', 2000), ('num_batches', 32)]))])
round  5, metrics=OrderedDict([('broadcast', ()), ('aggregation', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('train', OrderedDict([('sparse_categorical_accuracy', 0.3765), ('loss', 2.105277), ('num_examples', 2000), 