In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from user import User

from tensorflow import keras
from sklearn.model_selection import train_test_split
from timeit import default_timer as timer

In [None]:
def _draw_graphs(user, loss = True, accuracy = True):
    # this is from the book 74,75
    # history = model.fit(...)
    history = user.get_history()
    history_dict = history.history

    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    acc_values = history_dict['acc']
    val_acc_values = history_dict['val_acc']
    epochs = range(1, len(loss_values) + 1)
    plt.xlabel('Epochs')

    if loss:
        plt.plot(epochs, acc_values, 'b', label='Training acc')
        plt.plot(epochs, val_acc_values, 'r', label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.show()
        plt.clf()
    if accuracy:
        plt.plot(epochs, loss_values, 'b', label='Training loss')
        plt.plot(epochs, val_loss_values, 'r', label='Validation loss')
        plt.title('Training and validation loss')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()
        plt.clf()
def evaluate_user(user, verbose = True):
    "prints the loss and accuracy for the given User instance"
    test_data = user.get_test_data()
    test_class = user.get_test_class()
    model = user.get_model()
    evaluation = model.evaluate(test_data,
                                test_class,
                                verbose = verbose)
    return evaluation
def standard_deviation(lst):
    avg = sum(lst)/len(lst)
    temp = 0
    for i in lst:
        temp += (i-avg)**2
    std_dev = ((1/len(lst))*(temp))**0.5
    return std_dev
def smooth_curve(points, factor=0.9):
    # from the book
    smoothed_points = []
    for point in points:
        if smoothed_points:
            previous = smoothed_points[-1]
            smoothed_points.append(previous * factor + point * (1 - factor))
        else:
            smoothed_points.append(point)
    return smoothed_points
def read_file(file):
    "return 2d df after imputing with 0s"

    # read data
    df = pd.read_csv(file)

    # replace the question marks with NaN and then change data type to float 32
    df.replace(["?"],np.nan, inplace = True)
    df = df.astype(np.float32)

    # imputation
    df.fillna(0,inplace=True) # fill nulls with 0
    return df
def split_dataframe(df, for_user = None, val_size = 0.25, test_size =  0.75, seed = 1):
    # split into train, validation and test data using sklearn and return dfs for each
    if for_user!=None:
        df = df[df["User"] == for_user]
    if df.shape[0] == 0:
        # if no data for the user, then return 9 empty dfs as per the api
        # print(f"Dataframe for user {user} is of shape {df.shape}, no data. Skipping...")
        df = pd.DataFrame()
        return (df for _ in range(9))

    df_train, df_test = train_test_split(df,
                                         test_size = test_size,
                                         random_state = seed)

    df_train, df_val  = train_test_split(df_train,
                                         test_size = val_size,
                                         random_state = seed)

    # store class and user information (in order)
    df_val_class, df_train_class, df_test_class = df_val["Class"], df_train["Class"], df_test["Class"]
    df_val_user,  df_train_user,  df_test_user  = df_val["User"],  df_train["User"],  df_test["User"]

    # drop the class and user identifier columns from data frame
    df_val   = df_val.  drop(df_train.columns[[0,1]], axis=1)
    df_train = df_train.drop(df_train.columns[[0,1]], axis=1)
    df_test  = df_test. drop(df_test. columns[[0,1]], axis=1)

    return df_val, df_val_class,  df_val_user,\
        df_test, df_test_class, df_test_user, \
        df_train, df_train_class, df_train_user
def init_model(init_seed=1):
    model = keras.Sequential([
        keras.layers.Flatten(),
        # keras.layers.Dense(2048, activation='relu',
            # kernel_initializer=keras.initializers.glorot_uniform(seed=init_seed)),
        keras.layers.Dense(128, activation='relu',
            kernel_initializer=keras.initializers.glorot_uniform(seed=init_seed)),
        keras.layers.Dense(32, activation='relu',
            kernel_initializer=keras.initializers.glorot_uniform(seed=init_seed)),
        keras.layers.Dense(6, activation='softmax')
    ])

    model.compile(
        optimizer = 'adam',
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy']
    )

    return model
def init_users(df):
    print("Initialising User instances...")
    users = dict()
    num_users = df["User"].nunique()

    for user_id in range(-1,num_users):

        i = user_id

        if user_id < 0: # for global user with id -1
            user_id = None

        df_val, df_val_class,  df_val_user,\
        df_test, df_test_class, df_test_user,\
        df_train, df_train_class, df_train_user = split_dataframe(df, for_user=user_id)

        user_id = i

        if df_train.shape[0]==0:
            print(f"User {user_id} has no data, no instance created...")
            continue

        model = init_model(init_seed = 1)

        users[user_id] = User(id=user_id,
                          model = model,
                          train_class = df_train_class,
                          train_data = df_train,
                          val_class = df_val_class,
                          val_data = df_val,
                          test_class = df_test_class,
                          test_data = df_test)

    global_user = users.pop(-1)

    print(f"{len(users.keys())} User instances and a global user created!")
    return users, global_user
def train_model(user, epochs = 16,
                weights = None,
                verbose_fit = False, verbose_evaluate = False):
    # https://www.tensorflow.org/beta/tutorials/keras/basic_classification
    # same seed value for consistency sake, across all trainings too
    """
    trains the model for the user
    and updates the weights and history attribute for the user too
    """

    train_data = user.get_train_data()
    train_class = user.get_train_class()
    val_data = user.get_val_data()
    val_class = user.get_val_class()
    model = user.get_model()

    if weights != None: # if provided, update model weights
        model.set_weights(weights)

    e = evaluate_user(user, verbose = verbose_evaluate)
    user.add_pre_fit_evaluation(e)


    history = model.fit(
        train_data,
        train_class,
        epochs = epochs,
        verbose = verbose_fit,
        # batch_size = 2**8, #4k
        # use_multiprocessing = True,
        validation_data = (val_data, val_class)
    )

    e = evaluate_user(user, verbose = verbose_evaluate)
    user.add_post_fit_evaluation(e)

    # update user data
    user.set_history(history)

    """try just returning weights regarldess"""
    # if return_weights: # try just this
    # return model.get_weights()
    return

def num_users(users):
    count = 0
    for user in users.values():
        # if user.get_id() >= 0:
            count+=1
    return count

def train_users(users, epochs,
                new_weights = None,
                train_model_verbose_evaluate = 0,
                train_model_verbose_fit = False,
                verbose = True,
                threshold = 0):
    for user in users.values():
        # if user.get_id() < 0:
        #     continue

        if verbose:
            message = f"User {user.get_id()} being trained on the model...\n"
            print(message)

        train_model(
            user,
            epochs = epochs,
            weights = new_weights, # if none, then wont be updated
            verbose_fit = train_model_verbose_fit,
            verbose_evaluate = train_model_verbose_evaluate
        )

        # val_acc = user.get_history().history["val_acc"][-1] #get the final one
        # if val_acc>=threshold:
        #     weights.append(weight)
        # else:
        #     print(f"User {user.get_id():>2} skipped. Validation Acc of \
        #     {val_acc:>.4f}\did not match the threshold of at least {threshold}")

        if verbose:
            message = f"User {user.get_id()} done!\n"
            print(message)

    return
def average_weights(users):
    new_weights = []
    count_layer_indices = len(users[0].get_weights())
    for data_index in range(count_layer_indices):
        temp_layer_data = None
        for user in users.values():
            # if user.get_id() <0:
            #     continue
            user_layer_data = user.get_weights() #nested list of [weights] and [biases]
            if temp_layer_data is None:
                temp_layer_data = user_layer_data[data_index]
                continue
            temp_layer_data += user_layer_data[data_index]

        new_weights.append(temp_layer_data/num_users(users))

    return new_weights


def train_fed(epochs, rounds, users,
              threshold = 0,
              train_user_verbose = False,
              train_model_verbose_evaluate = False,
              train_model_verbose_fit = False):

    new_weights = None
    for i in range(rounds):
        message = f"{'*'*32} {i:^4} {'*'*32} "
        print(message)

        train_users(users, epochs = EPOCHS,
                   new_weights = new_weights,
                   verbose = train_user_verbose,
                   train_model_verbose_evaluate = train_model_verbose_evaluate,
                   train_model_verbose_fit = train_model_verbose_fit,
                   threshold = threshold)
        new_weights = average_weights(users)

        message = f"{'*'*32} {'DONE':^4} {'*'*32}"
        print(message)
    return


In [3]:
# algorithm:
df = read_file("../dataset/allUsers.lcl.csv")
df_unshuffled = df
# shuffle the records
df = df.sample(frac = 1)

DEVICE = '/cpu:0'
ROUNDS = 4
EPOCHS = 16 # 16 is good


with tf.device(DEVICE):
    users, global_user = init_users(df) # return a dictionary of users with data

    train_fed(users = users, rounds = ROUNDS,
              epochs = EPOCHS,threshold = 0,
              train_user_verbose = False,
              train_model_verbose_evaluate = True,
              train_model_verbose_fit = False)


#     print("** Pre local training **")
#     print_stats(users, save_as="pre-local-training-threshold-0.9.png")
#     print("** Post local training **")
#     print_stats(users, save_as="post-local-training-threshold-0.9.png")

#     users, global_user = init_users(df) # return a dictionary of users with data

#     train_fed(users = users, rounds = ROUNDS,
#               epochs = EPOCHS, threshold = 0)

#     print("** Pre local training **")
#     print_stats(users, save_as="pre-local-training-threshold-0.0.png")
#     print("** Post local training **")
#     print_stats(users, save_as="post-local-training-threshold-0.0.png")


    print("Global user training and validation")
    train_model(global_user,
       epochs = EPOCHS*ROUNDS,
       verbose_fit = False,
       verbose_evaluate = True,
    )
    draw_graphs(global_user)


W1005 14:00:10.802193 13388 deprecation.py:506] From c:\program files\python37\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Initialising User instances...
User 3 has no data, no instance created...
13 User instances and a global user created!
DONE ****************
DONE ****************
DONE ****************


DONE ****************
Global user training and validation


NameError: name 'draw_graphs' is not defined

In [None]:
evaluate_user(global_user, verbose = False)