In [1]:
import time
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

from user import User
from average import Average
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Seed value used by the cells below (passed to shuffle_df and init_users).
SEED = 0

# Optional global seeding for reproducible runs, currently disabled.
# Uncomment to seed Python hashing, numpy, the stdlib `random` module and TensorFlow.
# import os
# os.environ['PYTHONHASHSEED']=str(SEED)
# np.random.seed(SEED)
# import random
# random.seed(SEED)
# tf.set_random_seed(SEED)
# NOTE: Keras may also need its parallelism disabled for fully reproducible results; see the Keras documentation.

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
def init_model(init_seed=None):
    """
    Build and compile a small feed-forward classifier.

    Architecture: Flatten -> Dense(32, relu) -> Dense(6, softmax).
    Both dense layers use a Glorot-uniform kernel initializer created with
    the same ``init_seed``.

    Parameters
    ----------
    init_seed : int or None
        Seed forwarded to every ``glorot_uniform`` initializer.

    Returns
    -------
    keras.Sequential
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    def seeded_glorot():
        # A fresh initializer instance per layer, all sharing init_seed.
        return keras.initializers.glorot_uniform(seed=init_seed)

    # Wider hidden layers (4096, 1024 and 128 units) are disabled here;
    # only the 32-unit hidden layer is active.
    flatten = keras.layers.Flatten()
    hidden = keras.layers.Dense(
        32, activation='relu', kernel_initializer=seeded_glorot())
    output = keras.layers.Dense(
        6, activation='softmax', kernel_initializer=seeded_glorot())

    model = keras.Sequential([flatten, hidden, output])

    compile_options = dict(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.compile(**compile_options)

    return model
def read_file(file):
    """
    Load a CSV file into a float32 DataFrame with missing values set to 0.

    Cells containing the literal string "?" are treated as missing.

    Parameters
    ----------
    file : path or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        All columns cast to ``numpy.float32``; missing entries replaced by 0.
    """
    raw = pd.read_csv(file)

    # "?" marks a missing value; turn it into NaN so the float cast succeeds
    with_nans = raw.replace(["?"], np.nan)
    as_float32 = with_nans.astype(np.float32)

    # zero-imputation of every remaining NaN
    return as_float32.fillna(0)

def shuffle_df(df, seed = None):
    """
    Return a row-shuffled copy of ``df`` with a fresh 0..n-1 index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shuffle; it is not modified.
    seed : int or None
        Seed for the ``numpy.random.RandomState`` that generates the row
        permutation. The same seed always yields the same ordering; with
        ``None`` numpy seeds the generator itself, so the ordering is not
        reproducible.

    Returns
    -------
    pandas.DataFrame
        The shuffled rows, index reset and old index dropped.
    """
    # Honour the caller's seed rather than a module-level constant, so the
    # function does what its signature promises.
    rng = np.random.RandomState(seed=seed)
    shuffled = df.take(rng.permutation(df.shape[0]))

    return shuffled.reset_index(drop=True)

def acquire_user_data(df, for_user = None, seed = None):
    """
    Select the rows of one user and split them into features and target.

    No train/validation/test split is performed here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "User" column (used for filtering) and a "Class"
        column (returned as the target).
    for_user : scalar or None
        Value matched against the "User" column. ``None`` keeps every row.
    seed : any
        Currently unused; accepted for interface compatibility.

    Returns
    -------
    (features, target)
        ``features`` is ``df`` without its first two columns and ``target``
        is the "Class" column. If no rows remain after filtering, two empty
        DataFrames are returned instead.
    """
    # `is not None` so that a user id of 0 is still treated as a real filter
    if for_user is not None:
        df = df[df["User"] == for_user]

    if df.shape[0] == 0:
        # nothing to return for this user: hand back an empty pair
        empty = pd.DataFrame()
        return empty, empty

    target = df["Class"]

    # Drops the first two columns by POSITION.
    # NOTE(review): assumes these are the "Class" and "User" columns - confirm
    # against the dataset's column order.
    features = df.drop(df.columns[[0, 1]], axis=1)
    return features, target

In [3]:
def init_users(df, averaging_methods, averaging_metric = "accuracy", seed = None):
    """
    Build one ``tf.data.Dataset`` per user found in ``df``.

    Requires ``df`` to contain a "User" column whose values are the numeric
    ids 0 .. unique_user_count-1; ids in that range are looked up one by one.

    Parameters
    ----------
    df : pandas.DataFrame
        Full data set, including the "User" column.
    averaging_methods : list
        Currently unused by this function; accepted for interface compatibility.
    averaging_metric : str
        Currently unused by this function; accepted for interface compatibility.
    seed : any
        Forwarded to ``acquire_user_data``.

    Returns
    -------
    dict
        Maps user id -> ``tf.data.Dataset`` of (features, target) slices.
        Ids without any rows are skipped and do not appear in the dict.
    """
    print("Initialising User instances...")
    users = dict()
    num_users = df["User"].nunique()

    for user_id in range(num_users):

        user_df, target = acquire_user_data(df = df, for_user=user_id, seed = seed)

        # Check the per-user frame, not the full input frame: the full frame
        # is never empty here, so testing it would never skip anyone.
        if user_df.shape[0] == 0:
            print(f"User {user_id} has no data, no instance created...")
            continue

        users[user_id] = tf.data.Dataset.from_tensor_slices(
            (user_df.values, target.values)
        )

    print(f"{len(users.keys())} User datasets created!")
    return users

In [5]:
# Seed for this cell; same value as the SEED set in the first cell.
SEED = 0

# Load the CSV (missing values imputed with 0 by read_file), then shuffle the rows.
df = read_file("../dataset/allUsers.lcl.csv")
df = shuffle_df(df, SEED)

# Passed to init_users below, which does not currently use it.
averaging_methods = [Average.all,Average.std_dev,Average.weighted_avg]
# df.head()

# Build the per-user tf.data.Dataset dictionary, keyed by user id.
users= init_users(df = df, 
                        averaging_methods = averaging_methods, 
                        seed = SEED)
    

Initialising User instances...
14 User datasets created!


In [8]:
# Sanity check: print one (features, target) pair from user 0's dataset.
for feat, targ in users[0].take(1):
    print ('Features: {}, Target: {}'.format(feat, targ))
    break  # appears redundant given take(1) above - TODO confirm

Features: Tensor("IteratorGetNext_2986:0", shape=(36,), dtype=float32), Target: Tensor("IteratorGetNext_2986:1", shape=(), dtype=float32)


In [2]:
from __future__ import absolute_import, division, print_function

import collections
import warnings
from six.moves import range
import numpy as np
import six
import tensorflow as tf
import tensorflow_federated as tff

# Silence all warnings for the rest of the session.
warnings.simplefilter('ignore')

# Switch TensorFlow to its v2 behaviour via the compat.v1 API.
tf.compat.v1.enable_v2_behavior()

# Seed numpy's global random generator.
np.random.seed(0)

# NOTE: If the statement below fails, it means that you are
# using an older version of TFF without the high-performance
# executor stack. Call `tff.framework.set_default_executor()`
# instead to use the default reference runtime.
if six.PY3:
    tff.framework.set_default_executor(tff.framework.create_local_executor())

# Smoke test: build and immediately invoke a trivial federated computation.
tff.federated_computation(lambda: 'Hello, World!')()

ModuleNotFoundError: No module named 'tensorflow_federated'