# Federated Learning - Minimal Example

## Setup
---

### Imports
---

In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from itertools import product
from math import floor
import time

import tensorflow as tf
import tensorflow_federated as tff
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from keras.callbacks import CSVLogger

# -> check tff
#print(tff.federated_computation(lambda: 'Hello World')()) 

2023-05-10 14:09:13.204554: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Ingest Data
---

In [316]:
# ingest data

df_locs = ['../output/data/insurance-clean.csv',
    "https://raw.githubusercontent.com/Olhaau/fl-official-statistics-addon/main/output/data/insurance-clean.csv"
]

def load_df(df_locs):
    """ Loads data from a path to a csv-file.
    
    :param df_locs: possible locations of a CSV file
    :type df_locs: str or list of str
    :output: Ingested Data.
    :rtype: pandas.DataFrame 
    """
    df = pd.DataFrame()

    if isinstance(df_locs, str): df_locs = [df_locs]
    
    for df_loc in df_locs:
        try:
            df = pd.read_csv(df_loc, index_col = 0)
            print("loaded data from {}".format(df_loc))
            if len(df) != 0: break
        except Exception as ex:
            print("{} in ".format(type(ex).__name__), df_loc)

    return df

df = load_df(df_locs)
df.head(3)

loaded data from ../output/data/insurance-clean.csv


Unnamed: 0,age,sex,bmi,children,smoker,region,charges,region0,region1,region2,region3
0,0.021739,0.0,0.321227,0.0,1.0,southwest,16884.924,0.0,0.0,0.0,1.0
1,0.0,1.0,0.47915,0.2,0.0,southeast,1725.5523,0.0,0.0,1.0,0.0
2,0.217391,1.0,0.458434,0.6,0.0,southeast,4449.462,0.0,0.0,1.0,0.0


In [317]:
# select features and target (first column)
features = ['age', 'sex', 'bmi', 'children', 'smoker'
            , 'region0', 'region1', 'region2', 'region3']
target = 'charges'

df.loc[:, [target]+features ].head(3)

Unnamed: 0,charges,age,sex,bmi,children,smoker,region0,region1,region2,region3
0,16884.924,0.021739,0.0,0.321227,0.0,1.0,0.0,0.0,0.0,1.0
1,1725.5523,0.0,1.0,0.47915,0.2,0.0,0.0,0.0,1.0,0.0
2,4449.462,0.217391,1.0,0.458434,0.6,0.0,0.0,0.0,1.0,0.0


## Stepwise
---

### Create Data Shards (Federated)
---

In [140]:
# Prepare the data
# =====================

# 1. create client data
# =====================

#client_seperator = 'region'
#clients = [
#    df.loc[df[client_seperator] == x, df.columns != client_seperator][[target] + features] 
#    for x in df[client_seperator].unique()
#    ]
# or randomly
clients = [df[[target] + features].sample(frac = 1./4, ignore_index = True) for _ in range(4)]


# 2. evaluation splits
# ====================

clients_split = [train_test_split(data, test_size = 20, random_state = 42) for data in clients]
clients_train = [split[0] for split in clients_split]
test_data_fed  = [split[1] for split in clients_split]

# for cross validation use:
#cv = RepeatedKFold(n_splits = 5, n_repeats = 5, random_state = 42)
#client_cv_splits = [cv.split(data) for data in clients]
#[[[data.iloc[train,:], data.iloc[test,:]] for train, test in cv.split(data)] for data in clients][1][1]

# prep the data (incl. convert to tensor)
# =======================================

def prep_fed_train(X_train, y_train,
    #NUM_EPOCHS = 50,
    #BATCH_SIZE = 128,
    #SHUFFLE_BUFFER = 20,
    #PREFETCH_BUFFER = 5,
    #SEED = 42
    ):
    """
    See https://www.tensorflow.org/federated/tutorials/federated_learning_for_image_classification#preprocessing_the_input_data
    """

    return tf.data.Dataset.from_tensor_slices((
        tf.convert_to_tensor(X_train), 
        tf.convert_to_tensor(y_train)
        ))

#def prep_fed_test(X_test, y_test):
#    return tf.data.Dataset.from_tensor_slices((
#        tf.convert_to_tensor(np.expand_dims(X_test, axis=0)), 
#        tf.convert_to_tensor(np.expand_dims(y_test, axis=0))
#        )) 

train_data_fed = [prep_fed_train(df[features], df[target]) for df in clients_train]
#test_data_fed  = [prep_fed_test (df[features], df[target]) for df in clients_test]

# show random_client_ds
if False: # only correct, without #.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(BATCH_SIZE).prefetch(PREFETCH_BUFFER)
    i = 0 
    for client_dfs in random_client_ds:
        print('================================ client {} =================================='.format(i))
        client_train = list(client_dfs[0].as_numpy_iterator())
        client_test  = list(client_dfs[1])
        print('Train (obs = {})'.format(len(client_train[0])))

        print(pd.DataFrame(client_train[:3], columns = ["X", "y"]))
        print('Test (obs = {})'.format(len(client_test[0])))
        #print('obs: {}')
        print(pd.DataFrame(client_test[:3], columns = ["X", "y"]))
        # print([x[:3] for x in client_test])
        i += 1

### Build Federated Model
---

In [401]:
# build a TFF model from Keras
# ============================

def create_keras_model(
    nfeatures = 9,
    units = [40, 40, 20], 
    activations = ['relu'] * 3, 
    compile = True,
    loss = tf.losses.mae,
    optimizer = tf.optimizers.legacy.Adam(learning_rate = .05),
    metrics = ["mae", 'mean_squared_error', r2_score], 
    run_eagerly = True
    ):
  
  """Construct a fully connected neural network and compile it.
  
  Parameters
  ------------
  nfeatures: int, optional
    Number of input features. Default is 9.
  units: list of int, optional
    List of number of units of the hidden dense layers. The length of ``units`` defines the number of hidden layers. Default are 3 layers with 40, 40 an 20 units, respectively.
  activations: list of str, optional
    List of activation functions used in the hidden layers.
  loss: str, optional
    Used loss function for compiling.
  optimizer: keras.optimizers, optional
    Used optimizer for compiling.
  metrics: list of str or sklearn.metrics
    List of metrics for compiling.
  run_eagerly: bool
    Parameter for compiling

  Return
  ------------
    model: keras.engine.sequential.Sequential
      Keras sequential fully connected neural network. Already compiled.
  """
  
  # construct model
  model = Sequential()
  model.add(InputLayer(input_shape = [nfeatures]))
  for ind in range(len(units)):
    model.add(Dense(
      units = units[ind], 
      activation = activations[ind]
      ))
  model.add(Dense(1))
  
  # compile model
  if compile:
    model.compile(
      loss = loss,
      optimizer = optimizer,
      metrics = metrics,
      run_eagerly = run_eagerly
    )

  return model


def model_fn(
    keras_creator,
    loss = tf.keras.losses.MeanAbsoluteError()
    #,metrics = [tf.keras.metrics.MeanAbsoluteError()]
    ):
    """ Wrap a Keras model as Tensorflow Federated model. 
    
    cf. https://www.tensorflow.org/federated/tutorials/federated_learning_for_image_classification#creating_a_model_with_keras
    """
    def _model():
        # We _must_ create a new model here, and _not_ capture it from an external
        # scope. TFF will call this within different graph contexts.
        
        #keras_model = create_keras_model(
        #    nfeatures = nfeatures, compile = False#, **kwargs
        #    )
        
        keras_model = keras_generator()

        return tff.learning.models.from_keras_model(
            keras_model,
            input_spec = (
                tf.TensorSpec((None, keras_model.input.shape[1]
                ), dtype = tf.float64),
                tf.TensorSpec((None,),           dtype = tf.float64)
            ), loss = loss, metrics =  [tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanSquaredError()]
        )

    return _model

### Train
---

In [331]:
def keras_blueprint():
    return create_keras_model(nfeatures = 9, compile = False, units =[40,40,20])

#print(keras_blueprint().summary())
#model_fed = model_fn(keras_creator = keras_blueprint)

client_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)
server_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)

# Create iterative learning process which will perform the federated learning
process_fed = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn(keras_creator = keras_blueprint),
    client_optimizer_fn = client_optimizer,
    server_optimizer_fn = server_optimizer)

# show the learning process
#print(process_fed.initialize.type_signature.formatted_representation())

In [332]:
def train_fed(process, train_data,
    NUM_ROUNDS = 50,
    NUM_EPOCHS = 50,
    BATCH_SIZE = 128,
    SHUFFLE_BUFFER = 20,
    PREFETCH_BUFFER = 5,
    SEED = 42,
    verbose = True
    ):
    
    # prep the data
    train_data = [
        data.
            repeat(NUM_EPOCHS).
            shuffle(SHUFFLE_BUFFER, seed = SEED).
            batch(BATCH_SIZE).
            prefetch(PREFETCH_BUFFER)

        for data in train_data]
    
    # initialize the process
    state = process.initialize()
    hist= []

    for round in range(NUM_ROUNDS):
        
        if SEED != None: tf.keras.utils.set_random_seed(SEED)
        result  = process.next(state, train_data)
        
        state   = result.state
        metrics = dict(result.metrics['client_work']['train'].items())

        hist.append(metrics)

        if verbose == True: print('round {:2d} / {}, metrics = {}'.format(round + 1, NUM_ROUNDS, metrics))


    return {'process': process, 'history': hist, 'state': state}




In [334]:
result = train_fed(
    process = process_fed, 
    train_data = train_data_fed, 
    NUM_ROUNDS = 50, 
    NUM_EPOCHS = 50
    )

round  1 / 50, metrics = {'loss': 5520.089, 'num_examples': 62800, 'num_batches': 492}
round  2 / 50, metrics = {'loss': 5230.4956, 'num_examples': 62800, 'num_batches': 492}
round  3 / 50, metrics = {'loss': 5094.0776, 'num_examples': 62800, 'num_batches': 492}
round  4 / 50, metrics = {'loss': 4967.2173, 'num_examples': 62800, 'num_batches': 492}
round  5 / 50, metrics = {'loss': 4846.826, 'num_examples': 62800, 'num_batches': 492}
round  6 / 50, metrics = {'loss': 4733.1904, 'num_examples': 62800, 'num_batches': 492}
round  7 / 50, metrics = {'loss': 4636.036, 'num_examples': 62800, 'num_batches': 492}
round  8 / 50, metrics = {'loss': 4513.5933, 'num_examples': 62800, 'num_batches': 492}
round  9 / 50, metrics = {'loss': 4413.205, 'num_examples': 62800, 'num_batches': 492}
round 10 / 50, metrics = {'loss': 4304.1987, 'num_examples': 62800, 'num_batches': 492}
round 11 / 50, metrics = {'loss': 4231.1055, 'num_examples': 62800, 'num_batches': 492}
round 12 / 50, metrics = {'loss': 41

### Test
---

In [346]:
model_weights = iterative_process.get_model_weights(result['state'])

In [308]:
# Model evaluation
evaluation = tff.learning.build_federated_evaluation(model_fed)
# print(evaluation.type_signature.formatted_representation())
perf = evaluation(model_weights, test_data_fed)

  evaluation = tff.learning.build_federated_evaluation(model_fed)
2023-05-10 17:27:12.413307: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-10 17:27:12.413367: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2023-05-10 17:27:12.413490: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-05-10 17:27:12.413854: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-10 17:27:12.413924: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have bee

In [309]:
perf

OrderedDict([('eval',
              OrderedDict([('loss', 2007.8079),
                           ('num_examples', 80),
                           ('num_batches', 4)]))])

In [345]:
model = keras_blueprint()
model_weights.assign_weights_to(model)
model.compile(
    loss=tf.losses.mae,
    # loss=tf.losses.mean_squared_error,
    optimizer=tf.optimizers.Adam(),
    metrics=["mae", 'mean_squared_error']
)

X_test = pd.concat([data[features] for data in clients_test])
y_test = pd.concat([data[target]   for data in clients_test])

# The evaluation results, for technical reasons the metrics_names is called afterwards. However, its order fits to the results
print(model.evaluate(X_test, y_test, verbose = 0))
print(model.metrics_names)






[1718.9088134765625, 1718.9088134765625, 24081804.0]
['loss', 'mae', 'mean_squared_error']


## Summary
---

In [403]:
# 1. create client data
# =====================
#client_seperator = 'region'
#clients = [
#    df.loc[df[client_seperator] == x, df.columns != client_seperator][[target] + features] 
#    for x in df[client_seperator].unique()
#    ]
# or randomly
clients = [df[[target] + features].sample(frac = 1./4, ignore_index = True) for _ in range(4)]

# 2. evaluation splits
# ====================
clients_split = [train_test_split(data, test_size = 20, random_state = 42) for data in clients]
clients_train = [split[0] for split in clients_split]
clients_test  = [split[1] for split in clients_split]


# 3. convert to tensor
# ====================
train_data_fed = [prep_fed_train(df[features], df[target]) for df in clients_train]
test_data_fed  = [prep_fed_test (df[features], df[target]) for df in clients_test]


# 4. Specify model and optimizer
# ==============================
def keras_blueprint(compile = False):
    return create_keras_model(
        nfeatures = 9, 
        units = [40, 40, 20], 
        activations = ['relu'] * 3, 
        compile = compile)

#print(keras_blueprint().summary())
# rem.: has to be a generator so that build_weighted_fed_avg works

client_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)
server_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)

# 5. Create iterative FL process
# ===============================
# Create iterative learning process which will perform the federated learning
process_fed = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn(keras_creator = keras_blueprint),
    client_optimizer_fn = client_optimizer,
    server_optimizer_fn = server_optimizer)

# 6. trigger training
# ===================
train_result = train_fed(
    process_fed, 
    train_data_fed,
    NUM_ROUNDS = 100,
    NUM_EPOCHS = 70,
    BATCH_SIZE = 128,
    SHUFFLE_BUFFER = 20,
    PREFETCH_BUFFER = 5,
    SEED = 42,
    verbose = True
    )


print("Performance Train: {}".format(train_result["history"][-1]))

# 7. Test
# =======
model_weights = train_result['process'].get_model_weights(result['state'])
model = create_keras_model(compile = True)
model_weights.assign_weights_to(model)

X_test = pd.concat([data[features] for data in clients_test])
y_test = pd.concat([data[target]   for data in clients_test])


test_performance = model.evaluate(X_test, y_test, verbose = 0)
test_performance = dict(zip(model.metrics_names, [round(x , 3) for x in test_performance]))
print("Performance Test: {}".format(test_performance))


2023-05-10 22:41:26.872727: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-10 22:41:26.872793: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2023-05-10 22:41:26.872934: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-05-10 22:41:26.873457: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-10 22:41:26.873548: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-05-10 22:41:26.873605: I tensor

round  1 / 100, metrics = {'mean_absolute_error': 4497.5444, 'mean_squared_error': 74540490.0, 'loss': 4500.707, 'num_examples': 87920, 'num_batches': 688}
round  2 / 100, metrics = {'mean_absolute_error': 4356.469, 'mean_squared_error': 70370050.0, 'loss': 4359.381, 'num_examples': 87920, 'num_batches': 688}
round  3 / 100, metrics = {'mean_absolute_error': 4217.549, 'mean_squared_error': 67186940.0, 'loss': 4220.341, 'num_examples': 87920, 'num_batches': 688}
round  4 / 100, metrics = {'mean_absolute_error': 4104.527, 'mean_squared_error': 64368844.0, 'loss': 4107.152, 'num_examples': 87920, 'num_batches': 688}
round  5 / 100, metrics = {'mean_absolute_error': 4007.3503, 'mean_squared_error': 61854110.0, 'loss': 4009.7048, 'num_examples': 87920, 'num_batches': 688}
round  6 / 100, metrics = {'mean_absolute_error': 3877.418, 'mean_squared_error': 58660170.0, 'loss': 3879.9304, 'num_examples': 87920, 'num_batches': 688}
round  7 / 100, metrics = {'mean_absolute_error': 3764.229, 'mean_

## Automation
---

### Setup
---

#### Process

In [405]:
# Specify model and optimizer
# ==============================
def keras_blueprint(compile = False):
    return create_keras_model(
        nfeatures = 9, 
        units = [40, 40, 20], 
        activations = ['relu'] * 3, 
        compile = compile)

#print(keras_blueprint().summary())
# rem.: has to be a generator so that build_weighted_fed_avg works

client_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)
server_optimizer = lambda: tf.optimizers.Adam(learning_rate = .05)

# 5. Create iterative FL process
# ===============================
# Create iterative learning process which will perform the federated learning
process_fed = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn(keras_creator = keras_blueprint),
    client_optimizer_fn = client_optimizer,
    server_optimizer_fn = server_optimizer)

print(process_fed.initialize.type_signature.formatted_representation())

( -> <
  global_model_weights=<
    trainable=<
      float32[9,40],
      float32[40],
      float32[40,40],
      float32[40],
      float32[40,20],
      float32[20],
      float32[20,1],
      float32[1]
    >,
    non_trainable=<>
  >,
  distributor=<>,
  client_work=<>,
  aggregator=<
    value_sum_process=<>,
    weight_sum_process=<>
  >,
  finalizer=<
    int64,
    float32[9,40],
    float32[9,40],
    float32[40],
    float32[40],
    float32[40,40],
    float32[40,40],
    float32[40],
    float32[40],
    float32[40,20],
    float32[40,20],
    float32[20],
    float32[20],
    float32[20,1],
    float32[20,1],
    float32[1],
    float32[1]
  >
>@SERVER)


#### Evaluation

In [479]:
# Specify evaluation

#client_seperator = 'region'
#clients = [
#    df.loc[df[client_seperator] == x, df.columns != client_seperator][[target] + features] 
#    for x in df[client_seperator].unique()
#    ]
# or randomly
clients = [df[[target] + features].sample(frac = 1./4, ignore_index = True) for _ in range(4)]

# 2. evaluation splits
# ====================
#client_split = [train_test_split(data, test_size = 20, random_state = 42) for data in clients]
#client_train = [split[0] for split in clients_split]
#client_test  = [split[1] for split in clients_split]

nfolds = 5
nreps  = 5

cv = RepeatedKFold(n_splits = nfolds, n_repeats = nreps, random_state = 42)
client_splits = [list(cv.split(data)) for data in clients]
#[[[data.iloc[train,:], data.iloc[test,:]] for train, test in cv.split(data)] for data in clients][1][1]

### Train

In [516]:
results = []

for i in range(nfolds * nreps):
    indices_train = [split[i][0] for split in client_splits]
    clients_train = [clients[i].iloc[indices_train[i]] for i in range(len(clients))]

    train_data_fed = [prep_fed_train(df[features], df[target]) for df in clients_train]

    result = train_fed(
        process_fed, 
        train_data_fed,
        NUM_ROUNDS = 50,
        NUM_EPOCHS = 50,
        BATCH_SIZE = 128,
        SHUFFLE_BUFFER = 20,
        PREFETCH_BUFFER = 5,
        SEED = 42,
        verbose = False
    )

    results.append(result)
    print("trial {} / {}: performance train = {}".format(i + 1, nfolds * nreps,result["history"][-1]))

In [514]:
# show train performance
pd.DataFrame([res['history'][-1] for res in results]).assign(
    RMSE = lambda x: np.sqrt(x.mean_squared_error),
    MAE = lambda x: x.mean_absolute_error
)[['MAE', 'RMSE']].describe().round(2)

Unnamed: 0,MAE,RMSE
count,25.0,25.0
mean,11154.96,16313.01
std,163.66,223.25
min,10763.61,15740.94
25%,11093.77,16229.08
50%,11158.64,16318.78
75%,11281.63,16483.5
max,11425.94,16601.49


### Test

In [486]:
# calculae test performance
test_perfs = []
for i in range(nfolds * nreps):
    
    indices_test  = [split[i][1] for split in client_splits]
    clients_test  = [clients[i].iloc[indices_test [i]] for i in range(len(clients))]

    model_weight = results[i]['process'].get_model_weights(results[i]['state'])
    model = create_keras_model(compile = True)
    model_weights.assign_weights_to(model)

    X_test = pd.concat([data[features] for data in clients_test])
    y_test = pd.concat([data[target]   for data in clients_test])


    perf = model.evaluate(X_test, y_test, verbose = 0)
    perf = dict(zip(model.metrics_names, [round(x , 3) for x in perf]))
    test_perfs.append(perf)
    print("trial {} / {}: performance test = {}".format(i, nfolds * nreps, perf))


trial 0 / 25: performance test = {'loss': 3269.684, 'mae': 3269.684, 'mean_squared_error': 46506560.0, 'r2_score': 0.645}
trial 1 / 25: performance test = {'loss': 3514.106, 'mae': 3514.106, 'mean_squared_error': 51207492.0, 'r2_score': 0.677}
trial 2 / 25: performance test = {'loss': 3937.681, 'mae': 3937.681, 'mean_squared_error': 64670368.0, 'r2_score': 0.652}
trial 3 / 25: performance test = {'loss': 3508.317, 'mae': 3508.317, 'mean_squared_error': 48280184.0, 'r2_score': 0.658}
trial 4 / 25: performance test = {'loss': 2828.481, 'mae': 2828.481, 'mean_squared_error': 38823232.0, 'r2_score': 0.64}
trial 5 / 25: performance test = {'loss': 3366.954, 'mae': 3366.954, 'mean_squared_error': 50820444.0, 'r2_score': 0.7}
trial 6 / 25: performance test = {'loss': 3479.512, 'mae': 3479.512, 'mean_squared_error': 52369656.0, 'r2_score': 0.66}
trial 7 / 25: performance test = {'loss': 3307.356, 'mae': 3307.356, 'mean_squared_error': 44350744.0, 'r2_score': 0.683}
trial 8 / 25: performance te

In [515]:
# describe test performance
pd.DataFrame(test_perfs).assign(
    RMSE = lambda x: np.sqrt(x.mean_squared_error),
    MAE = lambda x: x.mae,
    RSQ_pct = lambda x: x.r2_score * 100
)[['MAE', 'RMSE', 'RSQ_pct']].describe()[1:].round(2)

Unnamed: 0,MAE,RMSE,RSQ_pct
mean,3413.3,7050.56,65.02
std,293.85,474.0,4.2
min,2828.48,6209.19,56.6
25%,3269.68,6740.36,62.2
50%,3453.48,6954.4,65.3
75%,3515.68,7353.85,67.7
max,4016.72,8041.79,71.8


In [503]:
# show test performance completely
pd.DataFrame(test_perfs).assign(
    RMSE = lambda x: np.sqrt(x.mean_squared_error),
    MAE = lambda x: x.mae,
    RSQ_pct = lambda x: x.r2_score * 100
)[['MAE', 'RMSE', 'RSQ_pct']].round(2)

Unnamed: 0,MAE,RMSE,RSQ_pct
0,3269.68,6819.57,64.5
1,3514.11,7155.94,67.7
2,3937.68,8041.79,65.2
3,3508.32,6948.39,65.8
4,2828.48,6230.83,64.0
5,3366.95,7128.85,70.0
6,3479.51,7236.69,66.0
7,3307.36,6659.64,68.3
8,3521.03,7353.85,63.7
9,3391.83,6928.56,62.2
