# Library

In [1]:
#import sys
import pandas as pd
import numpy as np
import random
import pyspark
import itertools
import matplotlib.pyplot as plt
import itertools
import seaborn as sns
import pickle
import statsmodels.api as sm

from pyspark import SparkContext, SQLContext

from math import sqrt
from time import time as ttt

from pyspark.sql import SparkSession
from pyspark.sql import functions as f

from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
from pyspark.ml import Pipeline
from pyspark.ml.classification import DecisionTreeClassifier as DTC_spark
from pyspark.ml.feature import StringIndexer, VectorIndexer
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

import joblib
from joblib import parallel_backend
from joblib import Parallel, delayed
from joblib import parallel_backend


# Upload

In [2]:
spark = SparkSession.builder.master("local").appName("spark_app_1234").getOrCreate()
sc = spark.sparkContext
spark
d0 = (
    spark
    .read
    .format("csv") 
    .option("header","true") 
    .option("inferSchema","true") 
    .load("gs://mas-a5-storage-1/notebooks/jupyter/application_train.csv")
)

d1 = d0.filter(d0.DAYS_EMPLOYED != 365243).select('TARGET','DAYS_EMPLOYED')

print(d1.rdd.getNumPartitions())

                                                                                

4


# Function

In [3]:
def prepare_spark_data(n_part, k_mult):
    '''
    takes 'DAYS_EMPLOYED and 'TARGET'  from d1 (alreay filtered)
    rearrane in n partitions (if n==0 keeps initial number of partitions)
    prints final shape/ partition
    returns d2 - spark df
    '''
    data = d1
    data_new = data # first step in the cycle
    for i in range(k_mult-1):
        data_tmp = data.select('TARGET', \
                     f.col('DAYS_EMPLOYED')*(f.lit(0.9995) + f.rand()/1000)).\
                   toDF('TARGET','DAYS_EMPLOYED')
        data_tmp = data_tmp.select('TARGET', f.floor('DAYS_EMPLOYED'))
        data_new = data_new.union(data_tmp)

    assembler = VectorAssembler(inputCols=["DAYS_EMPLOYED"], 
                        outputCol="DAYS_EMPLOYED_vect")
    d2 = assembler.transform(data_new)
    if n_part != 0:
        d2 = d2.repartition(n_part)       
    #print(f'n-partitions initial: {d2.rdd.getNumPartitions()}; df size: {d2.count()}\n')

    return d2

# Params an run

In [None]:
## !! mind the fn NOT TO rewrite results
fn = '4cpu_by_8n__4m'
size_mult = [1, 2, 5, 10, 15, 20, 30, 50, 100]
partitions = [0, 2, 4, 6, 8, 16, 24] 
# initial, 2**i incl n_nodes till n_cpu*n_nodes and n_cpu*n_nodes
n_iter = 3

print('d1-size', d1.count())
n_part_base = d1.rdd.getNumPartitions()
print('n-partitions initial', n_part_base, '\n')

rd1 = {}
for k_size_mult in size_mult:
    rd2 = {}
    for n_part in partitions:
        df = prepare_spark_data(n_part, k_size_mult)
        df.cache()
        print('======================================================================')
        print(f'=== size_mult={k_size_mult}; \
        n_part_req={n_part}; n_part_act={df.rdd.getNumPartitions()}; df_count={df.count()} ===\n')
        times = [0 for i in range(n_iter)]
        models = {}
        for i in range(n_iter):
            dt = DTC_spark(labelCol="TARGET",
                       featuresCol="DAYS_EMPLOYED_vect",
                       minInfoGain=0.0001,
                       impurity='entropy',
                       maxDepth=14, maxBins=2**14, # it differs from scikit learn - it means number of canidate split points
                       #minInstancesPerNode = 1,
                       #checkpointInterval = 10
                       )
           
            t0 = ttt()
            model = dt.fit(df)
            t1 = ttt()
            times[i] = t1-t0
            models[i] = model 
            print('model', i, 'build time', round(times[i],2), '\n', model)
        rd2[n_part] = (models, times)
    rd1[k_size_mult] = rd2

result = rd1  

                                                                                

d1-size 252137
n-partitions initial 4 



                                                                                

=== size_mult=1;         n_part_req=0; n_part_act=2; df_count=252137 ===



                                                                                

model 0 build time 10.51 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3c772b79f38d, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 7.34 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2383f1a0e8b4, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 6.88 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_738bc0779bd7, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=2; n_part_act=2; df_count=252137 ===



                                                                                

model 0 build time 6.85 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_bfafa45d2530, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 6.78 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3cc53f0e6076, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 6.61 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_81648d65bb23, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=4; n_part_act=4; df_count=252137 ===



                                                                                

model 0 build time 7.98 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_4137bf38d637, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 7.94 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7931b39b3b40, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 7.71 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f73476114a10, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=6; n_part_act=6; df_count=252137 ===



                                                                                

model 0 build time 9.04 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c6889ee9367d, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 9.08 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ec465c63cd12, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 9.1 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c19ee47bb313, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=8; n_part_act=8; df_count=252137 ===



                                                                                

model 0 build time 10.38 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0fba8364ebac, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 10.22 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_19ee8d9b4a8b, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 10.05 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_913e54864bd4, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=16; n_part_act=16; df_count=252137 ===



                                                                                

model 0 build time 15.14 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_eea0030c1293, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 16.51 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_23484783da00, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 15.18 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1d0562017d96, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=1;         n_part_req=24; n_part_act=24; df_count=252137 ===



                                                                                

model 0 build time 21.15 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7c3a48bf0224, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 21.81 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_cfa30c0b6554, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 19.56 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1dd93f841b1c, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=0; n_part_act=20; df_count=504274 ===



                                                                                

model 0 build time 17.82 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0162577526f5, depth=14, numNodes=149, numClasses=2, numFeatures=1


                                                                                

model 1 build time 12.87 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_32f7c4a8bd33, depth=14, numNodes=149, numClasses=2, numFeatures=1


                                                                                

model 2 build time 12.22 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a98cbea9751c, depth=14, numNodes=149, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=2; n_part_act=2; df_count=504274 ===



                                                                                

model 0 build time 8.97 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_54cd0f6f2a89, depth=14, numNodes=159, numClasses=2, numFeatures=1


                                                                                

model 1 build time 8.87 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_59d6b75aaa88, depth=14, numNodes=159, numClasses=2, numFeatures=1


                                                                                

model 2 build time 8.91 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c05819db621e, depth=14, numNodes=159, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=4; n_part_act=4; df_count=504274 ===



                                                                                

model 0 build time 9.77 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_35652d42af83, depth=14, numNodes=143, numClasses=2, numFeatures=1


                                                                                

model 1 build time 9.68 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c0130f4815be, depth=14, numNodes=143, numClasses=2, numFeatures=1


                                                                                

model 2 build time 9.94 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d841c7c6c43e, depth=14, numNodes=143, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=6; n_part_act=6; df_count=504274 ===

model 0 build time 6.94 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_37789a002541, depth=14, numNodes=97, numClasses=2, numFeatures=1
model 1 build time 4.76 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_209a83f4f15a, depth=14, numNodes=97, numClasses=2, numFeatures=1
model 2 build time 4.58 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a28f54d95316, depth=14, numNodes=97, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=8; n_part_act=8; df_count=504274 ===



                                                                                

model 0 build time 6.61 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_deb6143285b1, depth=14, numNodes=121, numClasses=2, numFeatures=1


                                                                                

model 1 build time 6.35 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_21a932a4eb65, depth=14, numNodes=121, numClasses=2, numFeatures=1


[Stage 1890:>                                                       (0 + 8) / 8]                                                                                

model 2 build time 6.12 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e513453e3099, depth=14, numNodes=121, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=16; n_part_act=16; df_count=504274 ===



                                                                                

model 0 build time 17.4 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7e715339d8fa, depth=14, numNodes=127, numClasses=2, numFeatures=1


                                                                                

model 1 build time 17.46 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3aa45edcd1e7, depth=14, numNodes=127, numClasses=2, numFeatures=1


                                                                                

model 2 build time 17.46 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6b22dff5a362, depth=14, numNodes=127, numClasses=2, numFeatures=1


                                                                                

=== size_mult=2;         n_part_req=24; n_part_act=24; df_count=504274 ===



                                                                                

model 0 build time 28.01 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_08627a6c473f, depth=14, numNodes=131, numClasses=2, numFeatures=1


                                                                                

model 1 build time 27.77 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_624f791a9ae0, depth=14, numNodes=131, numClasses=2, numFeatures=1


                                                                                

model 2 build time 27.6 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_faab28cae32a, depth=14, numNodes=131, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=0; n_part_act=80; df_count=1260685 ===



                                                                                

model 0 build time 55.92 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_8221fba9f099, depth=14, numNodes=91, numClasses=2, numFeatures=1


                                                                                

model 1 build time 40.71 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d0057dfe8746, depth=14, numNodes=91, numClasses=2, numFeatures=1


                                                                                

model 2 build time 39.92 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_bcde346115d1, depth=14, numNodes=91, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=2; n_part_act=2; df_count=1260685 ===



                                                                                

model 0 build time 14.65 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c70a03953515, depth=14, numNodes=97, numClasses=2, numFeatures=1


                                                                                

model 1 build time 13.32 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_9e473146e2f3, depth=14, numNodes=97, numClasses=2, numFeatures=1


                                                                                

model 2 build time 13.32 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1413e28dc338, depth=14, numNodes=97, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=4; n_part_act=4; df_count=1260685 ===



                                                                                

model 0 build time 12.33 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6425e06dd2fd, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 1 build time 9.8 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2814c5aa3e20, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

model 2 build time 9.41 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0ff7730c5c5d, depth=14, numNodes=123, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=6; n_part_act=6; df_count=1260685 ===



                                                                                

model 0 build time 9.03 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7ab6d41a33dc, depth=14, numNodes=121, numClasses=2, numFeatures=1


                                                                                

model 1 build time 7.84 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f224ba82042a, depth=14, numNodes=121, numClasses=2, numFeatures=1


                                                                                

model 2 build time 7.23 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7ed634b2f80b, depth=14, numNodes=121, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=8; n_part_act=8; df_count=1260685 ===

model 0 build time 7.39 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d9b622e553eb, depth=14, numNodes=93, numClasses=2, numFeatures=1
model 1 build time 6.3 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_49f89af4dc5c, depth=14, numNodes=93, numClasses=2, numFeatures=1
model 2 build time 6.09 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_72ec8500efdf, depth=14, numNodes=93, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=16; n_part_act=16; df_count=1260685 ===

model 0 build time 5.93 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_68df34e5c2f1, depth=14, numNodes=95, numClasses=2, numFeatures=1




model 1 build time 5.2 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ebbf56ae208a, depth=14, numNodes=95, numClasses=2, numFeatures=1
model 2 build time 5.0 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_687e830852cd, depth=14, numNodes=95, numClasses=2, numFeatures=1


                                                                                

=== size_mult=5;         n_part_req=24; n_part_act=24; df_count=1260685 ===





model 0 build time 6.45 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_82ef88f39fe8, depth=14, numNodes=75, numClasses=2, numFeatures=1




model 1 build time 6.26 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e91f44d7fded, depth=14, numNodes=75, numClasses=2, numFeatures=1


                                                                                

model 2 build time 6.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d12b9158d484, depth=14, numNodes=75, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=0; n_part_act=320; df_count=2521370 ===



                                                                                

model 0 build time 50.37 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a487b8caa604, depth=13, numNodes=85, numClasses=2, numFeatures=1


                                                                                

model 1 build time 47.66 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1c661ec0773c, depth=13, numNodes=85, numClasses=2, numFeatures=1


                                                                                

model 2 build time 47.09 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6db582ece19a, depth=13, numNodes=85, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=2; n_part_act=2; df_count=2521370 ===



                                                                                

model 0 build time 20.34 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_131c157dcbcb, depth=14, numNodes=93, numClasses=2, numFeatures=1


                                                                                

model 1 build time 19.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_dc0433ee7d2f, depth=14, numNodes=93, numClasses=2, numFeatures=1


                                                                                

model 2 build time 20.33 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_496c3110b5a0, depth=14, numNodes=93, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=4; n_part_act=4; df_count=2521370 ===



                                                                                

model 0 build time 13.54 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f70cf21a13c2, depth=14, numNodes=69, numClasses=2, numFeatures=1


                                                                                

model 1 build time 11.77 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_830cfda37bc0, depth=14, numNodes=69, numClasses=2, numFeatures=1


                                                                                

model 2 build time 11.49 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3597af056d42, depth=14, numNodes=69, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=6; n_part_act=6; df_count=2521370 ===



                                                                                

model 0 build time 8.77 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_20acc1f25af6, depth=14, numNodes=71, numClasses=2, numFeatures=1


                                                                                

model 1 build time 8.86 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6385367c9cf9, depth=14, numNodes=71, numClasses=2, numFeatures=1


                                                                                

model 2 build time 8.69 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a0af3358b51f, depth=14, numNodes=71, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=8; n_part_act=8; df_count=2521370 ===



                                                                                

model 0 build time 9.33 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_b78a4405831e, depth=14, numNodes=81, numClasses=2, numFeatures=1


                                                                                

model 1 build time 8.69 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_9e758cc055c9, depth=14, numNodes=81, numClasses=2, numFeatures=1


                                                                                

model 2 build time 8.58 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2cc9d7900de8, depth=14, numNodes=81, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=16; n_part_act=16; df_count=2521370 ===



                                                                                

model 0 build time 7.32 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6dd3556f78a7, depth=14, numNodes=61, numClasses=2, numFeatures=1




model 1 build time 6.12 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_68f3ce24055c, depth=14, numNodes=61, numClasses=2, numFeatures=1




model 2 build time 6.24 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_8b5e234f2b22, depth=14, numNodes=61, numClasses=2, numFeatures=1


                                                                                

=== size_mult=10;         n_part_req=24; n_part_act=24; df_count=2521370 ===



                                                                                

model 0 build time 7.93 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_49aaa931d1d6, depth=14, numNodes=77, numClasses=2, numFeatures=1


                                                                                

model 1 build time 7.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1ce52acebdef, depth=14, numNodes=77, numClasses=2, numFeatures=1


                                                                                

model 2 build time 7.56 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_33a55136607f, depth=14, numNodes=77, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=0; n_part_act=480; df_count=3782055 ===



                                                                                

model 0 build time 61.95 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_00b20cc5db8e, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 1 build time 60.96 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_39fcf70cb35d, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 2 build time 60.59 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_cc0af3edb8f7, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=2; n_part_act=2; df_count=3782055 ===



                                                                                

model 0 build time 24.56 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a0d0e8dfa4c7, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

model 1 build time 25.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f069af920665, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

model 2 build time 23.55 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_813d56e92d33, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=4; n_part_act=4; df_count=3782055 ===



                                                                                

model 0 build time 15.4 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a922f45d5a75, depth=11, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 1 build time 14.1 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_691d52cebc8c, depth=11, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 2 build time 14.44 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1e027be4d218, depth=11, numNodes=39, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=6; n_part_act=6; df_count=3782055 ===



                                                                                

model 0 build time 12.62 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d5b3cb3c87b7, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

model 1 build time 11.19 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f8cdeef59981, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

model 2 build time 11.57 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0747248d7269, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=8; n_part_act=8; df_count=3782055 ===



                                                                                

model 0 build time 9.71 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_bcb8054c00b6, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

model 1 build time 10.34 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_da70914ec7fb, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=16; n_part_act=16; df_count=3782055 ===



                                                                                

model 0 build time 8.35 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_82e08805d464, depth=12, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 1 build time 7.5 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_895109f563c6, depth=12, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 2 build time 7.41 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_41a3f8430627, depth=12, numNodes=53, numClasses=2, numFeatures=1


                                                                                

=== size_mult=15;         n_part_req=24; n_part_act=24; df_count=3782055 ===



                                                                                

model 0 build time 9.18 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d2272f4dafa4, depth=13, numNodes=47, numClasses=2, numFeatures=1


                                                                                

model 1 build time 8.25 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_cf64076d6bfc, depth=13, numNodes=47, numClasses=2, numFeatures=1


                                                                                

model 2 build time 8.39 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_110a091c48dc, depth=13, numNodes=47, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=0; n_part_act=640; df_count=5042740 ===



                                                                                

model 0 build time 84.27 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_b1d92bcddbbf, depth=14, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 1 build time 84.47 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_80e8d5612789, depth=14, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 2 build time 83.4 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_38a821339a34, depth=14, numNodes=63, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=2; n_part_act=2; df_count=5042740 ===



                                                                                

model 0 build time 30.16 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_302286086e7c, depth=14, numNodes=61, numClasses=2, numFeatures=1


                                                                                

model 1 build time 27.3 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_addb802e44eb, depth=14, numNodes=61, numClasses=2, numFeatures=1


                                                                                

model 2 build time 30.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0ec66b8ee472, depth=14, numNodes=61, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=4; n_part_act=4; df_count=5042740 ===



                                                                                

model 0 build time 17.06 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2bdab6dbd03d, depth=13, numNodes=59, numClasses=2, numFeatures=1


                                                                                

model 1 build time 16.81 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7c224b3eaa2d, depth=13, numNodes=59, numClasses=2, numFeatures=1


                                                                                

model 2 build time 17.75 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_48f402711372, depth=13, numNodes=59, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=6; n_part_act=6; df_count=5042740 ===



                                                                                

model 0 build time 14.25 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_fffef7f3b889, depth=11, numNodes=47, numClasses=2, numFeatures=1


                                                                                

model 1 build time 13.05 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_b8883709bc58, depth=11, numNodes=47, numClasses=2, numFeatures=1


                                                                                

model 2 build time 13.26 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6d02554c12ea, depth=11, numNodes=47, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=8; n_part_act=8; df_count=5042740 ===



                                                                                

model 0 build time 10.97 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_9b4fb1fb9dc1, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 1 build time 12.07 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ceb923657f70, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

model 2 build time 12.36 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_84b61a888bef, depth=13, numNodes=63, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=16; n_part_act=16; df_count=5042740 ===



                                                                                

model 0 build time 8.38 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_1d5bf3441c7b, depth=14, numNodes=65, numClasses=2, numFeatures=1
model 1 build time 7.36 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_81f56352a553, depth=14, numNodes=65, numClasses=2, numFeatures=1




model 2 build time 7.25 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2370867a4e06, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

=== size_mult=20;         n_part_req=24; n_part_act=24; df_count=5042740 ===



                                                                                

model 0 build time 9.08 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_5ecf1f008475, depth=10, numNodes=37, numClasses=2, numFeatures=1
model 1 build time 7.33 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3d9a114692e8, depth=10, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 2 build time 8.01 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_dc4c35ab4785, depth=10, numNodes=37, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=0; n_part_act=960; df_count=7564110 ===



                                                                                

model 0 build time 135.7 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_96ed75c38314, depth=14, numNodes=57, numClasses=2, numFeatures=1


                                                                                

model 1 build time 135.09 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_446b6da60272, depth=14, numNodes=57, numClasses=2, numFeatures=1


                                                                                

model 2 build time 134.91 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_cd22fc66818f, depth=14, numNodes=57, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=2; n_part_act=2; df_count=7564110 ===



                                                                                

model 0 build time 39.21 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c28771dabb43, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 1 build time 39.1 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c3e8ddae1d51, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 2 build time 38.66 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0e79299d3524, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=4; n_part_act=4; df_count=7564110 ===



                                                                                

model 0 build time 21.74 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3157968b019b, depth=10, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 1 build time 23.54 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ef6cc7a59163, depth=10, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 2 build time 21.47 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_32837ff536a1, depth=10, numNodes=37, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=6; n_part_act=6; df_count=7564110 ===



                                                                                

model 0 build time 16.39 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_6e98f275d821, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 1 build time 15.41 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_820744244653, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 2 build time 17.08 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0c25e6edf07d, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=8; n_part_act=8; df_count=7564110 ===



                                                                                

model 0 build time 15.8 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e58028374ebd, depth=11, numNodes=45, numClasses=2, numFeatures=1


                                                                                

model 1 build time 13.76 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_60160d7e325e, depth=11, numNodes=45, numClasses=2, numFeatures=1


                                                                                

model 2 build time 12.34 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e4e367f146c6, depth=11, numNodes=45, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=16; n_part_act=16; df_count=7564110 ===



                                                                                

model 0 build time 10.66 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_eab70422d66b, depth=13, numNodes=45, numClasses=2, numFeatures=1


                                                                                

model 1 build time 8.72 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_23cd6ba5a817, depth=13, numNodes=45, numClasses=2, numFeatures=1


                                                                                

model 2 build time 9.25 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_2fcf5144a1e3, depth=13, numNodes=45, numClasses=2, numFeatures=1


                                                                                

=== size_mult=30;         n_part_req=24; n_part_act=24; df_count=7564110 ===



                                                                                

model 0 build time 12.87 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_237b7c125f1b, depth=13, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 1 build time 11.52 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c2e50e7fcfd1, depth=13, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 2 build time 9.84 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d111b5c191b9, depth=13, numNodes=53, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=0; n_part_act=1600; df_count=12606850 ===



                                                                                

model 0 build time 228.39 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ea41703304c0, depth=10, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 1 build time 226.99 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_b83a45b44133, depth=10, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 2 build time 225.16 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_f1e08aa4c539, depth=10, numNodes=31, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=2; n_part_act=2; df_count=12606850 ===



                                                                                

model 0 build time 51.9 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_9ff4395a7bbb, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

model 1 build time 48.15 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_4ff5852172f5, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

model 2 build time 48.4 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e9d90d2d0f0a, depth=11, numNodes=43, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=4; n_part_act=4; df_count=12606850 ===



                                                                                

model 0 build time 32.59 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_3e4f147d6da1, depth=10, numNodes=35, numClasses=2, numFeatures=1


                                                                                

model 1 build time 26.64 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_fdfcf7115e22, depth=10, numNodes=35, numClasses=2, numFeatures=1


                                                                                

model 2 build time 28.32 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_52f689de2bbd, depth=10, numNodes=35, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=6; n_part_act=6; df_count=12606850 ===



                                                                                

model 0 build time 20.94 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d3d3215b880e, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

model 1 build time 21.89 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_5fce5b18a4a3, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

model 2 build time 21.06 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_88c857e58302, depth=14, numNodes=65, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=8; n_part_act=8; df_count=12606850 ===



                                                                                

model 0 build time 19.82 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_db76821c1e7d, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 1 build time 16.51 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0d583cda1045, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 2 build time 17.68 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_fa68015af861, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=16; n_part_act=16; df_count=12606850 ===



                                                                                

model 0 build time 14.36 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_0f685a8f4ef8, depth=14, numNodes=55, numClasses=2, numFeatures=1


                                                                                

model 1 build time 11.79 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_dc304827a6a3, depth=14, numNodes=55, numClasses=2, numFeatures=1


                                                                                

model 2 build time 11.97 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_4475a74009d9, depth=14, numNodes=55, numClasses=2, numFeatures=1


                                                                                

=== size_mult=50;         n_part_req=24; n_part_act=24; df_count=12606850 ===



                                                                                

model 0 build time 14.66 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c12dcf9d3d84, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 1 build time 13.99 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_a9506b4768c9, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 2 build time 13.29 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e2bbe2125031, depth=11, numNodes=37, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=0; n_part_act=3200; df_count=25213700 ===



21/11/23 09:36:20 ERROR org.apache.spark.scheduler.AsyncEventQueue: Dropping event from queue executorManagement. This likely means one of the listeners is too slow and cannot keep up with the rate at which tasks are being started by the scheduler.
21/11/23 09:36:20 WARN org.apache.spark.scheduler.AsyncEventQueue: Dropped 1 events from executorManagement since the application started.
                                                                                

model 0 build time 461.74 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e1d85ff96927, depth=11, numNodes=35, numClasses=2, numFeatures=1


21/11/23 09:44:11 WARN org.apache.spark.scheduler.AsyncEventQueue: Dropped 2531 events from executorManagement since Tue Nov 23 09:36:20 UTC 2021.
                                                                                

model 1 build time 465.06 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c5107584644e, depth=11, numNodes=35, numClasses=2, numFeatures=1


21/11/23 09:51:47 WARN org.apache.spark.scheduler.AsyncEventQueue: Dropped 1881 events from executorManagement since Tue Nov 23 09:44:11 UTC 2021.
                                                                                

model 2 build time 462.58 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_e53c5a9553fd, depth=11, numNodes=35, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=2; n_part_act=2; df_count=25213700 ===



                                                                                

model 0 build time 97.09 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_53629b6a2dfa, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 1 build time 88.23 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ac773995d299, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 2 build time 88.7 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_c1997d1c269f, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=4; n_part_act=4; df_count=25213700 ===



                                                                                

model 0 build time 63.05 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d937618ccb43, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 1 build time 54.49 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_8f241d5a4eae, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

model 2 build time 51.06 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_613602042f1b, depth=12, numNodes=39, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=6; n_part_act=6; df_count=25213700 ===



                                                                                

model 0 build time 41.72 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_ef9d9a0364ac, depth=14, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 1 build time 38.31 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d38e44987f0b, depth=14, numNodes=53, numClasses=2, numFeatures=1


                                                                                

model 2 build time 38.34 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_98524cc41f8a, depth=14, numNodes=53, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=8; n_part_act=8; df_count=25213700 ===



                                                                                

model 0 build time 36.03 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_66a181447e4d, depth=12, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 1 build time 33.09 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_b4c2c3f8f96c, depth=12, numNodes=37, numClasses=2, numFeatures=1


                                                                                

model 2 build time 31.32 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_7cd50b7933da, depth=12, numNodes=37, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=16; n_part_act=16; df_count=25213700 ===



                                                                                

model 0 build time 22.47 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_8e347603f2de, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 1 build time 22.3 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_695984648928, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

model 2 build time 20.46 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_96987f4ea181, depth=9, numNodes=31, numClasses=2, numFeatures=1


                                                                                

=== size_mult=100;         n_part_req=24; n_part_act=24; df_count=25213700 ===



                                                                                

model 0 build time 24.16 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_5760a8b20b79, depth=11, numNodes=41, numClasses=2, numFeatures=1


                                                                                

model 1 build time 24.86 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_39f92fdb9d1d, depth=11, numNodes=41, numClasses=2, numFeatures=1




model 2 build time 25.24 
 DecisionTreeClassificationModel: uid=DecisionTreeClassifier_d110b972cb23, depth=11, numNodes=41, numClasses=2, numFeatures=1


                                                                                

# Results

In [5]:
di = result
df_res_time = pd.DataFrame()
df_res_nodes = pd.DataFrame()
for p1 in di.keys():
    for p2 in di[p1].keys():
        df_res_time.loc[p1, p2] = np.round(np.mean(di[p1][p2][1]), 1)
        m_tmp = di[p1][p2][0]
        n_nodes = []
        for i,_ in enumerate(m_tmp):
            tmp = f'{m_tmp[i]}'.split(' ')
            print(i, tmp)
            n_nodes.append([int(x.split('=')[1][:-1]) for x in tmp if x[:4]== 'numN'][0])
        df_res_nodes.loc[p1, p2] = np.round(np.mean(n_nodes), 1)

df_res_time.to_csv(f'gs://mas-a5-storage-1/notebooks/jupyter/obj/{fn}_t.csv')
df_res_nodes.to_csv(f'gs://mas-a5-storage-1/notebooks/jupyter/obj/{fn}_n.csv')

display(df_res_time)      
display(df_res_nodes)     

0 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_3c772b79f38d,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
1 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_2383f1a0e8b4,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
2 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_738bc0779bd7,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
0 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_bfafa45d2530,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
1 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_3cc53f0e6076,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
2 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_81648d65bb23,', 'depth=14,', 'numNodes=123,', 'numClasses=2,', 'numFeatures=1']
0 ['DecisionTreeClassificationModel:', 'uid=DecisionTreeClassifier_4137bf38d637,', 'depth=14,', 'numNodes=123,', 'numC

Unnamed: 0,0,2,4,6,8,16,24
1,8.2,6.7,7.9,9.1,10.2,15.6,20.8
2,14.3,8.9,9.8,5.4,6.4,17.4,27.8
5,45.5,13.8,10.5,8.0,6.6,5.4,6.3
10,48.4,20.0,12.3,8.8,8.9,6.6,7.6
15,61.2,24.5,14.6,11.8,9.8,7.8,8.6
20,84.0,29.3,17.2,13.5,11.8,7.7,8.1
30,135.2,39.0,22.2,16.3,14.0,9.5,11.4
50,226.8,49.5,29.2,21.3,18.0,12.7,14.0
100,463.1,91.3,56.2,39.5,33.5,21.7,24.8


Unnamed: 0,0,2,4,6,8,16,24
1,123.0,123.0,123.0,123.0,123.0,123.0,123.0
2,149.0,159.0,143.0,97.0,121.0,127.0,131.0
5,91.0,97.0,123.0,121.0,93.0,95.0,75.0
10,85.0,93.0,69.0,71.0,81.0,61.0,77.0
15,63.0,43.0,39.0,65.0,43.0,53.0,47.0
20,63.0,61.0,59.0,47.0,63.0,65.0,37.0
30,57.0,31.0,37.0,39.0,45.0,45.0,53.0
50,31.0,43.0,35.0,65.0,37.0,55.0,37.0
100,35.0,39.0,39.0,53.0,37.0,31.0,41.0


In [6]:
fn = '4cpu_by_8n__4m'

tmp = pd.read_csv(f'gs://mas-a5-storage-1/notebooks/jupyter/obj/{fn}_t.csv')
tmp

Unnamed: 0.1,Unnamed: 0,0,2,4,6,8,16,24
0,1,8.2,6.7,7.9,9.1,10.2,15.6,20.8
1,2,14.3,8.9,9.8,5.4,6.4,17.4,27.8
2,5,45.5,13.8,10.5,8.0,6.6,5.4,6.3
3,10,48.4,20.0,12.3,8.8,8.9,6.6,7.6
4,15,61.2,24.5,14.6,11.8,9.8,7.8,8.6
5,20,84.0,29.3,17.2,13.5,11.8,7.7,8.1
6,30,135.2,39.0,22.2,16.3,14.0,9.5,11.4
7,50,226.8,49.5,29.2,21.3,18.0,12.7,14.0
8,100,463.1,91.3,56.2,39.5,33.5,21.7,24.8


In [7]:
fn = '4cpu_by_8n__4m'
tmp = pd.read_csv(f'gs://mas-a5-storage-1/notebooks/jupyter/obj/{fn}_n.csv')
tmp

Unnamed: 0.1,Unnamed: 0,0,2,4,6,8,16,24
0,1,123.0,123.0,123.0,123.0,123.0,123.0,123.0
1,2,149.0,159.0,143.0,97.0,121.0,127.0,131.0
2,5,91.0,97.0,123.0,121.0,93.0,95.0,75.0
3,10,85.0,93.0,69.0,71.0,81.0,61.0,77.0
4,15,63.0,43.0,39.0,65.0,43.0,53.0,47.0
5,20,63.0,61.0,59.0,47.0,63.0,65.0,37.0
6,30,57.0,31.0,37.0,39.0,45.0,45.0,53.0
7,50,31.0,43.0,35.0,65.0,37.0,55.0,37.0
8,100,35.0,39.0,39.0,53.0,37.0,31.0,41.0
