# Machine Learning Isaac

In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from fastcore.basics import Path, AttrDict
import utils_isaac as utils
import numpy as np
import pickle
from datetime import datetime
from catboost import CatBoostRegressor, CatBoostClassifier
from tqdm import tqdm

# This is used to import the evaluation script, not needed for training
import sys
sys.path.append('../') 
import evaluation

In [3]:
import pickle

In [4]:
config = AttrDict(
    challenge_data_dir = Path('../../dataset/'),
    valid_ratio = 0.1,
    lag_steps = 6,
    tolerance= 6, # Default evaluation tolerance
)

In [5]:
# Define the list of feature columns
feature_cols = [
    "Eccentricity",
    "Semimajor Axis (m)",
    "Inclination (deg)",
    "RAAN (deg)",
    "Argument of Periapsis (deg)",
    "True Anomaly (deg)",
    "Latitude (deg)",
    "Longitude (deg)",
    "Altitude (m)",
    "X (m)",
    "Y (m)",
    "Z (m)",
    "Vx (m/s)",
    "Vy (m/s)",
    "Vz (m/s)"
]

In [6]:
len(feature_cols)

15

In [7]:
# from statsmodels.tsa.seasonal import seasonal_decompose
# def decompose(series,period):
#     decomposition = seasonal_decompose(series, model='additive', period=period) 
#     return decomposition.resid.ffill().bfill()

In [8]:
# Define the directory paths
train_data_dir = config.challenge_data_dir / "train"

# Load the ground truth data
ground_truth = pd.read_csv(config.challenge_data_dir / 'train_labels.csv')

# # Apply the function to the ground truth data
data, updated_feature_cols = utils.tabularize_data(train_data_dir,
                                                   feature_cols, 
                                                   ground_truth,
                                                   lag_steps=config.lag_steps,
                                                   add_heurestic=False,
                                                   nb_of_ex = 500)

data['EW'] = data['EW'].fillna('Nothing')
data['NS'] = data['NS'].fillna('Nothing')

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  added_data = merged_data.groupby('ObjectID')[factor].apply(lambda win: win.rolling(3*12, center=True, min_periods=0).max()) - \
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  merged_data.groupby('ObjectID')[factor].apply(
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  added_data = merged_data.groupby('ObjectID')[factor].apply(lambda win: win.rolling(3*12, center=True, min_periods=0).max()) - \
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  merged_data.groupby('ObjectID')[factor].apply(
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  added_data = merged_data.groupby('ObjectID')[factor].apply(lambda

In [8]:
#data[['ObjectID','Timestamp','EW_baseline_heuristic','NS_baseline_heuristic','EW_baseline_heuristic_ffill','NS_baseline_heuristic_ffill']].to_pickle('Data_With_Baseline.pkl')
#data.to_pickle('Data_With_Nothing.pkl')

In [33]:
data_baseline = pd.read_pickle('Data_With_Baseline.pkl')
#data = pd.read_pickle('Data_With_Nothing.pkl')
data = pd.merge(data,data_baseline,on=['ObjectID','Timestamp'],how='left')
data = pd.concat([data,
                  pd.get_dummies(data[['EW_baseline_heuristic']]),
                  pd.get_dummies(data[['NS_baseline_heuristic']]),
                  pd.get_dummies(data[['EW_baseline_heuristic_ffill']]),
                  pd.get_dummies(data[['NS_baseline_heuristic_ffill']])],axis=1)

In [36]:
331 - 299

32

In [46]:
len(data[['EW_baseline_heuristic']].value_counts())

8

In [47]:
len(data[['NS_baseline_heuristic']].value_counts())

7

In [48]:
8+7+7+6

28

In [35]:
data.shape

(1086326, 331)

In [10]:
del data_baseline

In [11]:
#data = data.loc[data.ObjectID.isin(list(data.ObjectID.unique())[:500])]

In [12]:
updated_feature_cols = list(data.columns)
updated_feature_cols.remove('TimeIndex')
updated_feature_cols.remove('Timestamp')
updated_feature_cols.remove('ObjectID')
updated_feature_cols.remove('EW')
updated_feature_cols.remove('NS')
updated_feature_cols.remove('EW_baseline_heuristic')
updated_feature_cols.remove('NS_baseline_heuristic')
updated_feature_cols.remove('EW_baseline_heuristic_ffill')
updated_feature_cols.remove('NS_baseline_heuristic_ffill')

In [13]:
# Create a validation set without mixing the ObjectIDs
object_ids = data['ObjectID'].unique()
train_ids, valid_ids = train_test_split(object_ids, 
                                        test_size=config.valid_ratio, 
                                        random_state=43)

train_data = data[data['ObjectID'].isin(train_ids)].copy()
valid_data = data[data['ObjectID'].isin(valid_ids)].copy()

ground_truth_train = ground_truth[ground_truth['ObjectID'].isin(train_ids)].copy()
ground_truth_valid = ground_truth[ground_truth['ObjectID'].isin(valid_ids)].copy()

# Count the number of objects in the training and validation sets
print('Number of objects in the training set:', len(train_data['ObjectID'].unique()))
print('Number of objects in the validation set:', len(valid_data['ObjectID'].unique()))

Number of objects in the training set: 450
Number of objects in the validation set: 50


Next we will make sure that there every label, both in the direction EW and NS,
is present both in the training and validation partitions

In [14]:
# Get the unique values of EW and NS in train and test data
train_EW = set(train_data['EW'].unique())
train_NS = set(train_data['NS'].unique())
valid_EW = set(valid_data['EW'].unique())
valid_NS = set(valid_data['NS'].unique())

# Get the values of EW and NS that are in test data but not in train data
missing_EW = valid_EW.difference(train_EW)
missing_NS = valid_NS.difference(train_NS)

# Check if all the values in EW are also present in NS
if not set(train_data['EW'].unique()).issubset(set(train_data['NS'].unique())):
    # Get the values of EW that are not present in NS
    missing_EW_NS = set(train_data['EW'].unique()).difference(
        set(train_data['NS'].unique())
    )
else:
    missing_EW_NS = None

# Print the missing values of EW and NS
print("Missing values of EW in test data:", missing_EW)
print("Missing values of NS in test data:", missing_NS)
print("Values of EW not present in NS:", missing_EW_NS)

Missing values of EW in test data: set()
Missing values of NS in test data: set()
Values of EW not present in NS: {'IK-EK', 'SS-EK', 'AD-NK'}


###  Train part

In [15]:
# Convert categorical data to numerical data
le_EW = LabelEncoder()
le_NS = LabelEncoder()

# Encode the 'EW' and 'NS' columns
train_data['EW_encoded'] = le_EW.fit_transform(train_data['EW'])
train_data['NS_encoded'] = le_NS.fit_transform(train_data['NS'])

In [16]:
# Define the Random Forest model for NS
model_NS_preprocess = CatBoostClassifier(n_estimators=100, random_state=42)
# Fit the model to the training data for NS
model_NS_preprocess.fit(train_data[updated_feature_cols], train_data['NS_encoded'])

Learning rate set to 0.5
0:	learn: 0.1718630	total: 2.92s	remaining: 4m 48s
1:	learn: 0.0995150	total: 4.96s	remaining: 4m 2s
2:	learn: 0.0599244	total: 7.63s	remaining: 4m 6s
3:	learn: 0.0369985	total: 9.38s	remaining: 3m 45s
4:	learn: 0.0235560	total: 11.4s	remaining: 3m 37s
5:	learn: 0.0153767	total: 12.9s	remaining: 3m 22s
6:	learn: 0.0106700	total: 14.6s	remaining: 3m 13s
7:	learn: 0.0076619	total: 16.5s	remaining: 3m 10s
8:	learn: 0.0059047	total: 18.4s	remaining: 3m 6s
9:	learn: 0.0048417	total: 20.2s	remaining: 3m 1s
10:	learn: 0.0041902	total: 21.8s	remaining: 2m 56s
11:	learn: 0.0038916	total: 23.4s	remaining: 2m 51s
12:	learn: 0.0034464	total: 25.2s	remaining: 2m 48s
13:	learn: 0.0032180	total: 26.6s	remaining: 2m 43s
14:	learn: 0.0030150	total: 28.3s	remaining: 2m 40s
15:	learn: 0.0029230	total: 30.1s	remaining: 2m 37s
16:	learn: 0.0028667	total: 31.6s	remaining: 2m 34s
17:	learn: 0.0028372	total: 33.1s	remaining: 2m 30s
18:	learn: 0.0027953	total: 34.4s	remaining: 2m 26s
1

<catboost.core.CatBoostClassifier at 0x1d331294d00>

In [17]:
added_proba_feature_NS = pd.DataFrame(model_NS_preprocess.predict_proba(train_data[model_NS_preprocess.feature_names_])).add_prefix('proba_feature_NS_')
added_proba_feature_NS.index = train_data.index
train_data = pd.concat([train_data,added_proba_feature_NS] ,axis=1)

added_proba_feature_NS = pd.DataFrame(model_NS_preprocess.predict_proba(valid_data[model_NS_preprocess.feature_names_])).add_prefix('proba_feature_NS_')
added_proba_feature_NS.index = valid_data.index
valid_data = pd.concat([valid_data,added_proba_feature_NS] ,axis=1)

In [18]:
# Define the Random Forest model for EW
model_EW = CatBoostClassifier(n_estimators=100, random_state=42)
# Fit the model to the training data for EW
model_EW.fit(train_data[updated_feature_cols+list(added_proba_feature_NS.columns)], train_data['EW_encoded'])

Learning rate set to 0.5
0:	learn: 0.0670141	total: 10s	remaining: 16m 34s
1:	learn: 0.0422621	total: 18.6s	remaining: 15m 11s
2:	learn: 0.0260920	total: 27.3s	remaining: 14m 42s
3:	learn: 0.0176083	total: 36s	remaining: 14m 24s
4:	learn: 0.0121551	total: 44.7s	remaining: 14m 9s
5:	learn: 0.0080600	total: 53.3s	remaining: 13m 55s
6:	learn: 0.0058825	total: 1m 1s	remaining: 13m 42s
7:	learn: 0.0046372	total: 1m 10s	remaining: 13m 30s
8:	learn: 0.0039259	total: 1m 18s	remaining: 13m 18s
9:	learn: 0.0034814	total: 1m 27s	remaining: 13m 7s
10:	learn: 0.0032246	total: 1m 36s	remaining: 12m 56s
11:	learn: 0.0030837	total: 1m 44s	remaining: 12m 47s
12:	learn: 0.0028501	total: 1m 53s	remaining: 12m 42s
13:	learn: 0.0027379	total: 2m 2s	remaining: 12m 32s
14:	learn: 0.0026105	total: 2m 11s	remaining: 12m 23s
15:	learn: 0.0025439	total: 2m 19s	remaining: 12m 14s
16:	learn: 0.0024002	total: 2m 28s	remaining: 12m 5s
17:	learn: 0.0023663	total: 2m 37s	remaining: 11m 56s
18:	learn: 0.0023272	total: 

<catboost.core.CatBoostClassifier at 0x1d32fb6afb0>

In [19]:
added_proba_feature_EW = pd.DataFrame( model_EW.predict_proba(train_data[model_EW.feature_names_])).add_prefix('proba_feature_EW_')
added_proba_feature_EW.index = train_data.index
train_data = pd.concat([train_data,added_proba_feature_EW] ,axis=1)

added_proba_feature_EW = pd.DataFrame( model_EW.predict_proba(valid_data[model_EW.feature_names_])).add_prefix('proba_feature_EW_')
added_proba_feature_EW.index = valid_data.index
valid_data = pd.concat([valid_data,added_proba_feature_EW] ,axis=1)

In [20]:
# Define the Random Forest model for NS
model_NS = CatBoostClassifier(n_estimators=100, random_state=42)
# Fit the model to the training data for NS
model_NS.fit(train_data[updated_feature_cols+list(added_proba_feature_EW.columns)], train_data['NS_encoded'])

Learning rate set to 0.5
0:	learn: 0.1718225	total: 2.71s	remaining: 4m 28s
1:	learn: 0.0994748	total: 4.57s	remaining: 3m 43s
2:	learn: 0.0594433	total: 6.58s	remaining: 3m 32s
3:	learn: 0.0366316	total: 8.68s	remaining: 3m 28s
4:	learn: 0.0231248	total: 11.1s	remaining: 3m 31s
5:	learn: 0.0151145	total: 12.5s	remaining: 3m 16s
6:	learn: 0.0102609	total: 14s	remaining: 3m 5s
7:	learn: 0.0073451	total: 16.2s	remaining: 3m 5s
8:	learn: 0.0057069	total: 18.2s	remaining: 3m 4s
9:	learn: 0.0046977	total: 19.5s	remaining: 2m 55s
10:	learn: 0.0039203	total: 21.3s	remaining: 2m 52s
11:	learn: 0.0034107	total: 22.9s	remaining: 2m 47s
12:	learn: 0.0030335	total: 24.7s	remaining: 2m 45s
13:	learn: 0.0028321	total: 26.3s	remaining: 2m 41s
14:	learn: 0.0026880	total: 27.6s	remaining: 2m 36s
15:	learn: 0.0026198	total: 29.5s	remaining: 2m 35s
16:	learn: 0.0025787	total: 30.8s	remaining: 2m 30s
17:	learn: 0.0024459	total: 32.3s	remaining: 2m 26s
18:	learn: 0.0023896	total: 33.6s	remaining: 2m 23s
19

<catboost.core.CatBoostClassifier at 0x1d331295060>

### Test part

In [21]:
# # Load the trained models (don't use the utils module, use pickle)
# model_EW = pickle.load(open( 'trained_model/model_EW.pkl', 'rb'))
# model_NS = pickle.load(open('trained_model/model_NS.pkl', 'rb'))
# model_NS_preprocess = pickle.load(
#     open( 'trained_model/model_NS_preprocess.pkl', 'rb'))
# le_EW = pickle.load(open( 'trained_model/le_EW.pkl', 'rb'))
# le_NS = pickle.load(open(  'trained_model/le_NS.pkl', 'rb'))

In [33]:
def do_prediction(model,data,thresh,thresh_add):
    def top_k(x,k):
        ind=np.argpartition(x,-1*k)[-1*k:]
        return ind[np.argsort(x[ind])]
    def top_k_values(x,k):
        ind=np.argpartition(x,-1*k)[-1*k:]
        return x[ind][np.argsort(x[ind])]
    
    pred_proba = pd.DataFrame(model.predict(data,prediction_type='Probability'))
    pred = pred_proba.idxmax(1)
    print('Num of ex to cut: ',sum(pred_proba.max(1)<thresh))
    nothing_index = pred.value_counts().index[0]
    pred.loc[pred_proba.max(1)<thresh] = nothing_index
    
    top_proba = pd.DataFrame(np.apply_along_axis(lambda x: top_k(x,2),1,pred_proba.to_numpy()))
    top_proba_values = pd.DataFrame(np.apply_along_axis(lambda x: top_k_values(x,2),1,pred_proba.to_numpy()))
    # print(top_proba)
    # print(pred_proba*100)
    # print(top_proba_values*100)
    compt=0
    for i in range(len(top_proba)):
        if top_proba.iloc[i,1] == nothing_index:
            if top_proba_values.iloc[i,0]>thresh_add:
                compt +=1
                pred.iloc[i] = top_proba.iloc[i,0]
    print('Num of ex to add: ',compt)
    pred = pred.to_numpy().reshape(-1,1)
    return pred

In [None]:
#pd.Series(model_EW.feature_importances_,index=model_EW.feature_names_).sort_values().tail(50)

In [None]:
# model_NS = pickle.load(open('trained_model/model_NS_small.pkl','rb'))
# model_EW = pickle.load(open('trained_model/model_EW_small.pkl','rb'))
# from sklearn.metrics import fbeta_score, precision_score, recall_score
# valid_data['EW_encoded'] = le_EW.transform(valid_data['EW'])
# valid_data['NS_encoded'] = le_NS.transform(valid_data['NS'])
# recall_score(valid_data['NS_encoded'],pd.DataFrame(model_NS.predict(valid_data[updated_feature_cols]))[0],average='macro')
# recall_score(valid_data['EW_encoded'].to_numpy().reshape(-1,1),model_EW.predict(valid_data[updated_feature_cols]),average='macro')

### Score Train

In [18]:
threshold_ew = 0.1
threshold_ns = 0.1
tresh_add_ew = 0.03
tresh_add_ns = 0.03

In [23]:
# for threshold_ew in [0.3,0.4,0.5,0.6,0.7]:
#     for threshold_ns in [0.3,0.4,0.5,0.6,0.7]:
        # Make predictions on the training data for EW
train_data['Predicted_EW'] = le_EW.inverse_transform(
    #model_EW.predict(train_data[updated_feature_cols])
    do_prediction(model_EW,train_data[model_EW.feature_names_],threshold_ew,tresh_add_ew)
)

# Make predictions on the validation data for NS
train_data['Predicted_NS'] = le_NS.inverse_transform(
    #model_NS.predict(train_data[updated_feature_cols])
    do_prediction(model_NS,train_data[model_NS.feature_names_],threshold_ns,tresh_add_ns)
)

train_data['Predicted_EW'] = train_data['Predicted_EW'].mask(train_data['Predicted_EW']=='Nothing').ffill()
train_data['Predicted_NS'] = train_data['Predicted_NS'].mask(train_data['Predicted_NS']=='Nothing').ffill() 

# # Print the first few rows of the test data with predictions for both EW and NS
# train_data[['TimeIndex', 'ObjectID', 'EW', 
#             'Predicted_EW', 'NS', 'Predicted_NS']].groupby('ObjectID').head(3)

train_results = utils.convert_classifier_output(train_data)
train_results.loc[train_results.TimeIndex==0,'Node'] = 'SS'
evaluator_train = evaluation.NodeDetectionEvaluator(ground_truth_train, train_results, 
                                              tolerance=config.tolerance)
precision, recall, f2, rmse = evaluator_train.score()
print('---------'*4)
print(threshold_ew,threshold_ns)
print(f'Precision for the train set: {precision:.2f}')
print(f'Recall for the train set: {recall:.2f}')
print(f'F2 for the train set: {f2:.2f}')
print(f'RMSE for the train set: {rmse:.2f}')

Num of ex to cut 0
238


  y = column_or_1d(y, warn=True)


Num of ex to cut 0
812


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


------------------------------------
0.1 0.1
Precision for the train set: 0.86
Recall for the train set: 0.90
F2 for the train set: 0.89
RMSE for the train set: 0.21


In [None]:
# ground_truth_train.loc[ground_truth_train.ObjectID == 141]
# train_results.loc[train_results.ObjectID == 141]

###  Score test

In [44]:
threshold_ew = 0.1
threshold_ns = 0.1
tresh_add_ew = 0.20
tresh_add_ns = 0.30

In [47]:
for i in range(0,40,5):
    print(i)

0
5
10
15
20
25
30
35


In [51]:
res = {}
for tresh_add_ew in range(1,40,5):
    tresh_add_ew = tresh_add_ew/100
    for tresh_add_ns in range(1,40,5):
        tresh_add_ns = tresh_add_ns/100
        print('---------------'*5)
        print(tresh_add_ew,tresh_add_ns)
        # Make predictions on the validation data for EW
        valid_data['Predicted_EW'] = le_EW.inverse_transform(
            #model_EW.predict(valid_data[updated_feature_cols])
            do_prediction(model_EW,valid_data[updated_feature_cols+list(added_proba_feature_NS.columns)],threshold_ew,tresh_add_ew)
        )

        # Make predictions on the validation data for NS
        valid_data['Predicted_NS'] = le_NS.inverse_transform(
            #model_NS.predict(valid_data[updated_feature_cols])
            do_prediction(model_NS,valid_data[updated_feature_cols+list(added_proba_feature_EW.columns)],threshold_ns,tresh_add_ns)
        )

        valid_data['Predicted_EW'] = valid_data['Predicted_EW'].mask(valid_data['Predicted_EW']=='Nothing').ffill()
        valid_data['Predicted_NS'] = valid_data['Predicted_NS'].mask(valid_data['Predicted_NS']=='Nothing').ffill()

        valid_results = utils.convert_classifier_output(valid_data)
        valid_results.loc[valid_results.TimeIndex==0,'Node'] = 'SS'

        evaluator_valid = evaluation.NodeDetectionEvaluator(ground_truth_valid, 
                                                      valid_results,
                                                      tolerance=config.tolerance)
        precision, recall, f2, rmse = evaluator_valid.score()
        res[str(tresh_add_ns) + ' ' + str(tresh_add_ew)] = precision, recall, f2, rmse
        print(f'Precision for the validation set: {precision:.2f}')
        print(f'Recall for the validation set: {recall:.2f}')
        print(f'F2 for the validation set: {f2:.2f}')
        print(f'RMSE for the validation set: {rmse:.2f}')

---------------------------------------------------------------------------
0.01 0.01
Num of ex to cut:  0
Num of ex to add:  75
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.73
Recall for the validation set: 0.85
F2 for the validation set: 0.83
RMSE for the validation set: 0.27
---------------------------------------------------------------------------
0.01 0.06
Num of ex to cut:  0
Num of ex to add:  75


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.87
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.26
---------------------------------------------------------------------------
0.01 0.11
Num of ex to cut:  0
Num of ex to add:  75


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.87
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.27
---------------------------------------------------------------------------
0.01 0.16
Num of ex to cut:  0
Num of ex to add:  75


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.89
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.27
---------------------------------------------------------------------------
0.01 0.21
Num of ex to cut:  0
Num of ex to add:  75


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.89
Recall for the validation set: 0.87
F2 for the validation set: 0.87
RMSE for the validation set: 0.26
---------------------------------------------------------------------------
0.01 0.26
Num of ex to cut:  0
Num of ex to add:  75
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.90
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.26
---------------------------------------------------------------------------
0.01 0.31
Num of ex to cut:  0
Num of ex to add:  75
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.26
---------------------------------------------------------------------------
0.01 0.36
Num of ex to cut:  0
Num of ex to add:  75


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.26
---------------------------------------------------------------------------
0.06 0.01
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.76
Recall for the validation set: 0.85
F2 for the validation set: 0.83
RMSE for the validation set: 0.19
---------------------------------------------------------------------------
0.06 0.06
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.90
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.18
---------------------------------------------------------------------------
0.06 0.11
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.19
---------------------------------------------------------------------------
0.06 0.16
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.19
---------------------------------------------------------------------------
0.06 0.21
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.18
---------------------------------------------------------------------------
0.06 0.26
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.18
---------------------------------------------------------------------------
0.06 0.31
Num of ex to cut:  0
Num of ex to add:  31


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.86
F2 for the validation set: 0.88
RMSE for the validation set: 0.18
---------------------------------------------------------------------------
0.06 0.36
Num of ex to cut:  0
Num of ex to add:  31
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.18
---------------------------------------------------------------------------
0.11 0.01
Num of ex to cut:  0
Num of ex to add:  24
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.76
Recall for the validation set: 0.85
F2 for the validation set: 0.83
RMSE for the validation set: 0.16
---------------------------------------------------------------------------
0.11 0.06
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.14
---------------------------------------------------------------------------
0.11 0.11
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.88
F2 for the validation set: 0.88
RMSE for the validation set: 0.15
---------------------------------------------------------------------------
0.11 0.16
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.15
---------------------------------------------------------------------------
0.11 0.21
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.14
---------------------------------------------------------------------------
0.11 0.26
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.14
---------------------------------------------------------------------------
0.11 0.31
Num of ex to cut:  0
Num of ex to add:  24


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.86
F2 for the validation set: 0.88
RMSE for the validation set: 0.14
---------------------------------------------------------------------------
0.11 0.36
Num of ex to cut:  0
Num of ex to add:  24
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.14
---------------------------------------------------------------------------
0.16 0.01
Num of ex to cut:  0
Num of ex to add:  17
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.76
Recall for the validation set: 0.84
F2 for the validation set: 0.82
RMSE for the validation set: 0.13
---------------------------------------------------------------------------
0.16 0.06
Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.16 0.11
Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.13
---------------------------------------------------------------------------
0.16 0.16
Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.13
---------------------------------------------------------------------------
0.16 0.21
Num of ex to cut:  0
Num of ex to add:  17
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.16 0.26
Num of ex to cut:  0
Num of ex to add:  17
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.16 0.31
Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.16 0.36
Num of ex to cut:  0
Num of ex to add:  17
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.21 0.01
Num of ex to cut:  0
Num of ex to add:  12


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.77
Recall for the validation set: 0.84
F2 for the validation set: 0.82
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.21 0.06
Num of ex to cut:  0
Num of ex to add:  12


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.21 0.11
Num of ex to cut:  0
Num of ex to add:  12
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.21 0.16
Num of ex to cut:  0
Num of ex to add:  12
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.11
---------------------------------------------------------------------------
0.21 0.21
Num of ex to cut:  0
Num of ex to add:  12


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.21 0.26
Num of ex to cut:  0
Num of ex to add:  12
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.21 0.31
Num of ex to cut:  0
Num of ex to add:  12


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.21 0.36
Num of ex to cut:  0
Num of ex to add:  12


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.26 0.01
Num of ex to cut:  0
Num of ex to add:  9
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.77
Recall for the validation set: 0.84
F2 for the validation set: 0.82
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.26 0.06
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.26 0.11
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.26 0.16
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.09
---------------------------------------------------------------------------
0.26 0.21
Num of ex to cut:  0
Num of ex to add:  9
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.26 0.26
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.26 0.31
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.26 0.36
Num of ex to cut:  0
Num of ex to add:  9


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.31 0.01
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.76
Recall for the validation set: 0.83
F2 for the validation set: 0.82
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.31 0.06
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.31 0.11
Num of ex to cut:  0
Num of ex to add:  5
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.31 0.16
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.31 0.21
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.85
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.31 0.26
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.85
F2 for the validation set: 0.87
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.31 0.31
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.31 0.36
Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.36 0.01
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  259


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.76
Recall for the validation set: 0.83
F2 for the validation set: 0.82
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.36 0.06
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  38


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.91
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.36 0.11
Num of ex to cut:  0
Num of ex to add:  4
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  27


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.92
Recall for the validation set: 0.86
F2 for the validation set: 0.87
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.36 0.16
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  17


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.93
Recall for the validation set: 0.85
F2 for the validation set: 0.86
RMSE for the validation set: 0.06
---------------------------------------------------------------------------
0.36 0.21
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  14


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.94
Recall for the validation set: 0.85
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.36 0.26
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  10


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.95
Recall for the validation set: 0.85
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.36 0.31
Num of ex to cut:  0
Num of ex to add:  4


  y = column_or_1d(y, warn=True)


Num of ex to cut:  0
Num of ex to add:  5


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.00
---------------------------------------------------------------------------
0.36 0.36
Num of ex to cut:  0
Num of ex to add:  4
Num of ex to cut:  0


  y = column_or_1d(y, warn=True)


Num of ex to add:  4


  y = column_or_1d(y, warn=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  keep = groups[['Node', 'Type']].apply(


Precision for the validation set: 0.96
Recall for the validation set: 0.84
F2 for the validation set: 0.86
RMSE for the validation set: 0.00


In [82]:
a

Unnamed: 0,index,0,tresh_add_ns,tresh_add_ew
0,0.01 0.01,0.732143,10000,1
1,0.06 0.01,0.867797,60000,1
2,0.11 0.01,0.873720,110000,1
3,0.16 0.01,0.885017,160000,1
4,0.21 0.01,0.887719,210000,1
...,...,...,...,...
59,0.16 0.36,0.932075,160000,36
60,0.21 0.36,0.935361,210000,36
61,0.26 0.36,0.949807,260000,36
62,0.31 0.36,0.956863,310000,36


In [88]:
pd.DataFrame(res)

Unnamed: 0,0.01 0.01,0.06 0.01,0.11 0.01,0.16 0.01,0.21 0.01,0.26 0.01,0.31 0.01,0.36 0.01,0.01 0.06,0.06 0.06,...,0.31 0.31,0.36 0.31,0.01 0.36,0.06 0.36,0.11 0.36,0.16 0.36,0.21 0.36,0.26 0.36,0.31 0.36,0.36 0.36
0,0.732143,0.867797,0.87372,0.885017,0.887719,0.900356,0.906137,0.905797,0.759259,0.904594,...,0.957031,0.956863,0.761146,0.912088,0.918819,0.932075,0.935361,0.949807,0.956863,0.956693
1,0.854167,0.879725,0.879725,0.872852,0.869416,0.869416,0.862543,0.859107,0.851211,0.876712,...,0.841924,0.838488,0.829861,0.85567,0.85567,0.848797,0.845361,0.845361,0.838488,0.835052
2,0.826613,0.877313,0.878518,0.875258,0.873016,0.875433,0.870923,0.868056,0.831081,0.88215,...,0.862676,0.85976,0.815143,0.866388,0.867596,0.864241,0.861948,0.864371,0.85976,0.856841
3,0.270501,0.257694,0.265165,0.266207,0.259217,0.259217,0.260248,0.260768,0.191273,0.176777,...,0.0,0.0,0.064685,0.0,0.063372,0.063628,0.0,0.0,0.0,0.0


In [96]:
a = pd.DataFrame(res).loc[1].reset_index()
a[['tresh_add_ns','tresh_add_ew']] = ((a['index'].str.split(' ').apply(pd.Series)))
a['tresh_add_ns']=(a['tresh_add_ns'].apply(float)*100).apply(int)
a['tresh_add_ew']=(a['tresh_add_ew'].apply(float)*100).apply(int)

In [93]:
a

Unnamed: 0,index,2,tresh_add_ns,tresh_add_ew
0,0.01 0.01,0.826613,1,1
1,0.06 0.01,0.877313,6,1
2,0.11 0.01,0.878518,11,1
3,0.16 0.01,0.875258,16,1
4,0.21 0.01,0.873016,21,1
...,...,...,...,...
59,0.16 0.36,0.864241,16,36
60,0.21 0.36,0.861948,21,36
61,0.26 0.36,0.864371,26,36
62,0.31 0.36,0.859760,31,36


In [97]:
a.pivot_table(index='tresh_add_ns',columns='tresh_add_ew',values=1)*100

tresh_add_ew,1,6,11,16,21,26,31,36
tresh_add_ns,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,85.416667,85.121107,85.121107,84.083045,83.737024,83.737024,83.333333,82.986111
6,87.972509,87.671233,87.671233,86.643836,86.30137,86.30137,85.910653,85.56701
11,87.972509,87.671233,87.671233,86.643836,86.30137,86.30137,85.910653,85.56701
16,87.285223,86.986301,86.986301,85.958904,85.616438,85.616438,85.223368,84.879725
21,86.941581,86.643836,86.643836,85.616438,85.273973,85.273973,84.879725,84.536082
26,86.941581,86.643836,86.643836,85.616438,85.273973,85.273973,84.879725,84.536082
31,86.254296,85.958904,85.958904,84.931507,84.589041,84.589041,84.19244,83.848797
36,85.910653,85.616438,85.616438,84.589041,84.246575,84.246575,83.848797,83.505155


In [None]:
res

In [None]:
pd.DataFrame(index=[i.split(' ')[0] for i in res.keys()],columns=[i.split(' ')[1] for i in res.keys()])

['0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36',
 '0.01',
 '0.06',
 '0.11',
 '0.16',
 '0.21',
 '0.26',
 '0.31',
 '0.36']

In [None]:
Precision for the validation set: 0.96
Recall for the validation set: 0.82
F2 for the validation set: 0.84
RMSE for the validation set: 0.00

In [None]:
Precision for the validation set: 0.91
Recall for the validation set: 0.87
F2 for the validation set: 0.88
RMSE for the validation set: 0.14

The `NodeDetectionEvaluator` class in the evaluation module allows not only to
compute the general score for a given dataset, but get evaluations per object, and
even plots that show how the predictions look like in a timeline

In [None]:
# # Loop over the Object IDs in the training set and call the evaluation
# # function for each object and aggregate the results
# total_tp = 0
# total_fp = 0
# total_fn = 0
# for oid in train_data['ObjectID'].unique():
#     tp, fp, fn, gt_object, p_object = evaluator.evaluate(oid)
#     total_tp += tp
#     total_fp += fp
#     total_fn += fn

# print(f'Total true positives: {total_tp}')
# print(f'Total false positives: {total_fp}')
# print(f'Total false negatives: {total_fn}')

In [None]:
# Plot the evaluation timeline for a random ObjectID from the training set
evaluator.plot(13)

In [None]:
# Save the trained random forest models (and label encoders) to disk
# Create the folder trained_model if it doesn't exist
Path('trained_model').mkdir(exist_ok=True)
pickle.dump(model_EW, open('trained_model/model_EW_small.pkl', 'wb'))
pickle.dump(model_NS, open('trained_model/model_NS_small.pkl', 'wb'))
pickle.dump(le_EW, open('trained_model/le_EW_small.pkl', 'wb'))
pickle.dump(le_NS, open('trained_model/le_NS_small.pkl', 'wb'))