# Lab | Random Forests

For this lab, you will be using the CSV files provided in the files_for_lab folder. These are cleaned versions of the learningSet data from the Case Study 'Healthcare for All'.

Instructions

 - Apply the Random Forests algorithm but this time only by upscaling the data.
 - Use Feature Selections that you have learned in class to decide if you want to use all of the features (PCA, etc)
 - Discuss the output and its impact in the bussiness scenario. Is the cost of a false positive equals to the cost of the false negative? How would you change your algorithm or data in order to maximize the return of the bussiness?

## Import libraries

In [41]:
#Standard libraries for data analysis:
    
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import norm, skew, shapiro
from scipy import stats
import statsmodels.api as sm
import re #regex

# sklearn modules for data preprocessing:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#sklearn modules for Model Selection:
from sklearn import svm, tree, linear_model, neighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import confusion_matrix
from sklearn.utils import resample
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2


#Standard libraries for data visualization:
import seaborn as sns
from scipy.stats import boxcox 
from matplotlib import pyplot
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import matplotlib 
%matplotlib inline
color = sns.color_palette()
import matplotlib.ticker as mtick
from pandas.plotting import scatter_matrix
from sklearn.metrics import roc_curve



from sklearn.feature_selection import VarianceThreshold

pd.set_option('display.max_columns', None)

## Load dataset

In [42]:
categorical = pd.read_csv('categorical.csv')
numerical = pd.read_csv('numerical.csv')
target = pd.read_csv('target.csv')


In [43]:
data = pd.concat([numerical, categorical, target], axis = 1)
data.shape

(95412, 339)

In [44]:
print(numerical.shape)
print(categorical.shape)

(95412, 315)
(95412, 22)


## Feature Selection

### 1. Variance Threshold Feature Selection

In [45]:
# Split categorical and numerical features

data_features = data.drop(['TARGET_B','TARGET_D'], axis = 1)
data_cat = data_features.select_dtypes(include = object)
data_num = data_features.select_dtypes(include = np.number)


In [46]:
# Scale to ensures that all numeric variables are on the same scale

scaler = MinMaxScaler()
numerical_scaled = scaler.fit_transform(data_num)
numerical_scaled = pd.DataFrame(numerical_scaled, columns = data_num.columns)
numerical_scaled.reset_index(drop = True, inplace = True)

In [47]:
# Encode to ensures that all variables are on the same scale

encoder = OneHotEncoder(drop='first').fit(data_cat)

cols = encoder.get_feature_names_out(input_features=data_cat.columns)

categorical_encode = pd.DataFrame(encoder.transform(data_cat).toarray(),columns=cols)

categorical_encode.reset_index(drop = True, inplace = True)


In [48]:
data_normalized = pd.concat([numerical_scaled, categorical_encode], axis = 1)

In [49]:
# use the estimator with a low threshold (minimum value of variance we want in out dataset)
threshold=0.02
selection = VarianceThreshold(threshold=0.02)

# Fit
selection = selection.fit(data_normalized)

In [50]:
# Subset the DataFrame
data_variance = selection.transform(data_normalized)
data_variance = pd.DataFrame(data_variance)


print(data_normalized.shape)
print(data_variance.shape)

(95412, 354)
(95412, 114)


In [51]:
selection.variances_ > threshold
selection.get_support()
var_list = list(selection.get_support())
#list(zip(numerical.columns, var_list))

In [52]:
drop_columns = [col[0] for col in zip(data_normalized.columns, var_list) if col[1] == False]
#drop_columns

we were able to drop 415 features from the dataset.

### 2. Test if the model improved by dropping so many features.

In [53]:
# X/y split

X = data.drop(['TARGET_B','TARGET_D'], axis = 1)
y = data['TARGET_B']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [54]:
display(X_train.shape)
display(X_test.shape)
display(y_train.shape)
display(y_test.shape) 


(76329, 337)

(19083, 337)

(76329,)

(19083,)

In [55]:
# Split categorical and numerical features

X_train_cat = X_train.select_dtypes(include = object)
X_train_num =X_train.select_dtypes(include = np.number)


X_test_cat = X_test.select_dtypes(include = object)
X_test_num =X_test.select_dtypes(include = np.number)

In [56]:
# MinMaxScale numerical features to ensure that all variables are on the same scale
scaler = MinMaxScaler().fit(X_train_num)

X_train_num_scaled = scaler.transform(X_train_num)
X_train_num_scaled = pd.DataFrame(X_train_num_scaled)
X_train_num_scaled.columns = X_train_num.columns


X_test_num_scaled = scaler.transform(X_test_num)
X_test_num_scaled = pd.DataFrame(X_test_num_scaled)
X_test_num_scaled.columns = X_test_num.columns

X_train_num_scaled.reset_index(drop = True, inplace = True)
X_test_num_scaled.reset_index(drop = True, inplace = True)

In [57]:
# Encode to ensure that all variables are on the same scale

encoder = OneHotEncoder(drop='first').fit(X_train_cat)

cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)

X_train_cat_encode = pd.DataFrame(encoder.transform(X_train_cat).toarray(),columns=cols)

X_train_cat_encode.reset_index(drop = True, inplace = True)

In [58]:
cols = encoder.get_feature_names_out(input_features=X_test_cat.columns)

X_test_cat_encode = pd.DataFrame(encoder.transform(X_test_cat).toarray(),columns=cols)

X_test_cat_encode.reset_index(drop = True, inplace = True)


In [59]:
# Put back together numerical and categorical variables after scaling / encoding

X_train_normalized = pd.concat([X_train_num_scaled,X_train_cat_encode], axis=1)

X_test_normalized = pd.concat([X_test_num_scaled,X_test_cat_encode], axis=1)

y_train.reset_index(drop = True, inplace = True)
y_test.reset_index(drop = True, inplace = True)

#### Full  dataset

In [60]:
# Init, fit, score
forest = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20)

_ = forest.fit(X_train_normalized, y_train)


# Training Score
print(f"Training Score: {forest.score(X_train_normalized, y_train)}")

print(f"Test Score: {forest.score(X_test_normalized, y_test)}")


Training Score: 0.9491674199845406
Test Score: 0.9495362364408112


In [61]:
y_pred = forest.predict(X_test_normalized)
display(confusion_matrix(y_test, y_pred))

array([[18120,     0],
       [  963,     0]])

In [62]:
pred = forest.predict(X_test_normalized)

print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))

precision:  0.0
recall:  0.0
f1:  0.0


  _warn_prf(average, modifier, msg_start, len(result))


 - Feature selected dataset 

In [63]:
# Keep only selected features

X_train_normalized_vt = X_train_normalized.drop(drop_columns, axis = 1)
X_test_normalized_vt = X_test_normalized.drop(drop_columns, axis = 1)

In [64]:
print(X_train_normalized.shape)
print(X_train_normalized_vt.shape)

(76329, 354)
(76329, 114)


In [65]:
# Init, fit, score
forest = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20)

_ = forest.fit(X_train_normalized_vt, y_train)


# Training Score
print(f"Training Score: {forest.score(X_train_normalized_vt, y_train)}")

print(f"Test Score: {forest.score(X_test_normalized_vt, y_test)}")


Training Score: 0.9491674199845406
Test Score: 0.9495362364408112


In [66]:
y_pred = forest.predict(X_test_normalized_vt)
display(confusion_matrix(y_test, y_pred))

array([[18120,     0],
       [  963,     0]])

# WHY IS THE RESULT THE SAME????

In [67]:
# Data is imbalanced!!

Questions:
 - do we treat imbalance after of before selecting features?
 - 

## Imbalanced data - 

In [68]:
data['TARGET_B'].value_counts()

0    90569
1     4843
Name: TARGET_B, dtype: int64

In [69]:
# upsampling is done on the train set, so we need to put together X and y

train_set = pd.concat([X_train_normalized,y_train], axis = 1)


In [70]:

no_donate = train_set[train_set['TARGET_B']==0]
yes_donate = train_set[train_set['TARGET_B']==1]



In [71]:
from sklearn.utils import resample

yes_donate_oversampled = resample(yes_donate, 
                                    replace=True,
                                    n_samples = len(no_donate),
                                    random_state=42)


display(no_donate.shape)
display(yes_donate_oversampled.shape)

(72449, 355)

(72449, 355)

In [72]:
# Make full df
oversampled_target = pd.concat([no_donate,yes_donate_oversampled], axis = 0)
oversampled_target


Unnamed: 0,TCODE,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MAXRAMNT,LASTGIFT,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,CLUSTER,DATASRCE,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,HOMEOWNR_U,GENDER_M,GENDER_other,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,TARGET_B
0,0.000014,0.793814,1.000000,1.000000,0.161826,0.000000,0.414141,0.454545,0.484848,0.080808,0.101010,0.034483,1.000000,0.015603,0.017504,0.021100,1.000000,0.0,0.000000,0.474747,0.535354,0.898990,0.010101,0.020202,0.020202,0.171717,0.045455,0.000000,0.010101,0.000000,0.000000,0.000000,0.0,0.092784,0.0,0.000000,0.081395,0.500000,0.571429,0.619048,0.511905,0.595238,0.630952,0.213333,0.414141,0.323232,0.262626,0.101010,0.161616,0.191919,0.121212,0.28,0.262295,0.131313,0.131313,0.191919,0.272727,0.202020,0.212121,0.373737,0.161616,0.353535,0.383838,0.373737,0.252525,0.131313,0.040404,0.010101,0.545455,0.111111,0.123288,0.262626,0.203077,0.294286,0.565657,0.565657,0.000000,0.434343,0.434343,0.404040,0.000000,0.000000,0.000000,0.210667,0.256833,0.307692,0.307692,0.555556,0.454545,0.969697,0.040404,0.060606,0.191919,0.565657,0.484848,0.151515,0.747475,0.262626,0.040404,0.02,0.030303,0.171717,0.313131,0.060606,0.133333,0.494949,0.303030,0.000000,0.010101,0.000000,0.181818,0.353535,0.626263,0.828283,0.989899,0.080808,0.060606,0.444444,0.705882,0.577778,0.196721,0.100,0.060606,0.373737,0.000000,0.030303,0.030303,0.383838,0.000000,0.232323,0.505051,0.757576,1.000000,0.021368,0.563748,0.896708,0.207333,0.459333,0.340000,0.461333,0.138503,0.202020,0.202020,0.151515,0.101010,0.070707,0.254545,0.14,0.049180,0.060606,0.101010,0.111111,0.151515,0.080808,0.111111,0.222222,0.16,0.060606,0.101010,0.303030,0.020202,0.666667,0.090909,0.404040,0.606061,0.060606,0.878788,0.040404,0.000000,0.000000,0.000000,0.000000,0.04,0.040404,0.030303,0.000000,0.000000,0.144444,0.184211,0.000000,0.404040,0.656566,0.787879,0.555556,0.727273,0.515152,0.474747,0.222222,1.000000,1.000000,0.000000,0.434343,0.161616,0.050505,0.090909,0.141414,0.000000,0.000000,0.030303,0.010101,0.040404,0.010101,0.040404,0.010101,0.010101,0.000000,0.010101,0.040404,0.010101,0.015625,0.040404,0.121212,0.080808,0.020202,0.030303,0.014925,0.111111,0.202020,0.313131,0.010101,0.080808,0.101010,0.030303,0.080808,0.585859,0.121212,0.010101,0.941176,0.020202,0.020202,0.202020,0.141414,0.081081,0.282828,0.313131,0.051546,0.212121,0.000000,0.152778,0.151515,0.000000,0.000000,0.000000,0.191919,0.414141,0.100000,0.454545,0.111111,0.484848,0.040404,0.000000,0.060606,0.064516,0.119565,0.06383,0.142857,0.030303,0.036364,0.014706,0.010101,0.000000,0.038462,0.000000,0.000000,0.0,0.101010,0.383838,0.787879,0.121212,0.020202,0.080808,0.888889,0.434343,0.161616,0.161290,0.500000,0.010101,0.030303,0.040404,0.151515,0.414141,0.595960,0.000000,0.000000,0.808081,0.030303,0.141414,0.020202,0.000000,0.010101,1.000000,0.000000,1.000000,1.000000,1.000000,0.428571,0.6,0.050505,0.111111,0.483333,0.352632,0.315789,0.187500,0.019848,0.038136,0.170732,0.005,0.007407,0.042,0.014368,0.018839,0.727356,1.0,0.000000,0.196721,0.403846,0.0,0.000000,0.307692,0.0,0.206186,0.000000,0.55,0.727273,0.818182,0.818182,0.0,0.454545,0.916667,0.727273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.000000,0.624862,0.666667,1.000000,0.000000,0.000000,0.222222,0.262626,0.373737,0.030303,0.020202,0.034483,1.000000,0.548505,0.358958,0.861283,1.000000,0.0,0.000000,0.515152,0.494949,0.585859,0.040404,0.000000,0.353535,0.060606,0.045455,0.027778,0.252525,0.031250,0.021739,0.044444,0.0,0.030928,0.0,0.000000,0.023256,0.452381,0.476190,0.511905,0.500000,0.535714,0.571429,0.133333,0.404040,0.363636,0.242424,0.111111,0.262626,0.202020,0.121212,0.22,0.163934,0.090909,0.181818,0.151515,0.282828,0.202020,0.202020,0.252525,0.161616,0.232323,0.595960,0.262626,0.141414,0.080808,0.040404,0.020202,0.303030,0.131313,0.109589,0.484848,0.130769,0.242857,0.020202,0.010101,0.030303,0.969697,0.939394,0.878788,0.040404,0.010101,0.030303,0.833333,0.800167,0.384615,0.461538,0.131313,0.878788,0.919192,0.090909,0.090909,0.090909,0.282828,0.212121,0.070707,0.464646,0.545455,0.030303,0.02,0.020202,0.393939,0.373737,0.131313,0.026667,0.424242,0.141414,0.000000,0.030303,0.012346,0.909091,0.939394,0.969697,0.979798,0.989899,0.828283,0.474747,0.060606,0.352941,0.322222,0.163934,0.125,0.212121,0.757576,0.000000,0.010101,0.060606,0.787879,0.000000,0.555556,0.757576,0.888889,0.949495,0.786325,0.099846,0.916005,0.172667,0.215333,0.238667,0.308667,0.133140,0.292929,0.191919,0.161616,0.141414,0.121212,0.072727,0.06,0.016393,0.030303,0.202020,0.171717,0.151515,0.151515,0.131313,0.060606,0.10,0.020202,0.050505,0.242424,0.090909,0.484848,0.151515,0.595960,0.414141,0.252525,0.232323,0.060606,0.383838,0.343434,0.042254,0.021277,0.04,0.272727,0.050505,0.000000,0.141414,0.244444,0.342105,0.058824,0.757576,0.676768,0.737374,0.616162,0.676768,0.585859,0.575758,0.414141,0.555556,0.878788,0.181818,0.181818,0.191919,0.030303,0.141414,0.171717,0.023256,0.036364,0.141414,0.000000,0.050505,0.050505,0.020202,0.020202,0.010101,0.000000,0.030303,0.101010,0.040404,0.031250,0.030303,0.202020,0.141414,0.080808,0.070707,0.029851,0.050505,0.040404,0.141414,0.030303,0.030303,0.020202,0.030303,0.090909,0.757576,0.070707,0.000000,0.823529,0.121212,0.101010,0.161616,0.191919,0.135135,0.242424,0.131313,0.041237,0.151515,0.000000,0.111111,0.111111,0.000000,0.000000,0.000000,0.121212,0.222222,0.033333,0.262626,0.171717,0.373737,0.121212,0.000000,0.030303,0.032258,0.043478,0.00000,0.000000,0.040404,0.054545,0.000000,0.010101,0.000000,0.019231,0.052632,0.000000,0.0,0.363636,0.252525,0.585859,0.050505,0.303030,0.070707,0.454545,0.090909,0.020202,0.129032,0.980769,0.010101,0.060606,0.131313,0.191919,0.414141,0.595960,0.066667,0.064516,0.575758,0.020202,0.313131,0.020202,0.000000,0.060606,1.000000,0.000000,1.000000,1.000000,0.979798,0.142857,0.6,0.050505,0.060606,0.516667,0.384211,0.368421,0.218750,0.013197,0.076271,0.292683,0.003,0.001602,0.010,0.004789,0.005985,0.985332,1.0,0.333333,0.147541,0.000000,1.0,0.000000,0.153846,0.0,0.000000,0.090909,0.55,0.636364,0.863636,0.090909,1.0,0.090909,0.906250,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
2,0.000014,0.624862,0.666667,1.000000,0.000000,0.020202,0.414141,0.313131,0.474747,0.080808,0.000000,0.000000,1.000000,0.120495,0.152487,0.139480,0.858586,0.0,0.151515,0.494949,0.515152,0.989899,0.000000,0.000000,0.000000,0.050505,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.020619,0.0,0.025641,0.023256,0.500000,0.595238,0.630952,0.500000,0.595238,0.630952,0.253333,0.404040,0.393939,0.222222,0.080808,0.161616,0.181818,0.131313,0.34,0.311475,0.080808,0.151515,0.171717,0.323232,0.191919,0.161616,0.363636,0.080808,0.333333,0.202020,0.474747,0.333333,0.171717,0.070707,0.020202,0.656566,0.111111,0.095890,0.171717,0.252308,0.342857,0.565657,0.474747,0.070707,0.444444,0.373737,0.353535,0.000000,0.000000,0.000000,0.211167,0.331333,0.461538,0.461538,0.727273,0.282828,0.626263,0.383838,0.747475,0.262626,0.737374,0.646465,0.202020,0.848485,0.161616,0.060606,0.02,0.050505,0.131313,0.171717,0.060606,0.213333,0.535354,0.303030,0.000000,0.000000,0.000000,0.363636,0.444444,0.636364,0.888889,1.000000,0.222222,0.020202,0.404040,0.588235,0.588889,0.213115,0.075,0.151515,0.292929,0.000000,0.161616,0.080808,0.040404,0.000000,0.616162,0.848485,0.939394,0.959596,0.571047,0.204301,0.648127,0.267333,0.302667,0.348667,0.390667,0.149533,0.131313,0.161616,0.151515,0.191919,0.161616,0.127273,0.08,0.032787,0.080808,0.101010,0.121212,0.121212,0.212121,0.181818,0.090909,0.10,0.030303,0.101010,0.373737,0.030303,0.545455,0.080808,0.636364,0.373737,0.363636,0.797980,0.141414,0.000000,0.000000,0.000000,0.000000,0.04,0.030303,0.040404,0.020202,0.060606,0.188889,0.250000,0.011765,0.595960,0.585859,0.656566,0.515152,0.626263,0.494949,0.727273,0.525253,0.878788,1.000000,0.040404,0.111111,0.161616,0.030303,0.171717,0.131313,0.000000,0.036364,0.131313,0.030303,0.121212,0.030303,0.030303,0.040404,0.030303,0.000000,0.121212,0.050505,0.030303,0.015625,0.040404,0.202020,0.131313,0.060606,0.080808,0.059701,0.070707,0.050505,0.070707,0.020202,0.060606,0.010101,0.010101,0.111111,0.757576,0.060606,0.010101,0.805882,0.030303,0.080808,0.292929,0.232323,0.189189,0.202020,0.090909,0.020619,0.171717,0.033333,0.194444,0.040404,0.000000,0.000000,0.000000,0.212121,0.434343,0.033333,0.232323,0.181818,0.515152,0.070707,0.012048,0.090909,0.064516,0.119565,0.00000,0.071429,0.070707,0.090909,0.014706,0.030303,0.000000,0.019231,0.052632,0.037037,0.0,0.080808,0.131313,0.909091,0.060606,0.000000,0.040404,0.969697,0.595960,0.151515,0.129032,0.211538,0.040404,0.272727,0.525253,0.838384,0.969697,0.040404,0.122222,0.048387,0.010101,0.000000,0.979798,0.010101,0.000000,0.000000,0.939394,0.070707,0.919192,1.000000,0.989899,0.428571,0.6,0.090909,0.090909,0.083333,0.052632,0.263158,0.156250,0.001056,0.000000,0.024390,0.023,0.003604,0.023,0.008621,0.021742,0.226099,1.0,0.000000,0.196721,0.442308,1.0,0.000000,0.923077,0.0,0.000000,0.090909,0.95,0.000000,0.954545,0.000000,0.5,0.000000,1.000000,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
3,0.000000,0.329897,0.166667,0.333333,0.000000,0.000000,0.151515,0.323232,0.121212,0.040404,0.010101,0.000000,0.333333,0.011803,0.010730,0.013784,1.000000,0.0,0.000000,0.535354,0.474747,0.919192,0.000000,0.000000,0.040404,0.101010,0.000000,0.013889,0.000000,0.015625,0.000000,0.000000,0.0,0.061856,0.0,0.000000,0.046512,0.321429,0.357143,0.392857,0.333333,0.392857,0.440476,0.253333,0.565657,0.303030,0.141414,0.232323,0.414141,0.242424,0.060606,0.08,0.032787,0.010101,0.242424,0.242424,0.242424,0.151515,0.131313,0.040404,0.010101,0.040404,0.212121,0.424242,0.373737,0.151515,0.050505,0.020202,0.363636,0.161616,0.013699,0.474747,0.260000,0.341429,0.393939,0.363636,0.040404,0.606061,0.575758,0.131313,0.000000,0.000000,0.000000,0.583333,0.580500,0.615385,0.615385,0.232323,0.777778,0.949495,0.060606,0.030303,0.282828,0.525253,0.343434,0.181818,0.626263,0.383838,0.101010,0.04,0.080808,0.333333,0.232323,0.272727,0.200000,0.727273,0.040404,0.000000,0.000000,0.000000,0.919192,0.989899,0.989899,1.000000,1.000000,0.696970,0.080808,0.202020,0.505882,0.500000,0.213115,0.125,0.606061,0.000000,0.000000,0.151515,0.494949,0.131313,0.000000,1.000000,1.000000,1.000000,1.000000,0.635150,0.019969,0.911464,0.269333,0.275333,0.316000,0.345333,0.107482,0.080808,0.121212,0.232323,0.212121,0.222222,0.181818,0.04,0.032787,0.020202,0.080808,0.101010,0.252525,0.202020,0.191919,0.101010,0.06,0.030303,0.030303,0.060606,0.020202,0.242424,0.060606,0.747475,0.262626,0.101010,0.868687,0.111111,0.010101,0.010101,0.000000,0.000000,0.00,0.000000,0.020202,0.000000,0.272727,0.300000,0.394737,0.082353,0.717172,0.919192,0.969697,0.878788,0.878788,0.848485,0.787879,0.343434,1.000000,1.000000,0.000000,0.131313,0.212121,0.060606,0.121212,0.171717,0.000000,0.054545,0.090909,0.000000,0.121212,0.020202,0.030303,0.010101,0.000000,0.000000,0.161616,0.222222,0.030303,0.000000,0.050505,0.202020,0.090909,0.060606,0.010101,0.014925,0.050505,0.010101,0.080808,0.030303,0.020202,0.020202,0.000000,0.020202,0.939394,0.010101,0.000000,0.823529,0.010101,0.010101,0.333333,0.262626,0.405405,0.212121,0.030303,0.030928,0.333333,0.066667,0.208333,0.191919,0.010309,0.020202,0.000000,0.080808,0.161616,0.000000,0.434343,0.262626,0.090909,0.222222,0.000000,0.161616,0.032258,0.076087,0.00000,0.000000,0.121212,0.018182,0.000000,0.010101,0.023256,0.000000,0.000000,0.037037,0.0,0.040404,0.565657,0.939394,0.040404,0.000000,0.020202,0.989899,0.707071,0.272727,0.096774,0.250000,0.000000,0.000000,0.575758,0.656566,0.727273,0.282828,0.000000,0.000000,0.989899,0.020202,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.857143,0.6,0.030303,0.010101,0.250000,0.205263,0.210526,0.093750,0.014992,0.033898,0.048780,0.010,0.004004,0.020,0.003831,0.015957,0.818672,1.0,0.666667,0.114754,0.000000,1.0,0.000000,0.692308,0.0,0.670103,0.000000,0.90,0.454545,0.863636,0.909091,0.0,0.909091,0.968750,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,0.000389,0.649485,0.500000,0.666667,0.008299,0.000000,0.212121,0.616162,0.121212,0.020202,0.111111,0.000000,0.555556,0.011206,0.013633,0.011044,0.000000,0.0,1.000000,0.494949,0.515152,0.959596,0.030303,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.011628,0.392857,0.488095,0.511905,0.404762,0.523810,0.559524,0.373333,0.313131,0.434343,0.262626,0.111111,0.222222,0.242424,0.151515,0.30,0.131148,0.040404,0.090909,0.141414,0.363636,0.232323,0.181818,0.181818,0.060606,0.151515,0.151515,0.353535,0.515152,0.323232,0.090909,0.030303,0.686869,0.080808,0.068493,0.191919,0.315385,0.402857,0.808081,0.808081,0.000000,0.070707,0.070707,0.000000,0.000000,0.000000,0.000000,0.113000,0.113000,0.230769,0.153846,0.828283,0.181818,0.959596,0.050505,0.000000,0.454545,0.838384,0.737374,0.383838,0.919192,0.090909,0.070707,0.04,0.050505,0.090909,0.111111,0.030303,0.293333,0.616162,0.131313,0.018182,0.020202,0.012346,0.000000,0.000000,0.111111,0.404040,0.717172,0.000000,0.010101,0.454545,0.611765,0.600000,0.245902,0.125,0.070707,0.000000,0.121212,0.080808,0.060606,0.000000,0.030303,0.020202,0.070707,0.131313,0.737374,0.000000,0.302611,0.594779,0.236000,0.268667,0.233333,0.261333,0.060181,0.262626,0.101010,0.131313,0.282828,0.212121,0.036364,0.00,0.000000,0.000000,0.181818,0.060606,0.141414,0.343434,0.252525,0.020202,0.00,0.000000,0.000000,0.181818,0.020202,0.333333,0.121212,0.434343,0.575758,0.060606,0.919192,0.080808,0.000000,0.000000,0.000000,0.000000,0.00,0.020202,0.000000,0.020202,0.434343,0.200000,0.302632,0.070588,0.484848,0.717172,0.898990,0.505051,0.818182,0.484848,0.505051,0.363636,1.000000,0.000000,0.000000,0.090909,0.101010,0.020202,0.151515,0.080808,0.000000,0.090909,0.090909,0.000000,0.151515,0.141414,0.020202,0.111111,0.000000,0.016393,0.030303,0.292929,0.000000,0.046875,0.010101,0.242424,0.111111,0.000000,0.040404,0.000000,0.050505,0.121212,0.030303,0.030303,0.020202,0.111111,0.000000,0.080808,0.767677,0.030303,0.000000,0.705882,0.131313,0.212121,0.272727,0.181818,0.162162,0.090909,0.060606,0.123711,0.212121,0.200000,0.277778,0.060606,0.000000,0.000000,0.000000,0.121212,0.212121,0.066667,0.616162,0.242424,0.121212,0.060606,0.000000,0.040404,0.032258,0.032609,0.00000,0.000000,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000,0.105263,0.000000,0.0,0.000000,0.868687,0.989899,0.020202,0.000000,0.000000,0.949495,0.777778,0.363636,0.161290,0.288462,0.040404,0.242424,0.373737,0.626263,0.797980,0.212121,0.000000,0.000000,0.131313,0.383838,0.424242,0.020202,0.000000,0.050505,0.757576,0.222222,0.121212,1.000000,0.989899,0.285714,0.4,0.101010,0.020202,0.550000,0.431579,0.315789,0.187500,0.033361,0.088983,0.243902,0.005,0.005005,0.020,0.006705,0.013686,0.139086,1.0,0.000000,0.688525,0.750000,1.0,0.333333,0.153846,0.0,0.350515,0.000000,0.65,0.454545,0.909091,0.000000,0.0,1.000000,0.906250,0.181818,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2504,0.000014,0.628866,0.166667,0.111111,0.012448,0.030303,0.232323,0.424242,0.434343,0.080808,0.181818,0.091954,1.000000,0.009341,0.010098,0.009999,1.000000,0.0,0.000000,0.444444,0.565657,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.416667,0.559524,0.619048,0.452381,0.571429,0.619048,0.333333,0.383838,0.393939,0.222222,0.131313,0.181818,0.171717,0.131313,0.32,0.229508,0.090909,0.151515,0.151515,0.313131,0.232323,0.161616,0.373737,0.121212,0.353535,0.303030,0.292929,0.414141,0.242424,0.111111,0.050505,0.343434,0.171717,0.178082,0.363636,0.260000,0.364286,0.797980,0.747475,0.070707,0.191919,0.121212,0.090909,0.000000,0.000000,0.000000,0.058167,0.060500,0.153846,0.076923,0.535354,0.474747,0.909091,0.101010,0.000000,0.343434,0.686869,0.323232,0.141414,0.858586,0.151515,0.202020,0.02,0.191919,0.161616,0.323232,0.020202,0.000000,0.000000,0.000000,0.381818,0.555556,0.296296,0.000000,0.000000,0.000000,0.030303,0.191919,0.000000,0.070707,0.383838,0.588235,0.533333,0.213115,0.125,0.121212,0.070707,0.010101,0.292929,0.090909,0.080808,0.000000,0.010101,0.010101,0.040404,0.303030,0.918803,0.354839,0.703746,0.107333,0.128000,0.130000,0.154000,0.039731,0.444444,0.282828,0.141414,0.090909,0.050505,0.000000,0.00,0.000000,0.000000,0.313131,0.313131,0.191919,0.121212,0.070707,0.000000,0.00,0.000000,0.000000,0.383838,0.070707,0.040404,0.282828,0.353535,0.656566,0.030303,0.646465,0.212121,0.050505,0.000000,0.000000,0.106383,0.00,0.101010,0.000000,0.030303,0.030303,0.177778,0.276316,0.105882,0.575758,0.666667,0.717172,0.606061,0.585859,0.606061,0.898990,0.313131,0.969697,0.000000,0.060606,0.060606,0.030303,0.000000,0.060606,0.010101,0.046512,0.000000,0.424242,0.020202,0.121212,0.101010,0.090909,0.070707,0.020202,0.000000,0.060606,0.090909,0.080808,0.031250,0.000000,0.191919,0.000000,0.000000,0.101010,0.000000,0.252525,0.141414,0.020202,0.020202,0.080808,0.181818,0.080808,0.060606,0.606061,0.010101,0.000000,0.676471,0.171717,0.393939,0.222222,0.131313,0.189189,0.000000,0.020202,0.000000,0.252525,0.033333,0.305556,0.020202,0.020619,0.030303,0.000000,0.121212,0.232323,0.066667,0.424242,0.000000,0.434343,0.060606,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.959596,1.000000,0.000000,0.000000,0.000000,0.707071,0.373737,0.121212,0.322581,0.634615,0.000000,0.000000,0.000000,0.212121,0.333333,0.676768,0.000000,0.000000,0.838384,0.020202,0.151515,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.989899,0.929293,0.190476,0.4,0.080808,0.090909,0.183333,0.121053,0.263158,0.156250,0.003062,0.008475,0.000000,0.010,0.002603,0.018,0.004789,0.012731,0.247792,1.0,0.000000,0.819672,0.596154,1.0,0.666667,0.846154,0.0,0.371134,0.000000,0.85,0.818182,0.954545,0.181818,0.5,0.181818,0.979167,0.818182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1
69669,0.000000,1.000000,0.166667,0.666667,0.004149,0.020202,0.464646,0.303030,0.474747,0.111111,0.111111,0.126437,0.666667,0.018895,0.022048,0.017795,1.000000,0.0,0.000000,0.474747,0.535354,0.898990,0.030303,0.000000,0.010101,0.434343,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.422680,0.0,0.000000,0.023256,0.464286,0.571429,0.607143,0.464286,0.571429,0.619048,0.333333,0.282828,0.424242,0.313131,0.121212,0.131313,0.181818,0.181818,0.36,0.245902,0.060606,0.111111,0.111111,0.292929,0.272727,0.222222,0.343434,0.090909,0.292929,0.151515,0.343434,0.515152,0.333333,0.151515,0.070707,0.616162,0.080808,0.095890,0.232323,0.313846,0.420000,0.969697,0.959596,0.000000,0.040404,0.040404,0.030303,0.000000,0.000000,0.000000,0.120833,0.129167,0.384615,0.307692,0.888889,0.121212,1.000000,0.010101,0.000000,0.363636,0.838384,0.707071,0.292929,0.939394,0.070707,0.070707,0.04,0.050505,0.060606,0.181818,0.020202,0.226667,0.494949,0.232323,0.018182,0.020202,0.000000,0.000000,0.010101,0.101010,0.434343,0.979798,0.000000,0.000000,0.868687,0.752941,0.744444,0.245902,0.100,0.010101,0.030303,0.000000,0.090909,0.000000,0.030303,0.000000,0.262626,0.767677,0.898990,0.929293,0.247863,0.569892,0.868331,0.222667,0.236000,0.278667,0.288000,0.082780,0.101010,0.181818,0.252525,0.202020,0.151515,0.090909,0.06,0.016393,0.020202,0.070707,0.161616,0.242424,0.262626,0.151515,0.060606,0.04,0.020202,0.020202,0.404040,0.030303,0.535354,0.101010,0.272727,0.737374,0.050505,0.777778,0.121212,0.010101,0.010101,0.000000,0.000000,0.04,0.030303,0.060606,0.050505,0.000000,0.177778,0.263158,0.058824,0.595960,0.636364,0.727273,0.565657,0.717172,0.535354,0.707071,0.515152,0.333333,0.424242,0.171717,0.232323,0.191919,0.070707,0.181818,0.171717,0.046512,0.054545,0.020202,0.000000,0.070707,0.010101,0.020202,0.000000,0.000000,0.000000,0.040404,0.111111,0.030303,0.125000,0.050505,0.141414,0.080808,0.060606,0.020202,0.029851,0.040404,0.161616,0.040404,0.131313,0.111111,0.111111,0.111111,0.060606,0.595960,0.030303,0.000000,0.782353,0.101010,0.111111,0.202020,0.282828,0.162162,0.191919,0.060606,0.020619,0.343434,0.033333,0.333333,0.101010,0.010309,0.020202,0.012821,0.191919,0.464646,0.000000,0.303030,0.464646,0.474747,0.070707,0.000000,0.050505,0.032258,0.032609,0.00000,0.000000,0.020202,0.018182,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.141414,0.535354,0.535354,0.424242,0.000000,0.040404,0.979798,0.656566,0.222222,0.516129,0.538462,0.000000,0.000000,0.020202,0.060606,0.686869,0.323232,0.000000,0.000000,0.979798,0.000000,0.020202,0.000000,0.033333,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.238095,0.6,0.090909,0.090909,0.316667,0.236842,0.315789,0.187500,0.006757,0.016949,0.048780,0.010,0.004004,0.025,0.031609,0.014132,0.672297,1.0,0.333333,0.442623,0.211538,1.0,0.000000,0.384615,0.0,0.010309,0.090909,0.75,0.454545,0.954545,0.000000,0.5,0.000000,0.927083,0.727273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1
71333,0.000000,0.835052,1.000000,0.777778,0.000000,0.000000,0.444444,0.131313,0.626263,0.020202,0.000000,0.000000,1.000000,0.012219,0.014306,0.020930,1.000000,0.0,0.000000,0.424242,0.585859,1.000000,0.000000,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.010309,0.0,0.000000,0.000000,0.821429,0.833333,0.845238,0.738095,0.761905,0.773810,0.040000,0.444444,0.282828,0.282828,0.040404,0.090909,0.070707,0.090909,0.24,0.311475,0.404040,0.181818,0.131313,0.242424,0.131313,0.323232,0.656566,0.333333,0.646465,0.484848,0.454545,0.060606,0.010101,0.010101,0.000000,0.545455,0.101010,0.315068,0.131313,0.158462,0.228571,0.141414,0.121212,0.020202,0.828283,0.808081,0.767677,0.020202,0.020202,0.000000,0.323000,0.414500,0.538462,0.615385,0.757576,0.252525,0.727273,0.282828,0.595960,0.030303,0.464646,0.414141,0.020202,0.616162,0.393939,0.010101,0.00,0.010101,0.161616,0.414141,0.060606,0.026667,0.333333,0.646465,0.000000,0.000000,0.000000,0.484848,0.646465,0.888889,0.969697,1.000000,0.323232,0.050505,0.141414,0.470588,0.477778,0.163934,0.075,0.080808,0.737374,0.000000,0.040404,0.050505,0.151515,0.000000,0.696970,0.868687,0.919192,0.939394,0.957265,0.198157,0.622020,0.322000,0.524667,0.419333,0.538000,0.271385,0.151515,0.111111,0.121212,0.131313,0.171717,0.127273,0.16,0.049180,0.151515,0.080808,0.070707,0.121212,0.131313,0.070707,0.121212,0.26,0.050505,0.222222,0.666667,0.030303,0.656566,0.090909,0.616162,0.393939,0.222222,0.565657,0.191919,0.000000,0.000000,0.000000,0.000000,0.00,0.070707,0.181818,0.020202,0.090909,0.166667,0.315789,0.164706,0.525253,0.353535,0.404040,0.323232,0.404040,0.292929,1.000000,1.000000,0.000000,0.000000,0.000000,0.181818,0.232323,0.050505,0.181818,0.171717,0.000000,0.036364,0.111111,0.020202,0.040404,0.000000,0.000000,0.000000,0.020202,0.000000,0.040404,0.000000,0.020202,0.031250,0.070707,0.151515,0.090909,0.202020,0.030303,0.029851,0.151515,0.000000,0.171717,0.020202,0.020202,0.000000,0.000000,0.212121,0.676768,0.101010,0.000000,0.817647,0.010101,0.090909,0.262626,0.323232,0.081081,0.181818,0.121212,0.030928,0.060606,0.033333,0.013889,0.070707,0.000000,0.000000,0.000000,0.191919,0.444444,0.000000,0.131313,0.111111,0.626263,0.070707,0.000000,0.121212,0.000000,0.119565,0.00000,0.142857,0.060606,0.054545,0.014706,0.030303,0.000000,0.019231,0.157895,0.037037,0.0,0.020202,0.040404,0.939394,0.000000,0.000000,0.070707,0.929293,0.323232,0.050505,0.129032,0.250000,0.090909,0.262626,0.414141,0.808081,0.858586,0.151515,0.411111,0.112903,0.000000,0.000000,0.979798,0.020202,0.000000,0.010101,1.000000,0.000000,0.989899,1.000000,0.989899,0.952381,0.8,0.080808,0.070707,0.316667,0.257895,0.263158,0.171875,0.006440,0.021186,0.048780,0.005,0.002402,0.015,0.003831,0.011062,0.211243,1.0,0.000000,0.196721,0.423077,1.0,0.000000,0.538462,0.0,0.154639,0.727273,0.70,0.636364,0.818182,0.818182,0.5,0.181818,0.947917,0.636364,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1
36209,0.000000,0.624862,0.666667,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.020030,0.024825,0.020083,0.757576,0.0,0.252525,0.505051,0.505051,0.949495,0.020202,0.010101,0.020202,0.040404,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.0,0.041237,0.0,0.000000,0.000000,0.357143,0.416667,0.428571,0.345238,0.440476,0.452381,0.373333,0.545455,0.333333,0.131313,0.070707,0.414141,0.313131,0.131313,0.10,0.032787,0.010101,0.252525,0.222222,0.303030,0.151515,0.080808,0.050505,0.000000,0.030303,0.111111,0.393939,0.505051,0.292929,0.080808,0.020202,0.777778,0.060606,0.013699,0.151515,0.306154,0.397143,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.258833,0.290000,0.692308,0.538462,1.000000,0.010101,0.919192,0.090909,0.030303,0.454545,0.838384,0.797980,0.424242,0.919192,0.090909,0.020202,0.02,0.010101,0.111111,0.080808,0.060606,0.320000,0.676768,0.030303,0.000000,0.010101,0.000000,0.323232,0.535354,0.919192,1.000000,1.000000,0.050505,0.000000,0.868687,0.835294,0.800000,0.245902,0.075,0.000000,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.676768,0.676768,0.787879,0.898990,0.662393,0.422427,0.854711,0.422667,0.435333,0.462667,0.483333,0.171737,0.010101,0.020202,0.040404,0.242424,0.363636,0.363636,0.12,0.065574,0.040404,0.010101,0.020202,0.020202,0.191919,0.414141,0.202020,0.12,0.040404,0.040404,0.080808,0.000000,0.646465,0.010101,0.979798,0.030303,0.555556,0.818182,0.121212,0.020202,0.020202,0.000000,0.000000,0.04,0.010101,0.030303,0.010101,0.040404,0.333333,0.394737,0.000000,0.898990,0.808081,0.909091,0.707071,0.898990,0.686869,0.595960,0.555556,0.000000,0.000000,0.010101,0.252525,0.252525,0.060606,0.181818,0.090909,0.023256,0.000000,0.010101,0.000000,0.101010,0.040404,0.010101,0.000000,0.000000,0.000000,0.030303,0.292929,0.040404,0.093750,0.101010,0.111111,0.101010,0.050505,0.020202,0.029851,0.101010,0.010101,0.060606,0.020202,0.010101,0.020202,0.000000,0.030303,0.909091,0.030303,0.000000,0.941176,0.000000,0.010101,0.141414,0.272727,0.216216,0.343434,0.161616,0.041237,0.262626,0.166667,0.194444,0.121212,0.000000,0.010101,0.000000,0.151515,0.272727,0.033333,0.494949,0.131313,0.141414,0.070707,0.036145,0.030303,0.000000,0.119565,0.00000,0.000000,0.080808,0.072727,0.000000,0.010101,0.000000,0.000000,0.000000,0.037037,0.0,0.040404,0.171717,0.919192,0.040404,0.020202,0.020202,1.000000,0.878788,0.171717,0.032258,0.019231,0.797980,1.000000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.619048,0.6,0.020202,0.020202,0.116667,0.078947,0.052632,0.062500,0.006018,0.008475,0.048780,0.020,0.004004,0.025,0.022989,0.022076,0.979064,1.0,0.000000,0.213115,0.750000,1.0,0.333333,0.692308,0.0,0.000000,0.090909,0.80,0.000000,0.909091,0.000000,0.0,1.000000,0.968750,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1


In [73]:
# scrambled rows to avoid yes/no clusters
oversampled_total = oversampled_target.sample(frac=1)

# X/y split after upsampling
X_train_oversampled = oversampled_total.drop(['TARGET_B'], axis = 1)

y_train_oversampled =oversampled_total['TARGET_B']

In [74]:
# Init, fit, score
forest = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20)

_ = forest.fit(X_train_oversampled, y_train_oversampled)


# Training Score
print(f"Training Score: {forest.score(X_train_oversampled, y_train_oversampled)}")

print(f"Test Score: {forest.score(X_test_normalized, y_test)}")


Training Score: 0.6196910930447626
Test Score: 0.610595818267568


In [75]:
pred = forest.predict(X_test_normalized)

print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))

precision:  0.0699468085106383
recall:  0.5462097611630322
f1:  0.12401273134504304


Actually the scores deteriorated after upsampling, what is expected. The previous model was 

## Feature Selection with SelectKBest

In [None]:
# Select the features using SelectKBest based on the chi2. In this case we want the top 10 features.


kbest = SelectKBest(chi2, k=10).fit_transform(X_train_oversampled, y_train_oversampled) 
#kbest is used to extract best features of given dataset.  In this case 10


# Here we choose 10 so that is easier to analyze results later, as we will see
selected = pd.DataFrame(kbest)
selected.head()



In [None]:
def scale_encode(df):
    
    # X/y split
    X = df.drop(['TARGET_B','TARGET_D'], axis = 1)
    y = df['TARGET_B']
    
    
    # Split into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    
    # Distinguish categorical from numerical features
    X_train_cat = X_train.select_dtypes(include = object)
    X_train_num =X_train.select_dtypes(include = np.number)

    X_test_cat = X_test.select_dtypes(include = object)
    X_test_num =X_test.select_dtypes(include = np.number)

    
    
    # MinMaxScale numerical features to ensure that all variables are on the same scale
    scaler = MinMaxScaler().fit(X_train_num)

    X_train_num_scaled = scaler.transform(X_train_num)
    X_train_num_scaled = pd.DataFrame(X_train_num_scaled)
    X_train_num_scaled.columns = X_train_num.columns


    X_test_num_scaled = scaler.transform(X_test_num)
    X_test_num_scaled = pd.DataFrame(X_test_num_scaled)
    X_test_num_scaled.columns = X_test_num.columns

    X_train_num_scaled.reset_index(drop = True, inplace = True)
    X_test_num_scaled.reset_index(drop = True, inplace = True)
    
    
    
    # Encode to ensure that all variables are on the same scale

    encoder = OneHotEncoder(drop='first').fit(X_train_cat)
    
    cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)
    X_train_cat_encode = pd.DataFrame(encoder.transform(X_train_cat).toarray(),columns=cols)
    X_train_cat_encode.reset_index(drop = True, inplace = True)
    
    
    
    cols = encoder.get_feature_names_out(input_features=X_test_cat.columns)
    X_test_cat_encode = pd.DataFrame(encoder.transform(X_test_cat).toarray(),columns=cols)
    X_test_cat_encode.reset_index(drop = True, inplace = True)

    
    
    # Put back together numerical and categorical variables after scaling / encoding

    X_train_normalized = pd.concat([X_train_num_scaled,X_train_cat_encode], axis=1)
    X_test_normalized = pd.concat([X_test_num_scaled,X_test_cat_encode], axis=1)

    y_train.reset_index(drop = True, inplace = True)
    y_test.reset_index(drop = True, inplace = True)
    
    
    # Create full df with only training data
    X_training = pd.concat([X_train_normalized,y_train], axis=1)
    
    

In [78]:
def scaling_encoding(X_train_num,X_test_num,X_train_cat,X_test_cat):
    

    # MinMaxScale numerical features to ensure that all variables are on the same scale
    scaler = MinMaxScaler().fit(X_train_num)

    X_train_num_scaled = scaler.transform(X_train_num)
    X_train_num_scaled = pd.DataFrame(X_train_num_scaled)
    X_train_num_scaled.columns = X_train_num.columns


    X_test_num_scaled = scaler.transform(X_test_num)
    X_test_num_scaled = pd.DataFrame(X_test_num_scaled)
    X_test_num_scaled.columns = X_test_num.columns

    X_train_num_scaled.reset_index(drop = True, inplace = True)
    X_test_num_scaled.reset_index(drop = True, inplace = True)
    
    
    
    # Encode to ensure that all variables are on the same scale

    encoder = OneHotEncoder(drop='first').fit(X_train_cat)
    
    cols = encoder.get_feature_names_out(input_features=X_train_cat.columns)
    X_train_cat_encode = pd.DataFrame(encoder.transform(X_train_cat).toarray(),columns=cols)
    X_train_cat_encode.reset_index(drop = True, inplace = True)
    
    
    
    cols = encoder.get_feature_names_out(input_features=X_test_cat.columns)
    X_test_cat_encode = pd.DataFrame(encoder.transform(X_test_cat).toarray(),columns=cols)
    X_test_cat_encode.reset_index(drop = True, inplace = True)

    
    
    # Put back together numerical and categorical variables after scaling / encoding

    X_train_normalized = pd.concat([X_train_num_scaled,X_train_cat_encode], axis=1)
    X_test_normalized = pd.concat([X_test_num_scaled,X_test_cat_encode], axis=1)

    y_train.reset_index(drop = True, inplace = True)
    y_test.reset_index(drop = True, inplace = True)
    
    
    # Create full df with only training data
    X_training = pd.concat([X_train_normalized,y_train], axis=1)
    
    return  X_train_normalized, X_test_normalized

In [79]:
def random_forest(X_train,y_train,X_test,y_test):
    
    # Init, fit, score
    forest = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20)

    _ = forest.fit(X_train, y_train)


    # Training Score
    print(f"Training Score: {forest.score(X_train, y_train)}")
    print(f"Test Score: {forest.score(X_test, y_test)}")

    
    
    # confusion_matrix
    y_pred = forest.predict(X_test)
    print(confusion_matrix(y_test, y_pred))
    
    
    
    # precision, recall and F1
    print("precision: ",precision_score(y_test,y_pred))
    print("recall: ",recall_score(y_test,y_pred))
    print("f1: ",f1_score(y_test,y_pred))
    