# Lab | Handling Data Imbalance in Classification Models

## - Import the required libraries and modules that you would need.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
pd.set_option('display.max_columns', None)

## - Read that data into Python and call the dataframe donors.

We have already cleaned the data and treated the NULL values in the previous lab. So we will continue with the saved files which are already split in categorical, numerical and target.

In [2]:
numerical = pd.read_csv('numerical7_02.csv')
categorical = pd.read_csv('categorical7_02.csv')
target = pd.read_csv('target7_02.csv')

In [3]:
numerical.shape

(95412, 318)

In [4]:
categorical.shape

(95412, 73)

In [5]:
target.shape

(95412, 2)

## - Check the datatypes of all the columns in the data.

#### Categorical data

In [6]:
# We check if all types in the categorical columns are objects:
# categorical.info(verbose=True)

In [7]:
# Only 'DOMAIN2' is actually categorical but has integer datatype, we change that.
categorical['DOMAIN2'] = categorical['DOMAIN2'].apply(lambda x: str(x))

In [8]:
# Check if a column has a lot of different values and is in fact numerical
# We check if any column has more than 20 individual values:
col_list = []
for col in categorical.columns:
    indi = len(categorical[col].unique())
    if indi > 15:
        col_list.append(col)
# col_list

- The meaning of the STATE column is obvious.
- From the description we see, that MDMAUD is a classification of major donors and therefore categorical.
- Domain is a code and therefore also categorical.
- CLUSTER represents a classification of the socio-economic status of a donor and is also categorical.

In [9]:
categorical[col_list].head(6)

Unnamed: 0,STATE,MDMAUD,CLUSTER,RFA_3,RFA_4,RFA_5,RFA_6,RFA_7,RFA_8,RFA_9,RFA_10,RFA_11,RFA_12,RFA_13,RFA_14,RFA_15,RFA_16,RFA_17,RFA_18,RFA_19,RFA_20,RFA_21,RFA_22,RFA_23,RFA_24
0,IL,XXXX,36,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E,S4E
1,CA,XXXX,14,A2G,A2G,A2G,A2G,A1E,A1E,A1E,A1E,A1E,A1E,,,,L1E,,,N1E,N1E,N1E,N1E,,F1E
2,NC,XXXX,43,S4E,S4E,S4E,S4E,S4F,S4F,S4F,,S4F,S4F,S4F,S4F,S4F,S4F,,S4D,S4D,,,S4D,S4D,S3D
3,CA,XXXX,44,S4E,S4E,S4E,S4E,S4E,S4E,S4E,,S4E,S4E,S4E,S4E,S4E,S4E,S2D,S2D,A1D,A1D,A1D,A1D,,
4,FL,XXXX,16,A2F,A2F,A2F,A1D,I2D,A1E,A1E,L1D,A1E,A1E,L1D,L3D,,L3D,A2D,A2D,A3D,A3D,A3D,I4E,A3D,A3D
5,AL,XXXX,40,A1F,A1F,A1F,A1F,A2F,A2F,A2F,A2F,A2F,A2F,A2F,A2F,A2F,N3F,N2E,N2E,N2E,N2E,N2E,N2E,,F1E


We have a look into the RFA column description.
The values are the status the donors were given during a promotional campaign. Usually I would now check for multicollinearity and eleminate columns. But this exercise is about systematic feature selection, so I will leave that for later.

#### Numerical data

In [10]:
# We know that all values in numerical are number types, since we selected it this way.
# To see if there are categorical values, we check if any column has less than 100 unique values,
# in this case we inspect further.
col_list = []
for col in numerical.columns:
    indi = len(numerical[col].unique())
    if indi < 15:
        col_list.append(col)
# col_list

In [11]:
# We inspect the columns:
numerical[col_list].head(20)

Unnamed: 0,INCOME,WEALTH1,WEALTH2,HV3,HV4,ANC6,MHUC2,NEXTDATE,HPHONE_D,RFA_2F
0,5.0,5.0,5.0,3,2,0,2,1.0,0,4
1,6.0,9.0,9.0,12,10,1,4,1.0,0,2
2,3.0,1.0,1.0,2,1,0,2,1.0,1,4
3,1.0,4.0,0.0,2,1,0,2,1.0,1,4
4,3.0,2.0,5.0,4,3,0,2,1.0,1,2
5,5.0,5.0,5.0,3,3,0,3,1.0,0,1
6,4.0,6.0,3.0,3,3,1,2,1.0,1,1
7,2.0,9.0,5.0,3,3,0,2,1.0,1,3
8,3.0,2.0,2.0,2,1,0,2,1.0,1,1
9,5.0,5.0,9.0,3,3,0,2,1.0,0,1


A quick inspection leads to the conclusion, that these columns are in fact categorical, and therefore we change them.

In [12]:
print(numerical.shape)
print(categorical.shape)

(95412, 318)
(95412, 73)


In [13]:
# Making strings out of the values and assign them to the categorical dataframe
for col in col_list:
    categorical[col] = numerical[col].apply(lambda x: str(x))
    numerical = numerical.drop(col, axis = 1)

In [14]:
print(numerical.shape)
print(categorical.shape)

(95412, 308)
(95412, 83)


# - Check for null values in the dataframe. Replace the null values using the methods learned in class.

In [15]:
# Checking numericals, just for good measure
nullframe_num = pd.DataFrame(numerical.isna().sum(), columns = ['nulls'])
nullframe_num[nullframe_num['nulls'] != 0]

Unnamed: 0,nulls


In [16]:
# Checkign categoricals
nullframe_num = pd.DataFrame(categorical.isna().sum(), columns = ['nulls'])
nullframe_num[nullframe_num['nulls'] != 0]

Unnamed: 0,nulls


No more null values in our data

# - Split the data into numerical and catagorical. Decide if any columns need their dtype changed.

Done

# - Concatenate numerical and categorical back together again for your X dataframe. Designate the Target as y.

In [17]:
alldata = pd.concat([categorical, numerical], axis = 1)
alldata.shape

(95412, 391)

## - Split the data into a training set and a test set.

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
X_train, X_test, y_train_all, y_test_all = train_test_split(alldata, target, test_size=0.2)

## - Split further into train_num and train_cat. Also test_num and test_cat.

In [20]:
train_num  = X_train.select_dtypes(include = np.number)
train_cat = X_train.select_dtypes(include = object)

test_num  = X_test.select_dtypes(include = np.number)
test_cat = X_test.select_dtypes(include = object)

## - Scale the features either by using normalizer or a standard scaler. (train_num, test_num)

In [21]:
from sklearn.preprocessing import MinMaxScaler

In [22]:
# Fitting the transformer
transformer = MinMaxScaler().fit(train_num)
# Scaling train and test data
train_num_scaled = pd.DataFrame(transformer.transform(train_num), columns = train_num.columns, index = train_num.index)
test_num_scaled = pd.DataFrame(transformer.transform(test_num), columns = train_num.columns, index = test_num.index)

## - Encode the categorical features using One-Hot Encoding or Ordinal Encoding. (train_cat, test_cat)

In [23]:
train_cat.head(10)

Unnamed: 0,STATE,MAILCODE,PVASTATE,NOEXCH,RECINHSE,RECP3,RECPGVG,RECSWEEP,MDMAUD,CLUSTER,AGEFLAG,HOMEOWNR,CHILD03,CHILD07,CHILD12,CHILD18,GENDER,DATASRCE,SOLP3,SOLIH,MAJOR,GEOCODE,COLLECT1,VETERANS,BIBLE,CATLG,HOMEE,PETS,CDPLAY,STEREO,PCOWNERS,PHOTO,CRAFTS,FISHER,GARDENIN,BOATS,WALKER,KIDSTUFF,CARDS,PLATES,LIFESRC,PEPSTRFL,RFA_2,RFA_3,RFA_4,RFA_5,RFA_6,RFA_7,RFA_8,RFA_9,RFA_10,RFA_11,RFA_12,RFA_13,RFA_14,RFA_15,RFA_16,RFA_17,RFA_18,RFA_19,RFA_20,RFA_21,RFA_22,RFA_23,RFA_24,RFA_2R,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,GEOCODE2,DOMAIN1,DOMAIN2,INCOME,WEALTH1,WEALTH2,HV3,HV4,ANC6,MHUC2,NEXTDATE,HPHONE_D,RFA_2F
53816,MN,,,0,,,,,XXXX,20,I,,,,,,M,,,,,,,,,,,,,,,,,,,,,,,,,X,L4F,S4F,S4F,S4F,S4F,S4E,S4E,S4E,S4E,S4E,S4E,S3E,S3E,S3E,S2E,,S2E,S2E,S2E,S2E,S2E,S2E,A1D,L,F,X,X,X,A,S,2,5.0,5.0,7.0,5,5,0,2,1.0,0,4
35907,CA,,,0,,,,,XXXX,14,E,H,,,M,,F,3.0,,,,2.0,,,,,,,,,,,,,,,,,,,,,L1F,,,,L1F,L2F,L2F,L2F,,L2F,L2F,,L2F,,A1F,A1F,A1F,N2F,N2F,N2F,N2F,N2F,N2F,L,F,X,X,X,A,S,1,6.0,9.0,9.0,10,10,1,2,1.0,0,1
76574,FL,,,0,,,,,XXXX,49,E,H,,,,,F,3.0,,,,,,,,,,,,,,,,,,,,,,,1.0,X,L4D,A4D,A4D,A4D,A4D,A4D,A4D,A4D,A4D,A4D,A4D,N4D,N4D,N4D,N4D,N3D,N3D,N2D,,F1D,F1D,,P1D,L,D,X,X,X,D,R,2,6.0,4.0,5.0,3,2,0,2,1.0,1,4
91505,NV,,,0,,,,,XXXX,11,,,,,,,other,,,,,3.0,,,,,,,,,,,,,,,,,,,,X,L2E,A2E,A2E,A2E,A2E,A3E,A3E,A3E,,A3E,A3E,A2E,A2E,,A2E,A2E,A2E,A1E,,,A1E,A1E,A1E,L,E,X,X,X,B,S,1,5.0,5.0,9.0,6,6,0,4,1.0,0,2
52087,AZ,,,0,,,,,XXXX,29,E,H,,,,,M,3.0,,,,,,,,,,,,,,,,,,,Y,,,,2.0,,L1G,F1G,F1G,,F1G,P1G,P1G,,,,,,,,,,,,,,,,,L,G,X,X,X,A,C,2,4.0,8.0,5.0,5,4,2,2,0.0,1,1
62453,CO,,,0,,,,,XXXX,34,E,H,,,,,M,3.0,,,,,,Y,Y,Y,,Y,Y,,Y,,Y,Y,,Y,,,,,3.0,,L1F,A1F,A1F,,A1F,A1F,A1F,A1F,A1F,A1F,A1F,A1F,A1F,,L1D,,,N1D,N1D,N1D,N1D,,,L,F,X,X,X,B,T,1,7.0,9.0,5.0,6,6,0,3,1.0,0,1
3065,TX,,,0,X,,,,XXXX,13,E,H,,,,,M,3.0,,,,,,Y,,,,Y,Y,,Y,,,Y,,Y,Y,,,,3.0,,L1F,A1F,A1F,,L2E,N2E,N2E,N2E,N2E,N2E,N2E,N2E,N2E,N2E,N2E,,,,,P1E,P1E,,,L,F,X,X,X,B,S,1,6.0,9.0,5.0,9,9,1,4,1.0,1,1
25037,CA,,,0,,,,,XXXX,6,,H,,,,,F,3.0,,,,,,,,,,,,,,,,,,,,,,,,,L1F,F1F,F1F,,,P1F,P1F,,,,,,,,,,,,,,,,,L,F,X,X,X,A,U,2,4.0,3.0,5.0,4,4,0,3,0.0,1,1
58996,CA,,,0,,,,,XXXX,27,,,,,,,F,,,,,,,,,,,,,,,,,,,,,,,,,,L1F,A1F,A1F,,L1D,N1D,N1D,N1D,N1D,N1D,N1D,,F1D,,F1D,,,P1D,,,,,,L,F,X,X,X,A,C,2,5.0,5.0,5.0,5,5,0,2,1.0,0,1
82698,AL,,,0,,,,,XXXX,35,E,H,,,,,M,3.0,,,,3.0,Y,Y,,Y,,,Y,Y,Y,Y,Y,Y,Y,,Y,Y,Y,,3.0,X,L3E,A3E,A3E,A3E,A3E,A3E,A3E,A3E,A3E,A3E,A3E,A3E,A2E,,A1E,I1D,I1D,L1D,,L1D,L1D,,,L,E,X,X,X,B,T,1,5.0,9.0,5.0,4,4,0,3,1.0,1,3


### Ordinal encoding

After looking at the columns we decide to ordinal encode the following:
['INCOME', 'WEALTH1','WEALTH2','DOMAIN2']

Since they are all already represented as numbers, we have only to change the datatype to float.

In [24]:
for col in ['INCOME', 'WEALTH1','WEALTH2','DOMAIN2']:
    train_cat[col] = train_cat[col].apply(lambda x: float(x))
    test_cat[col] = test_cat[col].apply(lambda x: float(x))

### OneHot encoding

Since we now have our ordinal encoded data as integers, we can easily split our dataframes up again to onehot encode the rest.

In [25]:
# Splitting the ordinal from the onehot
train_cat_ord = train_cat.select_dtypes(include = np.number)
train_cat_hot = train_cat.select_dtypes(include = object)

test_cat_ord = test_cat.select_dtypes(include = np.number)
test_cat_hot = test_cat.select_dtypes(include = object)

In [26]:
# Importing OneHotEncoder
from sklearn.preprocessing import OneHotEncoder

In [27]:
# Fitting the encoder
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_cat_hot)

In [28]:
# Getting the column names for the later selection
column_name = encoder.get_feature_names_out(train_cat_hot.columns)

In [29]:
print(train_cat_hot.shape)
print(test_cat_hot.shape)

(76329, 79)
(19083, 79)


In [30]:
# Encoding, transforming to dataframe, assigning fitting column names, setting the index into right order
train_encoded = pd.DataFrame(encoder.transform(train_cat_hot).toarray(),columns = column_name, index=train_cat_hot.index)
test_encoded = pd.DataFrame(encoder.transform(test_cat_hot).toarray(),columns = column_name, index=test_cat_hot.index)

In [31]:
print(train_encoded.shape)
print(test_encoded.shape)

(76329, 2406)
(19083, 2406)


### Concatenating the prepared data

In [32]:
# I had to revert back to the scaling to ensure that all indexes are lined up

In [33]:
X_train = pd.concat([train_num_scaled,train_cat_ord,train_encoded], axis= 1)
X_test = pd.concat([test_num_scaled,test_cat_ord,test_encoded], axis= 1)

In [34]:
print(X_train.shape)
print(X_test.shape)

(76329, 2718)
(19083, 2718)


In [35]:
# Free memory
del(train_num_scaled)
del(train_cat_ord)
del(train_encoded)
del(train_cat_hot)

del(test_num_scaled)
del(test_cat_ord)
del(test_encoded)
del(test_cat_hot)

## - Fit a logistic regression model on the training data.
## - Check the accuracy on the test data.

In [36]:
# _We select our y, for our model it is TARGET_B
y_train = pd.DataFrame(y_train_all['TARGET_B'])
y_test = pd.DataFrame(y_test_all['TARGET_B'])

In [37]:
# Importing logistic regression
from sklearn.linear_model import LogisticRegression

In [38]:
classification = LogisticRegression(solver='lbfgs')
classification.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [39]:
predictions = classification.predict(X_test)

In [40]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, predictions)

array([[18135,     0],
       [  948,     0]], dtype=int64)

So, this is horrific, our model just put everything in the majority class, except for one value and even predicted that one false, looks like it is for feature selection!

# Resampling

In [41]:
# I already ran into memory issues when trying out Feature selection, because of the size of the
# encoded dataset, therefore I will try undersampling and SMOTE

#### Undersampling

In [43]:
from sklearn.utils import resample
# separate majority/minority classes
# I already ensured to keep the indexes the same during the encoding process
train = pd.concat([X_train, y_train],axis=1)
no = train[train['TARGET_B']==0]
yes = train[train['TARGET_B']==1]
del(train)

In [44]:
# undersample majority
no_undersampled = resample(no,
                                    replace=False,
                                    n_samples = len(yes),
                                    random_state=0)

In [46]:
# Check if both dataframes now have the same length
yes.shape

(3895, 2719)

In [47]:
no_undersampled.shape

(3895, 2719)

In [48]:
X_train_under = pd.concat([yes,no_undersampled],axis = 0)
X_train_under.shape

(7790, 2719)

In [49]:
# Of course we now have to do the X-y split again
y_under = X_train_under['TARGET_B']
X_train_under = X_train_under.drop(columns=['TARGET_B'])

In [50]:
# Clear memory
del(no_undersampled)
del(yes)
del(no)

# Feature selection

#### Creating a function for the models
Since we want to evaluade if the feature selection improved our models, we have to test after each step.
Therefore we put the modeling process into a function to not repeat infinately.

In [65]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import neighbors
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
def model_test(X_train,X_test,y_train,y_test):
    # Logistic regression
    # Trying out sag resolver, was reconmended for large datasets
    classification = LogisticRegression(random_state=0, solver='sag')
    classification.fit(X_train, y_train)
    predictions = classification.predict(X_test)
    print(confusion_matrix(y_test, predictions))
    print("LogRegression precision: ",precision_score(y_test,predictions))
    print("LogRegression recall: ",recall_score(y_test,predictions))
    print("LogRegression f1: ",f1_score(y_test,predictions))
    # Decision tree classifier
    # We for now stay with depth of 3 and see how the outcome is
    model = DecisionTreeClassifier(max_depth=3)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(confusion_matrix(y_test, predictions))
    print("DecisionTree precision: ",precision_score(y_test,predictions))
    print("DecisionTree recall: ",recall_score(y_test,predictions))
    print("DecisionTree f1: ",f1_score(y_test,predictions))
    # KNN classifier
    # I first just stay with neighbors = 3, when we found the desired features, we can tweak it later
    clf = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance')
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    print(confusion_matrix(y_test, predictions))
    print("KNN precision: ",precision_score(y_test,predictions))
    print("KNN recall: ",recall_score(y_test,predictions))
    print("KNN f1: ",f1_score(y_test,predictions))
    
    # Clear memory
    del(X_train)
    del(X_test)
    del(y_train)
    del(y_test)

In [52]:
# Clear some memory, ran into issues later
# del(alldata)
# del(X_train)
# del(X_test)
# del(y_train_all)
# del(y_test_all)

## Variance selection

In [53]:
from sklearn.feature_selection import VarianceThreshold
# We set the variance threshhold at 0.2, we could test higher ones later,
# but we have a lot of columns to eliminate.
var_threshold = 0.02
sel = VarianceThreshold(threshold=(var_threshold))

In [54]:
# We fit it with our encoded and scaled data
sel = sel.fit(X_train_under)

In [55]:
# We create a temporary dataframe with the dropped columns
XTr_temp = pd.DataFrame(sel.transform(X_train_under), index = X_train_under.index)
XTr_temp.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469
56317,0.784946,0.0,0.624862,0.060606,0.171717,0.0,0.929293,0.080808,0.979798,0.010101,0.313131,0.606061,0.585859,0.313131,0.212121,0.141414,0.0975,0.1255,0.555556,0.454545,0.050505,0.666667,0.525253,0.646465,0.040404,0.10101,0.232323,0.343434,0.616162,0.010101,0.282828,0.030303,0.060606,0.131313,0.010101,0.040404,0.494949,0.767677,0.410256,0.333333,0.272727,0.292929,0.333333,0.464646,0.545455,0.121212,0.464646,0.737374,0.676768,0.69697,0.393939,0.626263,1.0,0.363636,0.40404,0.717172,0.969697,0.484848,0.288462,0.222222,0.262626,0.464646,0.606061,0.40404,0.141414,0.686869,0.989899,0.020202,0.838384,0.238095,0.183333,0.035176,0.333333,0.274204,0.836066,3.0,5.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
58863,0.283154,0.484861,0.505155,0.171717,0.090909,1.0,0.0,0.0,0.484848,0.010101,0.686869,1.0,1.0,0.0,0.0,0.0,0.578167,0.565333,0.939394,0.070707,0.0,0.959596,0.868687,0.323232,0.919192,0.929293,0.939394,0.939394,0.949495,0.737374,0.787879,0.0,0.0,0.0,0.969697,0.969697,0.969697,0.969697,0.63515,0.020155,0.070707,0.191919,0.666667,0.363636,0.646465,0.505051,0.727273,0.89899,0.868687,0.363636,0.262626,1.0,1.0,0.171717,0.090909,0.40404,0.515152,0.919192,0.192308,0.030303,0.565657,0.676768,1.0,0.0,0.858586,0.090909,1.0,0.0,1.0,0.761905,0.516667,0.025126,0.3,0.816949,0.131148,1.0,5.0,5.0,9.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
67516,0.641577,0.412049,0.587629,0.181818,0.080808,0.0,0.0,1.0,0.929293,0.050505,0.393939,0.828283,0.818182,0.0,0.0,0.0,0.252333,0.262667,0.737374,0.272727,0.565657,0.707071,0.545455,0.59596,0.171717,0.515152,0.787879,0.929293,0.979798,0.040404,0.343434,0.0,0.171717,0.0,0.313131,0.515152,0.636364,0.747475,0.0,0.100775,0.181818,0.262626,0.555556,0.505051,0.505051,0.151515,0.353535,0.494949,0.494949,0.545455,0.454545,0.353535,1.0,0.181818,0.080808,0.535354,0.929293,0.606061,0.326923,0.070707,0.191919,0.69697,0.787879,0.222222,0.0,0.090909,0.0,0.949495,0.020202,0.52381,0.283333,0.045226,0.3,0.886295,0.672131,1.0,3.0,5.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
60863,0.354839,0.402163,0.597938,0.232323,0.393939,0.0,0.0,1.0,1.0,0.0,0.424242,0.777778,0.777778,0.0,0.0,0.0,0.062833,0.069833,0.89899,0.111111,0.070707,0.777778,0.69697,0.585859,0.0,0.0,0.010101,0.10101,0.313131,0.0,0.474747,0.0,0.212121,0.0,0.0,0.0,0.080808,0.282828,0.0,0.289922,0.222222,0.40404,0.383838,0.434343,0.575758,0.161616,0.646465,0.636364,0.636364,0.838384,0.89899,1.0,0.0,0.191919,0.292929,0.717172,0.979798,0.69697,0.442308,0.20202,0.333333,0.555556,0.656566,0.353535,0.020202,0.474747,0.747475,0.121212,0.0,0.238095,0.433333,0.497487,0.666667,0.517612,0.95082,3.0,6.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
87976,0.928315,0.0,0.624862,0.232323,0.444444,0.0,0.0,1.0,0.979798,0.010101,0.393939,0.676768,0.666667,0.040404,0.030303,0.030303,0.080167,0.091667,0.79798,0.212121,0.353535,0.757576,0.666667,0.515152,0.010101,0.030303,0.090909,0.222222,0.484848,0.0,0.292929,0.010101,0.282828,0.020202,0.010101,0.070707,0.181818,0.494949,0.205128,0.168992,0.434343,0.444444,0.323232,0.505051,0.505051,0.444444,0.464646,0.59596,0.545455,0.575758,0.414141,0.858586,0.939394,0.222222,0.474747,0.79798,0.989899,0.59596,0.326923,0.141414,0.323232,0.676768,0.838384,0.171717,0.212121,0.393939,0.717172,0.292929,0.272727,0.333333,0.066667,0.492462,0.266667,0.607685,1.0,3.0,2.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
20949,0.21147,0.216375,0.783505,0.262626,0.515152,1.0,0.0,0.0,0.949495,0.010101,0.212121,0.494949,0.464646,0.010101,0.0,0.0,0.59,0.613167,0.838384,0.171717,0.818182,0.575758,0.474747,0.464646,0.888889,0.959596,0.989899,1.0,1.0,0.707071,0.333333,0.0,0.494949,0.0,0.717172,0.79798,0.909091,0.929293,0.782051,0.023256,0.20202,0.414141,0.626263,0.484848,0.525253,0.070707,0.575758,0.686869,0.686869,0.838384,0.575758,0.858586,0.0,0.262626,0.515152,0.414141,0.939394,0.484848,0.480769,0.010101,0.111111,0.272727,0.777778,0.232323,0.919192,0.070707,1.0,0.0,0.989899,0.761905,0.383333,0.497487,0.166667,0.792629,0.360656,1.0,5.0,5.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
562,0.856631,0.648919,0.350515,0.636364,0.050505,1.0,0.0,0.0,0.838384,0.111111,0.676768,0.787879,0.69697,0.212121,0.212121,0.212121,0.150167,0.155,0.535354,0.474747,0.050505,0.89899,0.858586,0.59596,0.0,0.010101,0.232323,0.909091,1.0,0.0,0.707071,0.020202,0.0,0.20202,0.444444,0.515152,0.878788,1.0,0.632479,0.466667,0.121212,0.060606,0.626263,0.717172,0.292929,0.212121,0.575758,0.949495,0.888889,0.525253,0.424242,0.717172,0.0,0.636364,0.050505,0.212121,0.939394,0.686869,0.307692,0.0,0.10101,1.0,1.0,0.0,0.959596,0.050505,1.0,0.0,1.0,0.47619,0.183333,0.045226,0.166667,0.557147,0.639344,1.0,5.0,8.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
13461,0.928315,0.370855,0.628866,0.454545,0.191919,1.0,0.0,0.0,0.858586,0.111111,0.616162,0.969697,0.717172,0.040404,0.030303,0.020202,0.1335,0.133333,0.888889,0.121212,0.111111,0.848485,0.676768,0.555556,0.0,0.0,0.070707,0.616162,0.979798,0.0,0.636364,0.010101,0.0,0.0,0.707071,0.878788,0.939394,0.969697,0.286325,0.196899,0.080808,0.212121,0.373737,0.555556,0.454545,0.121212,0.747475,0.848485,0.79798,0.69697,0.474747,0.686869,1.0,0.454545,0.191919,0.252525,0.818182,0.757576,0.326923,0.070707,0.121212,0.767677,1.0,0.010101,0.0,1.0,1.0,0.0,1.0,0.380952,0.05,0.502513,0.266667,0.194705,0.131148,2.0,6.0,5.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
93347,0.21147,0.401751,0.597938,0.363636,0.505051,1.0,0.0,0.0,1.0,0.0,0.464646,0.989899,0.969697,0.010101,0.0,0.0,0.1335,0.154333,0.929293,0.080808,0.050505,0.777778,0.676768,0.585859,0.010101,0.080808,0.393939,0.535354,0.89899,0.0,0.636364,0.0,0.0,0.0,0.313131,0.59596,0.727273,0.868687,0.594017,0.379845,0.090909,0.282828,0.676768,0.131313,0.878788,0.292929,0.707071,0.767677,0.747475,0.646465,0.626263,1.0,1.0,0.363636,0.505051,0.787879,0.969697,0.79798,0.653846,0.020202,0.040404,0.333333,0.414141,0.59596,0.787879,0.222222,1.0,0.0,1.0,0.285714,0.316667,0.045226,0.033333,0.025629,0.295082,1.0,5.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
39016,0.21147,0.134706,0.85567,0.181818,0.505051,1.0,0.0,0.0,0.989899,0.0,0.393939,0.787879,0.69697,0.222222,0.212121,0.191919,0.273667,0.292333,0.868687,0.141414,0.414141,0.828283,0.757576,0.575758,0.262626,0.616162,0.959596,0.989899,1.0,0.030303,0.737374,0.060606,0.0,0.080808,0.575758,0.949495,0.959596,0.959596,0.884615,0.203101,0.070707,0.373737,0.717172,0.535354,0.474747,0.20202,0.636364,0.666667,0.656566,0.636364,0.505051,0.545455,1.0,0.181818,0.505051,0.090909,0.959596,0.707071,0.192308,0.050505,0.59596,1.0,1.0,0.0,0.010101,0.989899,1.0,0.010101,1.0,0.619048,0.5,0.502513,0.233333,0.235242,0.098361,1.0,4.0,7.0,9.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0


In [56]:
# To test we have to treat the test data the same way
Xte_temp = pd.DataFrame(sel.transform(X_test), index = X_test.index)

In [66]:
# We test our models, we put in the undersampled traindata X and y als well as the original test-y and the treated test-X
model_test(XTr_temp,Xte_temp,y_under, y_test)



[[10628  7507]
 [  405   543]]
LogRegression precision:  0.06745341614906833
LogRegression recall:  0.5727848101265823
LogRegression f1:  0.1206934874416537
[[8683 9452]
 [ 306  642]]
DecisionTree precision:  0.06360213988508025
DecisionTree recall:  0.6772151898734177
DecisionTree f1:  0.11628328201412788
[[9876 8259]
 [ 457  491]]
KNN precision:  0.056114285714285714
KNN recall:  0.5179324894514767
KNN f1:  0.1012579913384203


The results are much better and actually useful, we try again with a lower threshhold.

In [67]:
# Trying again with lower variance threshhold
var_threshold = 0.1
sel = VarianceThreshold(threshold=(var_threshold))
# We fit it with our encoded and scaled data
sel = sel.fit(X_train_under)
XTr_temp = pd.DataFrame(sel.transform(X_train_under), index = X_train_under.index)
Xte_temp = pd.DataFrame(sel.transform(X_test), index = X_test.index)
model_test(XTr_temp,Xte_temp,y_under, y_test)



[[10930  7205]
 [  395   553]]
LogRegression precision:  0.07128125805620004
LogRegression recall:  0.5833333333333334
LogRegression f1:  0.12703882379967837
[[10596  7539]
 [  404   544]]
DecisionTree precision:  0.06730174440183101
DecisionTree recall:  0.5738396624472574
DecisionTree f1:  0.12047392315358212
[[9601 8534]
 [ 466  482]]
KNN precision:  0.05346051464063886
KNN recall:  0.5084388185654009
KNN f1:  0.0967482938578884


The results got marginally worse, this seems to be the best we can get with this selection method.

In [None]:
del(sel)
del(XTr_temp)
del(Xte_temp)

#### Conclusion
I would have liked to get more in depth here and have a look at the selected columns, but time is runnign out and I have to work on my presentation,
so this will be all I can do for now.
I will just try out one more possibility with pca.

## Principal component analysis

In [68]:
from sklearn.decomposition import PCA
# We first try with 10 dimensions
pca = PCA(10)
# Fitting the pca transformer
pca = pca.fit(X_train_under)
# Transforming train and test data
XTr_temp = pd.DataFrame(pca.transform(X_train_under), index = X_train_under.index)
Xte_temp = pd.DataFrame(pca.transform(X_test), index = X_test.index)
# Testing the models
model_test(XTr_temp,Xte_temp,y_under, y_test)

[[10046  8089]
 [  364   584]]
LogRegression precision:  0.0673354087397671
LogRegression recall:  0.6160337552742616
LogRegression f1:  0.12140110175657416
[[11913  6222]
 [  493   455]]
DecisionTree precision:  0.06814437621686387
DecisionTree recall:  0.479957805907173
DecisionTree f1:  0.11934426229508198
[[9327 8808]
 [ 420  528]]
KNN precision:  0.056555269922879174
KNN recall:  0.5569620253164557
KNN f1:  0.10268378063010501


This looks a little better, we get more predictiosn irght on the minority class.

In [72]:
# We will try with more dimensions and see if the results get better.
pca = PCA(15)
# Fitting the pca transformer
pca = pca.fit(X_train_under)
# Transforming train and test data
XTr_temp = pd.DataFrame(pca.transform(X_train_under), index = X_train_under.index)
Xte_temp = pd.DataFrame(pca.transform(X_test), index = X_test.index)
# Testing the models
model_test(XTr_temp,Xte_temp,y_under, y_test)

[[10596  7539]
 [  383   565]]
LogRegression precision:  0.06971865745310958
LogRegression recall:  0.5959915611814346
LogRegression f1:  0.12483429076447194
[[11858  6277]
 [  487   461]]
DecisionTree precision:  0.06841792816859603
DecisionTree recall:  0.48628691983122363
DecisionTree f1:  0.1199583658600052
[[9451 8684]
 [ 417  531]]
KNN precision:  0.05762344004340749
KNN recall:  0.560126582278481
KNN f1:  0.10449670372921381


#### Conclusion
Not much improvement here, we predict over 50% of the minority class right, but need almost 50% of the predictions to do so, the model still needs much improvement!

My next step would be to still try out SMOTE resampling and Kbest, but I lack the time to get minto the individual columns.