<a href="https://colab.research.google.com/github/Himanshu-Dharma/secom_mpmd/blob/master/Secom_Presentation2_Himanshu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## SECOM

### Libraries

In [1]:
import pandas as pd 
import requests as rq
import numpy as np
import datetime as dt
#from sklearn import datasets
import seaborn as sn
import matplotlib.pyplot as plt
import scipy.stats as stats

#train/test
from sklearn.model_selection import train_test_split

#scaling
from sklearn.preprocessing import MinMaxScaler

# kNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import KNNImputer

# MICE
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression

#Boruta
!pip install boruta
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy

from sklearn.metrics import accuracy_score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting boruta
  Downloading Boruta-0.3-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 1.0 MB/s 
Installing collected packages: boruta
Successfully installed boruta-0.3


### **Label File**

In [2]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/secom/secom_labels.data'
r = rq.get(url, allow_redirects=True)

open('secomlabels.csv', 'wb').write(r.content)

40638

In [3]:
headers = ['result','datetime']
secom_label = pd.read_csv('secomlabels.csv', encoding='latin-1', sep=' ',  names=headers, index_col=False, header=None)
df = pd.DataFrame(secom_label)

In [4]:
df

Unnamed: 0,result,datetime
0,-1,19/07/2008 11:55:00
1,-1,19/07/2008 12:32:00
2,1,19/07/2008 13:17:00
3,-1,19/07/2008 14:43:00
4,-1,19/07/2008 15:22:00
...,...,...
1562,-1,16/10/2008 15:13:00
1563,-1,16/10/2008 20:49:00
1564,-1,17/10/2008 05:26:00
1565,-1,17/10/2008 06:01:00


In [5]:
df_new = df
df_new['date'] = pd.to_datetime(df['datetime']).dt.date
df_new['time'] = pd.to_datetime(df['datetime']).dt.time
df_new.drop('datetime', inplace=True, axis=1)
print(df)

      result        date      time
0         -1  2008-07-19  11:55:00
1         -1  2008-07-19  12:32:00
2          1  2008-07-19  13:17:00
3         -1  2008-07-19  14:43:00
4         -1  2008-07-19  15:22:00
...      ...         ...       ...
1562      -1  2008-10-16  15:13:00
1563      -1  2008-10-16  20:49:00
1564      -1  2008-10-17  05:26:00
1565      -1  2008-10-17  06:01:00
1566      -1  2008-10-17  06:07:00

[1567 rows x 3 columns]


In [6]:
df_new[df_new.duplicated(['date','time'], keep=False)]

Unnamed: 0,result,date,time
31,-1,2008-07-27,22:28:00
32,-1,2008-07-27,22:28:00
114,-1,2008-05-08,07:12:00
115,1,2008-05-08,07:12:00
285,-1,2008-08-19,05:11:00
...,...,...,...
1518,-1,2008-10-15,02:40:00
1545,-1,2008-10-16,02:16:00
1546,-1,2008-10-16,02:16:00
1551,-1,2008-10-16,04:02:00


### **Data File**

In [7]:
url1 = 'https://archive.ics.uci.edu/ml/machine-learning-databases/secom/secom.data'
r1 = rq.get(url1, allow_redirects=True)

open('secomdata.csv', 'wb').write(r1.content)

5389983

In [8]:
header_data = ['Feature{}'.format(x) for x in range(1,591)]
secom_data = pd.read_csv('secomdata.csv', encoding='latin-1',sep=' ', names=header_data, index_col=False, header=None)
df_data = pd.DataFrame(secom_data)
df_data

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Feature10,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
0,3030.93,2564.00,2187.7333,1411.1265,1.3602,100.0,97.6133,0.1242,1.5005,0.0162,...,,,0.5005,0.0118,0.0035,2.3630,,,,
1,3095.78,2465.14,2230.4222,1463.6606,0.8294,100.0,102.3433,0.1247,1.4966,-0.0005,...,0.0060,208.2045,0.5019,0.0223,0.0055,4.4447,0.0096,0.0201,0.0060,208.2045
2,2932.61,2559.94,2186.4111,1698.0172,1.5102,100.0,95.4878,0.1241,1.4436,0.0041,...,0.0148,82.8602,0.4958,0.0157,0.0039,3.1745,0.0584,0.0484,0.0148,82.8602
3,2988.72,2479.90,2199.0333,909.7926,1.3204,100.0,104.2367,0.1217,1.4882,-0.0124,...,0.0044,73.8432,0.4990,0.0103,0.0025,2.0544,0.0202,0.0149,0.0044,73.8432
4,3032.24,2502.87,2233.3667,1326.5200,1.5334,100.0,100.3967,0.1235,1.5031,-0.0031,...,,,0.4800,0.4766,0.1045,99.3032,0.0202,0.0149,0.0044,73.8432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,2899.41,2464.36,2179.7333,3085.3781,1.4843,100.0,82.2467,0.1248,1.3424,-0.0045,...,0.0047,203.1720,0.4988,0.0143,0.0039,2.8669,0.0068,0.0138,0.0047,203.1720
1563,3052.31,2522.55,2198.5667,1124.6595,0.8763,100.0,98.4689,0.1205,1.4333,-0.0061,...,,,0.4975,0.0131,0.0036,2.6238,0.0068,0.0138,0.0047,203.1720
1564,2978.81,2379.78,2206.3000,1110.4967,0.8236,100.0,99.4122,0.1208,,,...,0.0025,43.5231,0.4987,0.0153,0.0041,3.0590,0.0197,0.0086,0.0025,43.5231
1565,2894.92,2532.01,2177.0333,1183.7287,1.5726,100.0,98.7978,0.1213,1.4622,-0.0072,...,0.0075,93.4941,0.5004,0.0178,0.0038,3.5662,0.0262,0.0245,0.0075,93.4941


### **Merged Dataset**

In [9]:
merged_df = pd.merge(df, df_data, left_index=True, right_index=True)
merged_df

Unnamed: 0,result,date,time,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
0,-1,2008-07-19,11:55:00,3030.93,2564.00,2187.7333,1411.1265,1.3602,100.0,97.6133,...,,,0.5005,0.0118,0.0035,2.3630,,,,
1,-1,2008-07-19,12:32:00,3095.78,2465.14,2230.4222,1463.6606,0.8294,100.0,102.3433,...,0.0060,208.2045,0.5019,0.0223,0.0055,4.4447,0.0096,0.0201,0.0060,208.2045
2,1,2008-07-19,13:17:00,2932.61,2559.94,2186.4111,1698.0172,1.5102,100.0,95.4878,...,0.0148,82.8602,0.4958,0.0157,0.0039,3.1745,0.0584,0.0484,0.0148,82.8602
3,-1,2008-07-19,14:43:00,2988.72,2479.90,2199.0333,909.7926,1.3204,100.0,104.2367,...,0.0044,73.8432,0.4990,0.0103,0.0025,2.0544,0.0202,0.0149,0.0044,73.8432
4,-1,2008-07-19,15:22:00,3032.24,2502.87,2233.3667,1326.5200,1.5334,100.0,100.3967,...,,,0.4800,0.4766,0.1045,99.3032,0.0202,0.0149,0.0044,73.8432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,-1,2008-10-16,15:13:00,2899.41,2464.36,2179.7333,3085.3781,1.4843,100.0,82.2467,...,0.0047,203.1720,0.4988,0.0143,0.0039,2.8669,0.0068,0.0138,0.0047,203.1720
1563,-1,2008-10-16,20:49:00,3052.31,2522.55,2198.5667,1124.6595,0.8763,100.0,98.4689,...,,,0.4975,0.0131,0.0036,2.6238,0.0068,0.0138,0.0047,203.1720
1564,-1,2008-10-17,05:26:00,2978.81,2379.78,2206.3000,1110.4967,0.8236,100.0,99.4122,...,0.0025,43.5231,0.4987,0.0153,0.0041,3.0590,0.0197,0.0086,0.0025,43.5231
1565,-1,2008-10-17,06:01:00,2894.92,2532.01,2177.0333,1183.7287,1.5726,100.0,98.7978,...,0.0075,93.4941,0.5004,0.0178,0.0038,3.5662,0.0262,0.0245,0.0075,93.4941


In [10]:
feature_df = merged_df.drop(['result','date','time'], axis=1)
feature_df

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Feature10,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
0,3030.93,2564.00,2187.7333,1411.1265,1.3602,100.0,97.6133,0.1242,1.5005,0.0162,...,,,0.5005,0.0118,0.0035,2.3630,,,,
1,3095.78,2465.14,2230.4222,1463.6606,0.8294,100.0,102.3433,0.1247,1.4966,-0.0005,...,0.0060,208.2045,0.5019,0.0223,0.0055,4.4447,0.0096,0.0201,0.0060,208.2045
2,2932.61,2559.94,2186.4111,1698.0172,1.5102,100.0,95.4878,0.1241,1.4436,0.0041,...,0.0148,82.8602,0.4958,0.0157,0.0039,3.1745,0.0584,0.0484,0.0148,82.8602
3,2988.72,2479.90,2199.0333,909.7926,1.3204,100.0,104.2367,0.1217,1.4882,-0.0124,...,0.0044,73.8432,0.4990,0.0103,0.0025,2.0544,0.0202,0.0149,0.0044,73.8432
4,3032.24,2502.87,2233.3667,1326.5200,1.5334,100.0,100.3967,0.1235,1.5031,-0.0031,...,,,0.4800,0.4766,0.1045,99.3032,0.0202,0.0149,0.0044,73.8432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,2899.41,2464.36,2179.7333,3085.3781,1.4843,100.0,82.2467,0.1248,1.3424,-0.0045,...,0.0047,203.1720,0.4988,0.0143,0.0039,2.8669,0.0068,0.0138,0.0047,203.1720
1563,3052.31,2522.55,2198.5667,1124.6595,0.8763,100.0,98.4689,0.1205,1.4333,-0.0061,...,,,0.4975,0.0131,0.0036,2.6238,0.0068,0.0138,0.0047,203.1720
1564,2978.81,2379.78,2206.3000,1110.4967,0.8236,100.0,99.4122,0.1208,,,...,0.0025,43.5231,0.4987,0.0153,0.0041,3.0590,0.0197,0.0086,0.0025,43.5231
1565,2894.92,2532.01,2177.0333,1183.7287,1.5726,100.0,98.7978,0.1213,1.4622,-0.0072,...,0.0075,93.4941,0.5004,0.0178,0.0038,3.5662,0.0262,0.0245,0.0075,93.4941


In [11]:
# correlations 

matrix = feature_df.corr()
matrix

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Feature10,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
Feature1,1.000000,-0.145071,0.004775,-0.007655,-0.011047,,0.002281,0.031510,-0.052731,0.009052,...,-0.070137,-0.028380,0.000225,0.023469,0.019921,0.023605,0.018472,-0.025909,-0.028196,0.004177
Feature2,-0.145071,1.000000,0.005802,-0.007603,-0.001641,,-0.025702,-0.012084,0.031321,0.024015,...,0.073211,0.083463,0.043690,0.002905,-0.001264,0.002273,-0.009417,0.017290,0.010134,0.044834
Feature3,0.004775,0.005802,1.000000,0.298935,0.095891,,-0.136225,-0.273970,0.023609,0.016291,...,-0.018721,-0.010759,-0.006061,0.015711,0.018237,0.015765,-0.025548,-0.029479,-0.030943,-0.033226
Feature4,-0.007655,-0.007603,0.298935,1.000000,-0.058483,,-0.685835,0.138290,-0.103656,0.068998,...,-0.057051,-0.096619,0.009045,0.025461,0.024754,0.026043,0.034779,-0.039309,-0.033780,-0.081157
Feature5,-0.011047,-0.001641,0.095891,-0.058483,1.000000,,-0.074368,-0.916410,-0.026035,0.054619,...,-0.041290,0.088327,0.045361,-0.001301,-0.001598,-0.001617,-0.044016,-0.031145,-0.026204,0.051428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Feature586,0.023605,0.002273,0.015765,0.026043,-0.001617,,-0.039569,0.010345,0.017930,-0.036132,...,-0.000395,-0.000673,-0.197363,0.999890,0.995342,1.000000,0.002744,-0.002931,-0.002531,-0.003801
Feature587,0.018472,-0.009417,-0.025548,0.034779,-0.044016,,-0.041296,0.058171,0.010436,0.033759,...,0.249926,-0.475397,-0.016726,0.002257,0.001606,0.002744,1.000000,0.167913,0.164238,-0.486559
Feature588,-0.025909,0.017290,-0.029479,-0.039309,-0.031145,,0.034184,-0.021472,0.022853,0.059341,...,0.975470,0.396369,-0.024481,-0.002650,-0.002498,-0.002931,0.167913,1.000000,0.974276,0.390813
Feature589,-0.028196,0.010134,-0.030943,-0.033780,-0.026204,,0.032359,-0.020962,0.026261,0.060803,...,1.000000,0.379167,-0.020712,-0.002261,-0.001957,-0.002531,0.164238,0.974276,1.000000,0.389211


### **Function Definitons**

In [None]:
# Min Max Scaling 
def scaler(X):
    scaler = MinMaxScaler()
    scaler.fit(X)
    return scaler.transform(X)

In [12]:
# KNN imputer

def knn_impute(X):
  impute_knn = KNNImputer(n_neighbors=5)
  return impute_knn.fit_transform(X)


In [13]:
# MICE imputation

def mice_impute(X):
  lr = LinearRegression()
  impute_mice = IterativeImputer(estimator = lr, verbose = 2, max_iter = 5, tol = 1e-10, imputation_order = "roman")
  return impute_mice.fit_transform(X)

In [14]:
# Remove features with 55% missing values

def remove_na(X):
    threshold = int(0.55*X.shape[0])
    df4 = pd.DataFrame(X.isna().sum())
    rem_col = list(df4[df4[0]>=threshold].index)
    return X.drop(columns = rem_col, axis = 1)
  

In [15]:
# select columns having constant values 

def remove_constant_value_features(X):
    return [e for e in X.columns if X[e].nunique() == 1]

### **Train test Split**

In [16]:
df_non_faulty = merged_df[merged_df['result']== -1]
df_faulty = merged_df[merged_df['result']== 1]

In [17]:
faulty_train, faulty_test = train_test_split(df_non_faulty, test_size = 0.2)
nfaulty_train, nfaulty_test = train_test_split(df_faulty, test_size = 0.2)

test_data = pd.concat([faulty_test, nfaulty_test])
train_data = pd.concat([faulty_train, nfaulty_test])

In [18]:
test_data['result'].value_counts()
test_data.shape

(314, 593)

In [19]:
train_data['result'].value_counts()
train_data

Unnamed: 0,result,date,time,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
1282,-1,2008-03-10,22:08:00,2927.71,2525.39,2199.6556,1140.3983,1.3369,100.0,103.0967,...,,,0.5055,0.0195,0.0048,3.8618,0.0184,0.0148,0.0054,80.1759
1368,-1,2008-06-10,16:42:00,2916.12,2806.91,2231.0555,1303.5386,0.9751,100.0,95.7878,...,,,0.5028,0.0136,0.0036,2.7137,0.0331,0.0469,0.0151,141.6245
411,-1,2008-08-22,09:29:00,2933.88,2488.49,2178.6889,1657.3518,1.6603,100.0,100.8022,...,,,0.4999,0.0115,0.0027,2.2998,0.0145,0.0213,0.0074,146.5131
580,-1,2008-08-30,11:14:00,2977.46,2497.09,2183.3444,1111.4499,1.5548,100.0,97.5311,...,0.0083,65.1043,0.4999,0.0100,0.0030,2.0027,0.0332,0.0216,0.0083,65.1043
658,-1,2008-02-09,03:35:00,3034.55,2473.39,2233.1556,1434.9983,1.5188,100.0,102.6611,...,,,0.5060,0.0134,0.0032,2.6485,0.0140,0.0112,0.0038,80.4663
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
327,1,2008-08-20,09:17:00,2954.36,2559.27,2238.5444,1659.1424,0.9010,100.0,99.3100,...,0.0034,46.9253,0.4972,0.0260,0.0056,5.2369,0.0226,0.0106,0.0034,46.9253
709,1,2008-03-09,01:15:00,3026.32,2485.81,2205.7222,906.9522,1.3443,100.0,105.6600,...,,,0.5025,0.0131,0.0030,2.6053,0.0161,0.0350,0.0105,217.1506
448,1,2008-08-24,13:03:00,2942.41,2523.71,2207.0444,1269.6078,1.7571,100.0,97.0189,...,,,0.5017,0.0161,0.0040,3.1998,0.0235,0.0355,0.0099,150.7761
282,1,2008-08-19,03:59:00,3008.84,2522.90,2177.3222,1089.3655,1.3101,100.0,101.1478,...,,,0.5031,0.0153,0.0040,3.0408,0.0121,0.0169,0.0054,139.8330


In [20]:
train_data

Unnamed: 0,result,date,time,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
1282,-1,2008-03-10,22:08:00,2927.71,2525.39,2199.6556,1140.3983,1.3369,100.0,103.0967,...,,,0.5055,0.0195,0.0048,3.8618,0.0184,0.0148,0.0054,80.1759
1368,-1,2008-06-10,16:42:00,2916.12,2806.91,2231.0555,1303.5386,0.9751,100.0,95.7878,...,,,0.5028,0.0136,0.0036,2.7137,0.0331,0.0469,0.0151,141.6245
411,-1,2008-08-22,09:29:00,2933.88,2488.49,2178.6889,1657.3518,1.6603,100.0,100.8022,...,,,0.4999,0.0115,0.0027,2.2998,0.0145,0.0213,0.0074,146.5131
580,-1,2008-08-30,11:14:00,2977.46,2497.09,2183.3444,1111.4499,1.5548,100.0,97.5311,...,0.0083,65.1043,0.4999,0.0100,0.0030,2.0027,0.0332,0.0216,0.0083,65.1043
658,-1,2008-02-09,03:35:00,3034.55,2473.39,2233.1556,1434.9983,1.5188,100.0,102.6611,...,,,0.5060,0.0134,0.0032,2.6485,0.0140,0.0112,0.0038,80.4663
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
327,1,2008-08-20,09:17:00,2954.36,2559.27,2238.5444,1659.1424,0.9010,100.0,99.3100,...,0.0034,46.9253,0.4972,0.0260,0.0056,5.2369,0.0226,0.0106,0.0034,46.9253
709,1,2008-03-09,01:15:00,3026.32,2485.81,2205.7222,906.9522,1.3443,100.0,105.6600,...,,,0.5025,0.0131,0.0030,2.6053,0.0161,0.0350,0.0105,217.1506
448,1,2008-08-24,13:03:00,2942.41,2523.71,2207.0444,1269.6078,1.7571,100.0,97.0189,...,,,0.5017,0.0161,0.0040,3.1998,0.0235,0.0355,0.0099,150.7761
282,1,2008-08-19,03:59:00,3008.84,2522.90,2177.3222,1089.3655,1.3101,100.0,101.1478,...,,,0.5031,0.0153,0.0040,3.0408,0.0121,0.0169,0.0054,139.8330


### **Train data**

In [None]:
#remove 55% 

remove_na(train_data)

In [None]:
newdff = remove_constant_value_features(train_data)
len(newdff)

116

In [None]:
# Remove featues with constant values

constant_value_colummns = remove_constant_value_features(train_data)

new_df_columns = [e for e in train_data.columns if e not in constant_value_colummns]
new_df_train = train_data[new_df_columns]
new_df_train




In [None]:
train_data_df = new_df_train.drop(['result','date','time'], axis = 1)
train_data_df

#Use train_data_df for further operations

**imputations on train data**

In [24]:
X_train = knn_impute(train_data_df)

### Test data

In [None]:
test_data

In [None]:
test_data
y_test = test_data['result'].values
X_test1 = test_data.drop(['result','time','date'], axis = 1)

In [None]:
remove_na_xtest = remove_na(X_test1)

In [None]:
new_df_columns = [e for e in X_test1.columns if e not in constant_value_colummns]
X_test = X_test1[new_df_columns]
X_test

Unnamed: 0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature7,Feature8,Feature9,Feature10,Feature11,...,Feature581,Feature582,Feature583,Feature584,Feature585,Feature586,Feature587,Feature588,Feature589,Feature590
256,3073.67,2349.48,2244.1111,1676.7316,0.9197,100.8067,0.1204,1.4596,0.0034,0.0057,...,,,0.5018,0.0105,0.0032,2.0884,0.0135,0.0114,0.0043,84.4337
1112,3135.88,2578.76,2227.9778,3619.7397,1.6656,87.2200,0.1239,1.3849,-0.0060,-0.0005,...,0.0144,156.4628,0.5012,0.0081,0.0023,1.6201,0.0257,0.0402,0.0144,156.4628
35,3040.72,2477.35,2191.6667,1153.9011,1.2569,100.6767,0.1210,1.3475,-0.0152,0.0115,...,,,0.5013,0.0138,0.0035,2.7468,0.0268,0.0199,0.0070,74.1555
1255,2970.86,2510.19,2236.1111,1546.5931,2.0300,90.4233,0.1224,1.3582,-0.0364,0.0032,...,,,0.5001,0.0117,0.0032,2.3479,0.0243,0.0100,0.0031,41.0782
1321,2907.65,2516.34,2208.9000,934.7558,1.9469,119.3544,0.1222,1.6193,-0.0127,-0.0164,...,0.0053,91.4264,0.4966,0.0124,0.0032,2.4984,0.0230,0.0210,0.0053,91.4264
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,3069.31,2448.37,2174.7555,1206.3506,1.4202,104.2622,0.1246,1.4372,-0.0047,-0.0148,...,0.0091,66.1687,0.4994,0.0099,0.0028,1.9755,0.0406,0.0268,0.0091,66.1687
244,3107.79,2470.81,2280.8222,1125.7334,0.6815,101.9111,0.1221,1.4454,-0.0152,-0.0087,...,,,0.4958,0.0112,0.0029,2.2523,0.0154,0.0271,0.0104,176.0329
189,2938.03,2480.90,2138.8778,1046.6043,1.2559,103.3400,0.1240,1.4701,-0.0056,0.0107,...,,,0.4915,0.0112,0.0031,2.2701,0.0104,0.0221,0.0067,211.6182
14,2963.97,2629.48,2224.6222,947.7739,1.2924,104.8489,0.1197,1.4474,0.0144,-0.0119,...,0.0084,142.9080,0.5077,0.0094,0.0026,1.8483,0.0202,0.0289,0.0084,142.9080


### boruta

In [None]:
y_train = train_data['result'].values
y_train

In [26]:
model = RandomForestClassifier(
   n_jobs = -1, 
   max_depth = 5,
   n_estimators=500,
   random_state=1
)

feat_selector = BorutaPy(
    verbose=2,
    estimator=model,
    n_estimators='auto',
    max_iter=100,
    random_state=42,
)



In [None]:
feat_selector.fit(np.array(X_train), np.array(y_train))

In [None]:
feat_selector.support_

In [None]:
feat_selector.support_weak_

In [None]:
feat_selector.ranking_

In [None]:
X_filtered = feat_selector.transform(X_train)
X_filtered

In [None]:
feature_ranks = list(zip(feature_names, 
                         feat_selector.ranking_, 
                         feat_selector.support_))