In [768]:
import numpy as np
import pandas as pd
import gc

from scipy.signal import hilbert
from scipy.signal import hann
from scipy.signal import convolve
from scipy.signal import stft as stftfunc

from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import NuSVR, SVR

import xgboost as xgb
import lightgbm as lgb

from sklearn.model_selection import train_test_split, KFold

pd.options.display.precision = 15

In [2]:
df_train = pd.read_csv("train.csv")

In [302]:
def add_trend_feature(arr, abs_values=False):
    idx = np.array(range(len(arr)))
    if abs_values:
        arr = np.abs(arr)
    lr = LinearRegression()
    lr.fit(idx.reshape(-1, 1), arr)
    return lr.coef_[0]

def classic_sta_lta(x, length_sta, length_lta):
    
    sta = np.cumsum(x ** 2)

    # Convert to float
    sta = np.require(sta, dtype=np.float)

    # Copy for LTA
    lta = sta.copy()

    # Compute the STA and the LTA
    sta[length_sta:] = sta[length_sta:] - sta[:-length_sta]
    sta /= length_sta
    lta[length_lta:] = lta[length_lta:] - lta[:-length_lta]
    lta /= length_lta

    # Pad zeros
    sta[:length_lta - 1] = 0

    # Avoid division by zero by setting zero values to tiny float
    dtiny = np.finfo(0.0).tiny
    idx = lta < dtiny
    lta[idx] = dtiny

    return sta / lta

In [651]:
def ExtractFeatures(df, x, i):
    df.loc[i, '50000ave'] = x[:50000].mean()
    df.loc[i, '50000std'] = x[:50000].std()
    df.loc[i, '50000max'] = x[:50000].max()
    df.loc[i, '50000min'] = x[:50000].min()
    
    df.loc[i, '100000ave'] = x[:100000].mean()
    df.loc[i, '100000std'] = x[:100000].std()
    df.loc[i, '100000max'] = x[:100000].max()
    df.loc[i, '100000min'] = x[:100000].min()
    
    df.loc[i, '150000ave'] = x.mean()
    df.loc[i, '150000std'] = x.std()
    df.loc[i, '150000max'] = x.max()
    df.loc[i, '150000min'] = x.min()
    
    df.loc[i, 'mid50000ave'] = x[50000:100000].mean()
    df.loc[i, 'mid50000std'] = x[50000:100000].std()
    df.loc[i, 'mid50000max'] = x[50000:100000].max()
    df.loc[i, 'mid50000min'] = x[50000:100000].min()
    
    df.loc[i, 'last50000ave'] = x[-50000:].mean()
    df.loc[i, 'last50000std'] = x[-50000:].std()
    df.loc[i, 'last50000max'] = x[-50000:].max()
    df.loc[i, 'last50000min'] = x[-50000:].min()
    
    df.loc[i, 'last100000ave'] = x[-100000:].mean()
    df.loc[i, 'last100000std'] = x[-100000:].std()
    df.loc[i, 'last100000max'] = x[-100000:].max()
    df.loc[i, 'last100000min'] = x[-100000:].min()
       
    # Getting the short time fourier transformation information
    # The data was recorded at 4Mhz, hence the sampling frequency is 4_000_000
    
    f,t,stft_50000 = stft(x[:50000],fs=4_000_000)
    f,t,stft_100000 = stft(x[:100000],fs=4_000_000)
    f,t,stft_mid50000 = stft(x[50000:100000],fs=4_000_000)
    f,t,stft_last50000 = stft(x[-50000:],fs=4_000_000)
    f,t,stft_last100000 = stft(x[-100000:],fs=4_000_000)
    f,t,stft_150000 = stft(x,fs=4_000_000)
    
    # The mean of an stft is always 0
    # The max and min are just negatives of each other, so we only take the max
    df.loc[i, '50000stftstd'] = stft_50000.std()
    df.loc[i, '50000stftmax'] = np.real(stft_50000.max())
    
    df.loc[i, '100000stftstd'] = stft_100000.std()
    df.loc[i, '100000stftmax'] = np.real(stft_100000.max())
    
    df.loc[i, '150000stftstd'] = stft_150000.std()
    df.loc[i, '150000stftmax'] = np.real(stft_150000.max())
    
    df.loc[i, 'mid50000stftstd'] = stft_mid50000.std()
    df.loc[i, 'mid50000stftmax'] = np.real(stft_mid50000.max())
    
    df.loc[i, 'last50000stftstd'] = stft_last50000.std()
    df.loc[i, 'last50000stftmax'] = np.real(stft_last50000.max())
    
    df.loc[i, 'last100000stftstd'] = stft_last100000.std()
    df.loc[i, 'last100000stftmax'] = np.real(stft_last100000.max())
    
    df.loc[i, 'q95'] = np.quantile(x,0.95)
    df.loc[i, 'q99'] = np.quantile(x,0.99)
    df.loc[i, 'q05'] = np.quantile(x,0.05)
    df.loc[i, 'q01'] = np.quantile(x,0.01)
    
    df.loc[i, 'iqr'] = np.subtract(*np.percentile(x, [75, 25]))
    df.loc[i, 'q999'] = np.quantile(x,0.999)
    df.loc[i, 'q001'] = np.quantile(x,0.001)
    
    df.loc[i, 'max_to_min'] = x.max() / np.abs(x.min())
    df.loc[i, 'max_to_min_diff'] = x.max() - np.abs(x.min())
    df.loc[i, 'count_big'] = len(x[np.abs(x) > 500])
    df.loc[i, 'sum'] = x.sum()
    
    df.loc[i, 'trend'] = add_trend_feature(x)
    df.loc[i, 'abs_trend'] = add_trend_feature(x, abs_values=True)
    df.loc[i, 'abs_mean'] = np.abs(x).mean()
    df.loc[i, 'abs_std'] = np.abs(x).std()
    
    df.loc[i, 'mean_change_rate_first_50000'] = np.mean(np.nonzero((np.diff(x[:50000]) / x[:50000][:-1]))[0])
    df.loc[i, 'mean_change_rate_last_50000'] = np.mean(np.nonzero((np.diff(x[-50000:]) / x[-50000:][:-1]))[0])
    df.loc[i, 'mean_change_rate_first_10000'] = np.mean(np.nonzero((np.diff(x[:10000]) / x[:10000][:-1]))[0])
    df.loc[i, 'mean_change_rate_last_10000'] = np.mean(np.nonzero((np.diff(x[-10000:]) / x[-10000:][:-1]))[0])
    
    df.loc[i, 'mad'] = x.mad()
    df.loc[i, 'kurt'] = x.kurtosis()
    df.loc[i, 'skew'] = x.skew()
    df.loc[i, 'med'] = x.median()
    
    df.loc[i, 'Hilbert_mean'] = np.abs(hilbert(x)).mean()
    df.loc[i, 'Hann_window_mean'] = (convolve(x, hann(150), mode='same') / sum(hann(150))).mean()
    
    df.loc[i, 'classic_sta_lta1_mean'] = classic_sta_lta(x, 500, 10000).mean()
    df.loc[i, 'classic_sta_lta2_mean'] = classic_sta_lta(x, 5000, 100000).mean()
    df.loc[i, 'classic_sta_lta3_mean'] = classic_sta_lta(x, 3333, 6666).mean()
    df.loc[i, 'classic_sta_lta4_mean'] = classic_sta_lta(x, 10000, 25000).mean()
    
    df.loc[i, 'Moving_average_700_mean'] = x.rolling(window=700).mean().mean(skipna=True)
    df.loc[i, 'Moving_average_1500_mean'] = x.rolling(window=1500).mean().mean(skipna=True)
    df.loc[i, 'Moving_average_3000_mean'] = x.rolling(window=3000).mean().mean(skipna=True)
    df.loc[i, 'Moving_average_6000_mean'] = x.rolling(window=6000).mean().mean(skipna=True)
    
    ewma = pd.Series.ewm
    df.loc[i, 'exp_Moving_average_300_mean'] = (ewma(x, span=300).mean()).mean(skipna=True)
    df.loc[i, 'exp_Moving_average_3000_mean'] = ewma(x, span=3000).mean().mean(skipna=True)
    df.loc[i, 'exp_Moving_average_30000_mean'] = ewma(x, span=6000).mean().mean(skipna=True)
    
    no_of_std = 2
    df.loc[i, 'MA_700MA_std_mean'] = x.rolling(window=700).std().mean()
    df.loc[i,'MA_700MA_BB_high_mean'] = (df.loc[i, 'Moving_average_700_mean'] + no_of_std * df.loc[i, 'MA_700MA_std_mean']).mean()
    df.loc[i,'MA_700MA_BB_low_mean'] = (df.loc[i, 'Moving_average_700_mean'] - no_of_std * df.loc[i, 'MA_700MA_std_mean']).mean()
    df.loc[i, 'MA_400MA_std_mean'] = x.rolling(window=400).std().mean()
    df.loc[i,'MA_400MA_BB_high_mean'] = (df.loc[i, 'Moving_average_700_mean'] + no_of_std * df.loc[i, 'MA_400MA_std_mean']).mean()
    df.loc[i,'MA_400MA_BB_low_mean'] = (df.loc[i, 'Moving_average_700_mean'] - no_of_std * df.loc[i, 'MA_400MA_std_mean']).mean()
    df.loc[i, 'MA_1000MA_std_mean'] = x.rolling(window=1000).std().mean()
    
    for windows in [10, 100, 1000]:
        x_roll_std = x.rolling(windows).std().dropna().values
        x_roll_mean = x.rolling(windows).mean().dropna().values
        
        df.loc[i, 'ave_roll_std_' + str(windows)] = x_roll_std.mean()
        df.loc[i, 'std_roll_std_' + str(windows)] = x_roll_std.std()
        df.loc[i, 'max_roll_std_' + str(windows)] = x_roll_std.max()
        df.loc[i, 'min_roll_std_' + str(windows)] = x_roll_std.min()
        df.loc[i, 'q01_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.01)
        df.loc[i, 'q05_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.05)
        df.loc[i, 'q95_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.95)
        df.loc[i, 'q99_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.99)
        df.loc[i, 'av_change_abs_roll_std_' + str(windows)] = np.mean(np.diff(x_roll_std))
        df.loc[i, 'av_change_rate_roll_std_' + str(windows)] = np.mean(np.nonzero((np.diff(x_roll_std) / x_roll_std[:-1]))[0])
        df.loc[i, 'abs_max_roll_std_' + str(windows)] = np.abs(x_roll_std).max()
        
        df.loc[i, 'ave_roll_mean_' + str(windows)] = x_roll_mean.mean()
        df.loc[i, 'std_roll_mean_' + str(windows)] = x_roll_mean.std()
        df.loc[i, 'max_roll_mean_' + str(windows)] = x_roll_mean.max()
        df.loc[i, 'min_roll_mean_' + str(windows)] = x_roll_mean.min()
        df.loc[i, 'q01_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.01)
        df.loc[i, 'q05_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.05)
        df.loc[i, 'q95_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.95)
        df.loc[i, 'q99_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.99)
        df.loc[i, 'av_change_abs_roll_mean_' + str(windows)] = np.mean(np.diff(x_roll_mean))
        df.loc[i, 'av_change_rate_roll_mean_' + str(windows)] = np.mean(np.nonzero((np.diff(x_roll_mean) / x_roll_mean[:-1]))[0])
        df.loc[i, 'abs_max_roll_mean_' + str(windows)] = np.abs(x_roll_mean).max()
    
    return df

In [652]:
train_rows = 150_000

chunks = int(len(df_train) / train_rows)

y = np.zeros(shape=(chunks, 1))

X_train = pd.DataFrame(index=range(chunks), dtype=np.float64, columns=features)
y_train = pd.DataFrame(index=range(chunks), dtype=np.float64, columns=['ttf'])

for i in tqdm(range(chunks)):
    seg = df_train.iloc[i*train_rows:i*train_rows+train_rows]
    x = seg['acoustic_data']
    y = seg['time_to_failure'].values[-1]
                
    X_train = ExtractFeatures(X_train, x, i)
        
    y_train.loc[i, 'ttf'] = y






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































  9%|███████                                                                        | 373/4194 [02:50<29:03,  2.19it/s]




















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 17%|█████████████▌                                                                 | 722/4194 [05:35<29:51,  1.94it/s]

























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 22%|█████████████████▌                                                             | 934/4194 [07:15<25:57,  2.09it/s]



















 22%|█████████████████▋                                                             | 938/4194 [07:17<26:20,  2.06it/s]
































































































 23%|██████████████████                                                             | 957/4194 [07:26<26:03,  2.07it/s]















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 49%|██████████████████████████████████████▎                                       | 2063/4194 [16:28<15:52,  2.24it/s]




































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 67%|████████████████████████████████████████████████████▎                         | 2810/4194 [22:01<10:07,  2.28it/s]









































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 74%|█████████████████████████████████████████████████████████▌                    | 3094/4194 [24:07<07:53,  2.32it/s]
























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 79%|█████████████████████████████████████████████████████████████▉                | 3331/4194 [25:53<06:17,  2.28it/s]



























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































 99%|█████████████████████████████████████████████████████████████████████████████▎| 4157/4194 [32:01<00:16,  2.25it/s]


























































































































































































100%|██████████████████████████████████████████████████████████████████████████████| 4194/4194 [32:18<00:00,  2.29it/s]






In [653]:
X_train.head()

Unnamed: 0,50000ave,50000std,50000max,50000min,100000ave,100000std,100000max,100000min,150000ave,150000std,...,std_roll_mean_1000,max_roll_mean_1000,min_roll_mean_1000,q01_roll_mean_1000,q05_roll_mean_1000,q95_roll_mean_1000,q99_roll_mean_1000,av_change_abs_roll_mean_1000,av_change_rate_roll_mean_1000,abs_max_roll_mean_1000
0,4.9621,6.488551888989741,104.0,-98.0,5.01594,5.680238425232313,104.0,-98.0,4.884113333333334,5.101106130606765,...,0.295714650592636,5.629,3.896,4.072,4.379,5.338,5.484,-1.704697987e-06,74222.34344264082,5.629
1,4.6984,7.305232697765033,181.0,-154.0,4.69448,7.073147660255528,181.0,-154.0,4.725766666666667,6.588823781946801,...,0.231586699576737,5.667,3.412,4.233,4.345,5.066,5.223,-2.44966443e-06,74364.42647752765,5.667
2,4.7061,6.104836457315087,97.0,-87.0,4.81588,5.98173700837611,97.0,-87.0,4.906393333333333,6.967397033524431,...,0.267011771793293,5.957,4.055,4.239,4.446,5.344,5.486,1.140939597e-06,74594.9845844746,5.957
3,4.84364,6.238109475682244,75.0,-63.0,4.83663,7.475954731142466,197.0,-199.0,4.90224,6.922305187180113,...,0.266699817775971,5.858,3.722,4.304,4.433,5.317,5.453,-2.55033557e-06,74333.1436226136,5.858
4,4.89116,5.323829516204886,93.0,-96.0,4.94855,7.095942955066946,145.0,-125.0,4.90872,7.301110189758817,...,0.228004751350508,6.078,3.918,4.434,4.543,5.306,5.491,1.55033557e-06,74428.43596888422,6.078


In [654]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled_array = scaler.transform(X_train)

X_train_scaled = pd.DataFrame(X_train_scaled_array, columns=X_train.columns)

In [655]:
rf = RandomForestRegressor(max_depth=7, random_state=0, n_estimators=100)

rf.fit(X_train_scaled, y_train)

rf_preds = rf.predict(X_train_scaled)

mean_absolute_error(y_train, rf_preds)

  This is separate from the ipykernel package so we can avoid doing imports until


1.7522105316586065

# Reading in Test Data
<hr>

In [None]:
submission = pd.read_csv('sample_submission.csv', index_col='seg_id')

X_test = pd.DataFrame(columns=X_train.columns, dtype=np.float64, index=submission.index)

for seg_id in tqdm(X_test.index):
    seg = pd.read_csv('test/' + seg_id + '.csv')
    
    x = seg['acoustic_data']
    
    X_test = ExtractFeatures(X_test, x, seg_id)


In [353]:
X_test_scaled_array = scaler.transform(X_test)

X_test_scaled = pd.DataFrame(X_test_scaled_array, columns=X_train.columns)

# Model Building with 5 fold CV
<hr>

In [822]:
n_fold = 5

folds = KFold(n_splits=n_fold, shuffle=True, random_state=11)

xgb_params = {'eta': 0.05,
              'max_depth': 4,
              'subsample': 0.85,
              'alpha' : 0.2,
              'objective': 'reg:linear',
              'eval_metric': 'mae',
              'silent': True,
              'nthread': 4}

lgb_params = {'num_leaves': 11,
          'min_data_in_leaf': 30,
          'objective': 'huber',
          'max_depth': -1,
          'learning_rate': 0.01,
          "boosting": "gbdt",
          "bagging_freq": 4,
          "bagging_fraction": 0.8126672064208567,
          "bagging_seed": 11,
          "metric": 'mae',
          "verbosity": -1,
          'reg_alpha': 0.1302650970728192,
          'reg_lambda': 0.3603427518866501
         }

def xgb_CV(X, y, X_test, folds, params):
    
    prediction = np.zeros(len(X_test))
    scores = []
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
        
        train_data = xgb.DMatrix(data=X_train, label=y_train, feature_names=X.columns)
        valid_data = xgb.DMatrix(data=X_valid, label=y_valid, feature_names=X.columns)
        watchlist = [(train_data, 'train'), (valid_data, 'valid_data')]
        
        model = xgb.train(dtrain=train_data, num_boost_round=20000, evals=watchlist, \
                          early_stopping_rounds=200, verbose_eval=150, params=params)
        
        y_pred_valid = model.predict(xgb.DMatrix(X_valid, feature_names=X.columns), ntree_limit=model.best_ntree_limit)
        
        y_pred = model.predict(xgb.DMatrix(X_test, feature_names=X.columns), ntree_limit=model.best_ntree_limit)
        
        scores.append(mean_absolute_error(y_valid, y_pred_valid))
        
        prediction += y_pred
        
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
        
    prediction /= n_fold
    
    return prediction

def lgb_CV(X, y, X_test, folds, params):
    
    prediction = np.zeros(len(X_test))
    scores = []
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
                
        model = lgb.LGBMRegressor(**params, n_estimators = 50000)
        
        model.fit(X_train, y_train, 
                    eval_set=[(X_train, y_train), (X_valid, y_valid)], eval_metric='mae',
                    verbose=500, early_stopping_rounds=200)
        
        y_pred_valid = model.predict(X_valid)
        
        y_pred = model.predict(X_test, num_iteration=model.best_iteration_)
        
        scores.append(mean_absolute_error(y_valid, y_pred_valid))
        
        prediction += y_pred
        
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
        
    prediction /= n_fold
    
    return prediction

def rf_CV(X, y, X_test, folds):
    
    prediction = np.zeros(len(X_test))
    scores = []
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
                
        model = RandomForestRegressor(max_depth=7, random_state=0, n_estimators=100)
        
        model.fit(X_train, y_train)
        
        y_pred_valid = model.predict(X_valid)
        
        y_pred = model.predict(X_test)
        
        scores.append(mean_absolute_error(y_valid, y_pred_valid))
        
        prediction += y_pred
        
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
        
    prediction /= n_fold
    
    return prediction

def NuSVR_CV(X, y, X_test, folds):
    
    prediction = np.zeros(len(X_test))
    scores = []
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
                
        model = NuSVR(gamma='scale', nu=0.7, C=1.3, tol=0.05)
        
        model.fit(X_train, y_train)
        
        y_pred_valid = model.predict(X_valid)
        
        y_pred = model.predict(X_test)
        
        scores.append(mean_absolute_error(y_valid, y_pred_valid))
        
        prediction += y_pred
        
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
        
    prediction /= n_fold
    
    return prediction

def SVR_CV(X, y, X_test, folds):
    
    prediction = np.zeros(len(X_test))
    scores = []
    
    for fold_n, (train_index, valid_index) in enumerate(folds.split(X)):
        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
                
        model = SVR(gamma='scale', C=1.3, tol=0.05)
        
        model.fit(X_train, y_train)
        
        y_pred_valid = model.predict(X_valid)
        
        y_pred = model.predict(X_test)
        
        scores.append(mean_absolute_error(y_valid, y_pred_valid))
        
        prediction += y_pred
        
    print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores), np.std(scores)))
        
    prediction /= n_fold
    
    return prediction

In [823]:
print("NuSVR")
nusvr_preds = NuSVR_CV(X_train_scaled, y_train, X_test_scaled, folds)
print("*"*80)
print("SVR")
svr_preds = SVR_CV(X_train_scaled, y_train, X_test_scaled, folds)
print("*"*80)
print("Random Forest")
rf_preds = rf_CV(X_train_scaled, y_train, X_test_scaled, folds)
print("*"*80)
print("LGB")
lgb_preds = lgb_CV(X_train_scaled, y_train, X_test_scaled, folds, lgb_params)
print("*"*80)
print("XGB")
xgb_preds = xgb_CV(X_train_scaled, y_train, X_test_scaled, folds, xgb_params)
print("*"*80)

NuSVR


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


CV mean score: 2.0545, std: 0.0737.
********************************************************************************
SVR


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


CV mean score: 2.0600, std: 0.0773.
********************************************************************************
Random Forest




CV mean score: 2.0692, std: 0.0748.
********************************************************************************
LGB
Training until validation scores don't improve for 200 rounds.
[500]	training's l1: 2.00328	valid_1's l1: 2.1587
[1000]	training's l1: 1.83596	valid_1's l1: 2.06936
[1500]	training's l1: 1.73266	valid_1's l1: 2.06138
Early stopping, best iteration is:
[1508]	training's l1: 1.73121	valid_1's l1: 2.06114
Training until validation scores don't improve for 200 rounds.
[500]	training's l1: 2.01766	valid_1's l1: 2.11041
[1000]	training's l1: 1.84292	valid_1's l1: 2.01428
Early stopping, best iteration is:
[1276]	training's l1: 1.78167	valid_1's l1: 2.00671
Training until validation scores don't improve for 200 rounds.
[500]	training's l1: 1.99235	valid_1's l1: 2.18074
[1000]	training's l1: 1.81602	valid_1's l1: 2.13348
Early stopping, best iteration is:
[1236]	training's l1: 1.76362	valid_1's l1: 2.13215
Training until validation scores don't improve for 200 rounds.
[500]	

# Test Prediction

In [824]:
submission['time_to_failure'] = (rf_preds + lgb_preds + xgb_preds + nusvr_preds + svr_preds) / 5
submission.to_csv('submission.csv')

In [825]:
submission.head()

Unnamed: 0_level_0,time_to_failure
seg_id,Unnamed: 1_level_1
seg_00030f,2.931539963426096
seg_0012b5,5.426110903177053
seg_00184e,5.082739683184064
seg_003339,8.125672522686804
seg_0042cc,7.024048957011166
