In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis
from scipy.stats import skew
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV,KFold

In [3]:
train1 = pd.read_csv(r"F:\project\train.csv",dtype={'acoustic_data':np.int16, 'time_to_failure':np.float64},nrows=153600000)

In [5]:
train2 = pd.read_csv(r"F:\project\train.csv",dtype={'acoustic_data':np.int16, 'time_to_failure':np.float64},skiprows=153600000,nrows=153600000,names=['acoustic_data','time_to_failure'])
#train2.columns['acoustic_data','time_to_failure']


In [6]:
train3 = pd.read_csv(r"F:\project\train.csv",dtype={'acoustic_data':np.int16, 'time_to_failure':np.float64},skiprows=153600000*2,nrows=153600000,names=['acoustic_data','time_to_failure'])

In [7]:
train4 = pd.read_csv(r"F:\project\train.csv",dtype={'acoustic_data':np.int16, 'time_to_failure':np.float64},skiprows=153600000*3,nrows=153600000,names=['acoustic_data','time_to_failure'])

In [72]:
rows = 150000
segments = int(np.floor(train1.shape[0] / rows))
print("Number of segments: ", segments)
print(type(segments))

X_train = pd.DataFrame(index = range(segments), dtype = np.float64, columns = ['ave', 'std', 'max', 'min','sum','skew','kurt','var','quantile_1','quantile_2','quantile_3','median','rstd','rmax','rmin','rsum','rmedian','rvar','rcov','change'])
y_train = pd.DataFrame(index = range(segments), dtype= np.float64, columns = ['time_to_failure'])

Number of segments:  1024
<class 'int'>


In [73]:
def add_features(data,n):
    for i in range(segments):
        # Selecting the rows from each segment in the data.
        seg = data.iloc[i*rows : (i*rows)+rows]
        i = i+1024*n
        
        x = seg['acoustic_data'].values
        y = seg['time_to_failure'].values
        
        y_train.loc[i,'time_to_failure'] = y.mean()

        X_train.loc[i, 'std'] = x.std()
        X_train.loc[i, 'max'] = x.max()
        X_train.loc[i, 'min'] = x.min()
        X_train.loc[i, 'sum'] = x.sum()
        X_train.loc[i, 'skew'] = skew(x)
        X_train.loc[i, 'kurt'] = kurtosis(x)
        X_train.loc[i,'var'] = x.var()
        X_train.loc[i,'quantile_1'] = seg['acoustic_data'].quantile(0.25)
        X_train.loc[i,'quantile_2'] = seg['acoustic_data'].quantile(0.50)
        X_train.loc[i,'quantile_3'] = seg['acoustic_data'].quantile(0.75)
        X_train.loc[i,'median'] = seg['acoustic_data'].median()
        
        rmean = pd.DataFrame(seg['acoustic_data'].rolling(window=3).mean())
        rmean=rmean.fillna(method='backfill',axis=0)
        X_train.loc[i, 'ave'] = rmean['acoustic_data'].mean()
        
        rstd = pd.DataFrame(seg['acoustic_data'].rolling(window=3).std())
        rstd = rstd.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rstd'] = rstd['acoustic_data'].mean()
        
        rmax = pd.DataFrame(seg['acoustic_data'].rolling(window=3).max())
        rmax = rmax.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rmax'] = rmax['acoustic_data'].mean()
        
        rmin = pd.DataFrame(seg['acoustic_data'].rolling(window=3).min())
        rmin = rmin.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rmin'] = rmin['acoustic_data'].mean()
        
        rsum = pd.DataFrame(seg['acoustic_data'].rolling(window=3).sum())
        rsum = rsum.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rsum'] = rsum['acoustic_data'].mean()
        
        rmedian = pd.DataFrame(seg['acoustic_data'].rolling(window=3).median())
        rmedian = rmedian.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rmedian'] = rmedian['acoustic_data'].mean()
        
        rvar = pd.DataFrame(seg['acoustic_data'].rolling(window=3).var())
        rvar = rvar.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rvar'] = rvar['acoustic_data'].mean()
        
        rcov = pd.DataFrame(seg['acoustic_data'].rolling(window=3).cov())
        rcov = rcov.fillna(method='backfill',axis=0)
        X_train.loc[i, 'rcov'] = rcov['acoustic_data'].mean()
        
        stepsize = np.diff(seg['acoustic_data'])
        seg = seg.drop(seg.index[len(seg)-1])
        X_train.loc[i,'change'] = stepsize.mean()
       


In [74]:
add_features(train1,0)

In [75]:
print(X_train)

           ave       std    max    min       sum      skew        kurt  \
0     4.884158  5.101089  104.0  -98.0  732617.0 -0.024061   33.661319   
1     4.725778  6.588802  181.0 -154.0  708865.0  0.390557   98.755185   
2     4.906409  6.967374  140.0 -106.0  735959.0  0.217388   33.554053   
3     4.902171  6.922282  197.0 -199.0  735336.0  0.757270  116.544247   
4     4.908691  7.301086  145.0 -126.0  736308.0  0.064530   52.976099   
...        ...       ...    ...    ...       ...       ...         ...   
1019  4.565758  5.317743  127.0 -110.0  684857.0  0.232706   43.891363   
1020  4.692956  4.717642  100.0  -75.0  703946.0  0.163081   32.803570   
1021  4.609582  5.587461  161.0 -152.0  691420.0  0.908637  121.332117   
1022  4.708453  4.451870   75.0  -58.0  706265.0  0.046368   18.597334   
1023  4.643924  4.568277  111.0  -85.0  696589.0  0.157422   49.859972   

            var  quantile_1  quantile_2  quantile_3  median      rstd  \
0     26.021110         3.0         5.

In [76]:
print(X_train.shape)
print(y_train.shape)

(1024, 20)
(1024, 1)


In [77]:
print(X_train.isna().sum().any())

False


In [78]:
add_features(train2,1)

In [79]:
add_features(train3,2)

In [80]:
add_features(train4,3)

In [101]:
print(X_train.shape)
print(y_train.shape)

pandas.core.series.Series

In [82]:
X_train.isna().sum().any()

False

In [102]:
X_train.to_csv(r"F:\project\X_train_final.csv",index=False)
y_train = pd.DataFrame(y_train)
y_train.to_csv(r"F:\project\y_train_final.csv",index=False)

In [85]:
submission = pd.read_csv(r"D:\Project Dataset\LANL-Earthquake-Prediction\sample_submission.csv", index_col='seg_id')
submission.head()

Unnamed: 0_level_0,time_to_failure
seg_id,Unnamed: 1_level_1
seg_00030f,0
seg_0012b5,0
seg_00184e,0
seg_003339,0
seg_0042cc,0


In [86]:
X_test = pd.DataFrame(columns=X_train.columns, dtype=np.float64, index=submission.index)

In [91]:
for seg_id in X_test.index:
    seg = pd.read_csv("D:/Project Dataset/LANL-Earthquake-Prediction/test/" + seg_id + ".csv")
    
    x = seg['acoustic_data'].values
    
    X_test.loc[seg_id, 'ave'] = x.mean()
    X_test.loc[seg_id, 'std'] = x.std()
    X_test.loc[seg_id, 'max'] = x.max()
    X_test.loc[seg_id, 'min'] = x.min()
    X_test.loc[seg_id, 'sum'] = x.sum()
    X_test.loc[seg_id, 'skew'] = skew(x)
    X_test.loc[seg_id, 'kurt'] = kurtosis(x)
    X_test.loc[seg_id,'var'] = x.var()
    X_test.loc[seg_id,'quantile_1'] = seg['acoustic_data'].quantile(0.25)
    X_test.loc[seg_id,'quantile_2'] = seg['acoustic_data'].quantile(0.50)
    X_test.loc[seg_id,'quantile_3'] = seg['acoustic_data'].quantile(0.75)
    X_test.loc[seg_id,'median'] = seg['acoustic_data'].median()
    
    rstd = pd.DataFrame(seg['acoustic_data'].rolling(window=3).std())
    rstd = rstd.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rstd'] = rstd['acoustic_data'].mean()
    
    rmax = pd.DataFrame(seg['acoustic_data'].rolling(window=3).max())
    rmax = rmax.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rmax'] = rmax['acoustic_data'].mean()
        
    rmin = pd.DataFrame(seg['acoustic_data'].rolling(window=3).min())
    rmin = rmin.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rmin'] = rmin['acoustic_data'].mean()
        
    rsum = pd.DataFrame(seg['acoustic_data'].rolling(window=3).sum())
    rsum = rsum.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rsum'] = rsum['acoustic_data'].mean()
        
    rmedian = pd.DataFrame(seg['acoustic_data'].rolling(window=3).median())
    rmedian = rmedian.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rmedian'] = rmedian['acoustic_data'].mean()
        
    rvar = pd.DataFrame(seg['acoustic_data'].rolling(window=3).var())
    rvar = rvar.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rvar'] = rvar['acoustic_data'].mean()
    
    rcov = pd.DataFrame(seg['acoustic_data'].rolling(window=3).cov())
    rcov = rcov.fillna(method='backfill',axis=0)
    X_test.loc[seg_id, 'rcov'] = rcov['acoustic_data'].mean()
        
    stepsize = np.diff(seg['acoustic_data'])
    seg = seg.drop(seg.index[len(seg)-1])
    X_test.loc[seg_id,'change'] = stepsize.mean()

In [92]:
print(X_test)
X_test.isna().sum().any()

                 ave        std    max    min       sum      skew        kurt  \
seg_id                                                                          
seg_00030f  4.491780   4.893673  115.0  -75.0  673767.0  0.327904   28.836567   
seg_0012b5  4.171153   5.922820  152.0 -140.0  625673.0  0.295705   56.217041   
seg_00184e  4.610260   6.946967  248.0 -193.0  691539.0  0.428684  162.112840   
seg_003339  4.531473   4.114133   85.0  -93.0  679721.0  0.061889   41.240413   
seg_0042cc  4.128340   5.797144  177.0 -147.0  619251.0  0.073898   79.537016   
...              ...        ...    ...    ...       ...       ...         ...   
seg_ff4236  4.095773   5.082426   71.0  -50.0  614366.0 -0.001993   12.280456   
seg_ff7478  4.138493   6.031576  164.0 -208.0  620774.0 -0.843307  132.557911   
seg_ff79d9  3.748587   9.467500  310.0 -303.0  562288.0 -0.472015  202.478046   
seg_ffbd6a  4.341687  13.199310  362.0 -319.0  651253.0  0.057967  109.228711   
seg_ffe7cc  4.199720   3.573

False

In [93]:
X_test.to_csv(r"F:\project\X_test_final.csv",index=False)

In [94]:
kfold=KFold(n_splits=5,random_state=2020)

depth_range = [12,16,15,17,18]
minsplit_range = [5,10,20,25,30,3]
minleaf_range = [5,10,15,3]

parameters = dict(max_depth=depth_range,
                  min_samples_split=minsplit_range, 
                  min_samples_leaf=minleaf_range)
rfr = RandomForestRegressor(oob_score=True,random_state=2020)

cv = GridSearchCV(rfr,param_grid=parameters,cv=kfold,scoring='neg_mean_absolute_error',verbose=3)
y_train = y_train['time_to_failure']



In [95]:
cv.fit(X_train,y_train)

Fitting 5 folds for each of 120 candidates, totalling 600 fits
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=5 ...........


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=5, score=-2.192, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=5 ...........


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.8s remaining:    0.0s


[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=5, score=-2.442, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=5 ...........


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.7s remaining:    0.0s


[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=5, score=-2.716, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=5, score=-1.540, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=5, score=-2.612, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=10 ..........
[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=10, score=-2.192, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=10 ..........
[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=10, score=-2.442, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=10 ..........
[CV]  max_depth=12, min_samples_leaf=5, min_samples_split=10, score=-2.716, total=   1.9s
[CV] max_depth=12, min_samples_leaf=5, min_samples_split=10 ..........
[CV]  max_depth=12, min_samples_leaf=

[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=30, score=-1.517, total=   1.6s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=30 .........
[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=30, score=-2.628, total=   1.6s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=3, score=-2.191, total=   1.7s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=3, score=-2.428, total=   1.7s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=3, score=-2.707, total=   1.7s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=12, min_samples_leaf=10, min_samples_split=3, score=-1.519, total=   1.7s
[CV] max_depth=12, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=12, min_samples_

[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=20, score=-2.621, total=   1.9s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=25, score=-2.190, total=   1.8s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=25, score=-2.434, total=   1.8s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=25, score=-2.695, total=   1.9s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=25, score=-1.526, total=   1.8s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=12, min_samples_leaf=3, min_samples_split=25, score=-2.619, total=   1.9s
[CV] max_depth=12, min_samples_leaf=3, min_samples_split=30 ..........
[CV]  max_depth=12, min_samples_le

[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=10, score=-2.190, total=   1.8s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=10, score=-2.428, total=   1.8s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=10, score=-2.710, total=   1.8s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=10, score=-1.522, total=   1.7s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=10, score=-2.632, total=   1.7s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=20 .........
[CV]  max_depth=16, min_samples_leaf=10, min_samples_split=20, score=-2.190, total=   1.8s
[CV] max_depth=16, min_samples_leaf=10, min_samples_split=20 .........
[CV]  max_depth=16, min_samp

[CV]  max_depth=16, min_samples_leaf=15, min_samples_split=3, score=-2.415, total=   1.6s
[CV] max_depth=16, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=16, min_samples_leaf=15, min_samples_split=3, score=-2.702, total=   1.6s
[CV] max_depth=16, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=16, min_samples_leaf=15, min_samples_split=3, score=-1.517, total=   1.5s
[CV] max_depth=16, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=16, min_samples_leaf=15, min_samples_split=3, score=-2.604, total=   1.5s
[CV] max_depth=16, min_samples_leaf=3, min_samples_split=5 ...........
[CV]  max_depth=16, min_samples_leaf=3, min_samples_split=5, score=-2.209, total=   2.2s
[CV] max_depth=16, min_samples_leaf=3, min_samples_split=5 ...........
[CV]  max_depth=16, min_samples_leaf=3, min_samples_split=5, score=-2.470, total=   2.1s
[CV] max_depth=16, min_samples_leaf=3, min_samples_split=5 ...........
[CV]  max_depth=16, min_samples_leaf

[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=25, score=-2.707, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=25 ..........
[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=25, score=-1.528, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=25 ..........
[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=25, score=-2.622, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=30 ..........
[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=30, score=-2.186, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=30 ..........
[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=30, score=-2.429, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=30 ..........
[CV]  max_depth=15, min_samples_leaf=5, min_samples_split=30, score=-2.704, total=   1.8s
[CV] max_depth=15, min_samples_leaf=5, min_samples_split=30 ..........
[CV]  max_depth=15, min_samples_le

[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=10, score=-1.517, total=   1.5s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=10 .........
[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=10, score=-2.604, total=   1.5s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=20 .........
[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=20, score=-2.194, total=   1.6s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=20 .........
[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=20, score=-2.415, total=   1.6s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=20 .........
[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=20, score=-2.702, total=   1.5s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=20 .........
[CV]  max_depth=15, min_samples_leaf=15, min_samples_split=20, score=-1.517, total=   1.5s
[CV] max_depth=15, min_samples_leaf=15, min_samples_split=20 .........
[CV]  max_depth=15, min_samp

[CV]  max_depth=15, min_samples_leaf=3, min_samples_split=3, score=-2.638, total=   2.2s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=17, min_samples_leaf=5, min_samples_split=5, score=-2.192, total=   2.0s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=17, min_samples_leaf=5, min_samples_split=5, score=-2.447, total=   2.0s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=17, min_samples_leaf=5, min_samples_split=5, score=-2.729, total=   2.0s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=17, min_samples_leaf=5, min_samples_split=5, score=-1.552, total=   2.0s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=5 ...........
[CV]  max_depth=17, min_samples_leaf=5, min_samples_split=5, score=-2.619, total=   2.0s
[CV] max_depth=17, min_samples_leaf=5, min_samples_split=10 ..........
[CV]  max_depth=17, min_samples_leaf=5, 

[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=30, score=-2.188, total=   1.7s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=30 .........
[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=30, score=-2.424, total=   1.7s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=30 .........
[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=30, score=-2.705, total=   1.6s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=30 .........
[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=30, score=-1.519, total=   1.7s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=30 .........
[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=30, score=-2.629, total=   1.6s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=17, min_samples_leaf=10, min_samples_split=3, score=-2.190, total=   1.7s
[CV] max_depth=17, min_samples_leaf=10, min_samples_split=3 ..........
[CV]  max_depth=17, min_sampl

[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=20, score=-2.444, total=   2.0s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=20 ..........
[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=20, score=-2.705, total=   1.9s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=20 ..........
[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=20, score=-1.532, total=   2.0s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=20 ..........
[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=20, score=-2.628, total=   2.0s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=25, score=-2.192, total=   1.9s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=17, min_samples_leaf=3, min_samples_split=25, score=-2.442, total=   1.9s
[CV] max_depth=17, min_samples_leaf=3, min_samples_split=25 ..........
[CV]  max_depth=17, min_samples_le

[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=5, score=-2.710, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=5 ..........
[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=5, score=-1.522, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=5 ..........
[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=5, score=-2.632, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=10, score=-2.189, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=10, score=-2.428, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=18, min_samples_leaf=10, min_samples_split=10, score=-2.710, total=   1.7s
[CV] max_depth=18, min_samples_leaf=10, min_samples_split=10 .........
[CV]  max_depth=18, min_samples

[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=30, score=-1.517, total=   1.5s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=30 .........
[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=30, score=-2.604, total=   1.5s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=3, score=-2.194, total=   1.5s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=3, score=-2.415, total=   1.6s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=3, score=-2.702, total=   1.6s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=18, min_samples_leaf=15, min_samples_split=3, score=-1.517, total=   1.6s
[CV] max_depth=18, min_samples_leaf=15, min_samples_split=3 ..........
[CV]  max_depth=18, min_samples_

[Parallel(n_jobs=1)]: Done 600 out of 600 | elapsed: 17.9min finished


GridSearchCV(cv=KFold(n_splits=5, random_state=2020, shuffle=False),
             error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=True, random_state=2020,
                                             ver

In [96]:
print(cv.best_score_)
print(cv.best_params_)
best=cv.best_estimator_

-2.286063207019937
{'max_depth': 12, 'min_samples_leaf': 15, 'min_samples_split': 5}


In [97]:
y_pred=best.predict(X_test)
print(y_pred)
type(y_pred)


[4.50243203 4.22664815 4.91310234 ... 4.71832432 2.22049168 9.54732119]


numpy.ndarray

In [98]:
y_pred = pd.DataFrame(y_pred)
y_pred.shape
y_pred.head()

Unnamed: 0,0
0,4.502432
1,4.226648
2,4.913102
3,8.875269
4,6.440912


In [99]:
y_pred.to_csv(r"F:\project\sample2.csv",index=False)