In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import csv
from tsfresh.feature_extraction import extract_features, MinimalFCParameters
%matplotlib inline

WINDOW = 5000
FILE = r'train.csv'


In [18]:
i = 0
target = []
mydata = pd.DataFrame()
for chunk in pd.read_csv(FILE, chunksize=WINDOW,dtype={'acoustic_data': np.int8, 'time_to_failure': np.float32 },nrows=100000):
    i += 1
    chunk['id'] = i
    extracted_features = extract_features(chunk.drop(columns=['time_to_failure']), column_id='id', n_jobs=4,default_fc_parameters=MinimalFCParameters())
    mydata = pd.concat([mydata, extracted_features])
    target.append(chunk.time_to_failure.iloc[-1])

mydata['time_to_failure'] = target
mydata.to_csv(r'ft_1m5.csv', index=False)

Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 481.88it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 646.87it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 714.78it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 469.27it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 751.40it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 732.63it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 716.85it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 498.49it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 838.19it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 741.83it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 654.03it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 542.04it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 526.33it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 487.14it/s]
Feature Extraction: 100%|██████████| 1/1 [00:00<00:00, 729.44i

In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor

In [22]:
dataframe = pd.read_csv(r'ft_1m5.csv')
dataframe = dataframe.dropna(axis=1)
dataframe.shape

(20, 9)

In [23]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(dataframe.drop(columns=['time_to_failure']), dataframe.time_to_failure, test_size=0.3)

In [24]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
def create_model(optimizer='adam'):
    model = Sequential()
    model.add(Dense(64,
                    input_dim=X_train.shape[1],
                    #kernel_initializer='normal',
                    activation='relu'                   
                   ))
    model.add(Dense(1,
                   #kernel_initializer='normal',
                   activation='linear'
                   ))
    model.compile(loss='mse', optimizer=optimizer)
    return model

In [26]:
model = KerasRegressor(build_fn=create_model, epochs=50, batch_size=64, verbose=1, shuffle=True)

In [27]:
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, verbose=1)
grid_result = grid.fit(X_train_scaled, y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    5.0s finished
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Best: -0.038088 using {'optimizer': 'SGD'}
-0.038088 (0.020851) with: {'optimizer': 'SGD'}
-0.226696 (0.173509) with: {'optimizer': 'RMSprop'}
-0.081079 (0.043207) with: {'optimizer': 'Adagrad'}
-0.124573 (0.030331) with: {'optimizer': 'Adadelta'}
-0.320824 (0.225687) with: {'optimizer': 'Adam'}
-0.094786 (0.080188) with: {'optimizer': 'Adamax'}
-0.088741 (0.044011) with: {'optimizer': 'Nadam'}


In [36]:
from sklearn import preprocessing
print(y_train)
lab_enc = preprocessing.LabelEncoder()
y_train_encoded = lab_enc.fit_transform(y_train)
print(y_train_encoded)

11    1.454197
5     1.461699
19    1.443598
6     1.460598
15    1.448898
9     1.456399
2     1.465897
1     1.466998
16    1.447897
12    1.453196
17    1.446796
10    1.455298
0     1.468099
13    1.451100
Name: time_to_failure, dtype: float64
[ 6 10  0  9  3  8 11 12  2  5  1  7 13  4]


In [31]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(X_train,y_train_encoded)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [32]:
predictions = LR.predict(X_test)
print(predictions)

[ 9  3 11 11  9  0]
