<a href="https://colab.research.google.com/github/AndreJesusBrito/Turbine_autoencoder_feature_reduction/blob/main/Turbine_autoencoder_feature_reduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Comparing Random Forest and Auto-encoder Feature Reduction for Wind Turbine Fault Location Classification

Imports


In [None]:
import scipy.io
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression

Load Dataset

In [None]:
# load dataset mat file from google drive (must be mounted)
mat = scipy.io.loadmat('drive/MyDrive/TurbineAutoencoder/(1)_features_TS_50000_Ch_003_TR_90_TST_10_dataset_Quantile_RF_thold_0.005_4_classes.mat')

# loadmat returns MATLAB matrices as 2-D arrays, so the labels are flattened
# to the 1-D vector that the scikit-learn estimators below expect.
X_training = mat['features_training']
y_training = mat['labels_training'].ravel()

# The file's "test" split is used as the validation set in this notebook.
X_validation = mat['features_test']
y_validation = mat['labels_test'].ravel()

The number of features to keep after reduction

In [None]:
# Size of the reduced feature set. It is used both as the width of the
# auto-encoder's encoder output and as the number of columns kept by
# sel_features, so both reduction methods yield the same dimensionality.
num_of_features = 10

## Auto-encoder feature reduction
Auto-encoder Definition

In [None]:
class Autoencoder(Model):
  """Dense auto-encoder whose encoder output is the reduced feature vector.

  The encoder narrows the input through Dense layers of 24, 16 and
  `num_of_features` units (sigmoid). The decoder widens it back through
  Dense layers of `num_of_features`, 16, 24 and `input_dim` units (ReLU).

  Args:
    num_of_features: width of the encoder output (the bottleneck).
    input_dim: number of input features per sample, and the width of the
      reconstructed output. Defaults to 33.
  """

  def __init__(self, num_of_features, input_dim=33):
    super().__init__()
    self.encoder = tf.keras.Sequential([
      # The Input spec must be the first entry of a Sequential model so the
      # layers after it are built against a known shape.
      layers.Input(shape=(input_dim, 1)),
      layers.Flatten(),
      layers.Dense(24, activation='sigmoid'),
      layers.Dense(16, activation='sigmoid'),
      layers.Dense(num_of_features, activation='sigmoid'),
    ])
    self.decoder = tf.keras.Sequential([
      layers.Dense(num_of_features, activation='relu'),
      layers.Dense(16, activation='relu'),
      layers.Dense(24, activation='relu'),
      layers.Dense(input_dim, activation='relu'),
    ])

  def call(self, x):
    """Encode `x` to the bottleneck and return its reconstruction."""
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded


Auto-encoder training

In [None]:
autoencoder = Autoencoder(num_of_features)

# Reconstruction is a regression problem, so mean absolute error is tracked
# next to the MSE loss; classification accuracy carries no meaning here.
autoencoder.compile(
  optimizer='adam',
  loss=losses.MeanSquaredError(),
  metrics=['mae'],)
# The input is also the target: the network is trained to reproduce its input.
autoencoder.fit(X_training, X_training,
                epochs=10,
                batch_size=16,
                shuffle=True,
                validation_data=(X_validation, X_validation))


Get the auto-encoder reduced features

In [None]:
# Run only the encoder half of the trained auto-encoder over each split and
# convert the resulting tensors to NumPy arrays.
X_training_ae_reduced, X_validation_ae_reduced = (
  autoencoder.encoder(split).numpy() for split in (X_training, X_validation)
)

# Sanity check: one shape per line, training split first.
print(X_training_ae_reduced.shape, X_validation_ae_reduced.shape, sep='\n')

## Random Forest feature reduction

In [None]:
# Forest used only to rank features by importance. The seed is fixed so the
# selected feature subset is the same on every run.
random_forest = RandomForestClassifier(n_estimators=100, random_state=0)
random_forest.fit(X_training, y_training)

Extract the data for the most important features, as ranked by the Random Forest classifier.

In [None]:
def sel_features(data, clf, max_features):
  """Keep only the columns of `data` that `clf` ranks as most important.

  Args:
    data: 2-D array-like of shape (n_samples, n_features).
    clf: fitted estimator exposing a `feature_importances_` array with one
      entry per column of `data`.
    max_features: maximum number of columns to keep.

  Returns:
    A float64 array of shape (n_samples, k), where k is `max_features`
    (or fewer if `data` has fewer columns). Columns are ordered from the
    most to the least important of the selected features.
  """
  # argsort sorts ascending, so reverse it to put the highest importances first.
  top_features_indices = clf.feature_importances_.argsort()[::-1][:max_features]

  data = np.asarray(data, dtype=np.float64)
  if data.size == 0:
    # No samples (or no columns): return an empty result with the right width.
    return np.empty((len(data), len(top_features_indices)))

  return data[:, top_features_indices]

In [None]:
# Apply the Random Forest feature selection to both splits with the same
# fitted forest and the same number of retained features.
X_training_rf_reduced, X_validation_rf_reduced = (
  sel_features(split, random_forest, num_of_features)
  for split in (X_training, X_validation)
)

# Sanity check: one shape per line, training split first.
print(X_training_rf_reduced.shape, X_validation_rf_reduced.shape, sep='\n')

## Tests

### CART without feature reduction

In [None]:
# Baseline: decision tree trained on every original feature. The seed is
# fixed so the score is reproducible across runs.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X_training, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation, y_validation)
print(f'CART score was {score} with all features')

### CART with Random Forest feature reduction

In [None]:
# Decision tree trained on the features selected by the Random Forest. The
# seed is fixed so the score is reproducible across runs.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X_training_rf_reduced, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation_rf_reduced, y_validation)
print(f'CART score was {score} with random forest feature reduction')

### CART with auto-encoder feature reduction


In [None]:
# Decision tree trained on the auto-encoder's encoded features. The seed is
# fixed so the score is reproducible across runs.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X_training_ae_reduced, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation_ae_reduced, y_validation)
print(f'CART score was {score} with autoencoder feature reduction')

### Random Forest without feature reduction

In [None]:
# Baseline: random forest trained on every original feature. The seed is
# fixed so the score is reproducible across runs.
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_training, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation, y_validation)
print(f'Random Forest score was {score} with all features')

### Random Forest with Random Forest feature reduction

In [None]:
# Random forest trained on the features selected by the ranking forest. The
# seed is fixed so the score is reproducible across runs.
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf = clf.fit(X_training_rf_reduced, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation_rf_reduced, y_validation)
print(f'Random Forest score was {score} with Random Forest feature reduction')

### Random Forest with Auto-encoder feature reduction

In [None]:
# Random forest trained on the auto-encoder's encoded features. The seed is
# fixed so the score is reproducible across runs.
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf = clf.fit(X_training_ae_reduced, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation_ae_reduced, y_validation)
print(f'Random Forest score was {score} with Autoencoder feature reduction')

### Logistic Regression without feature reduction

In [None]:
# Baseline: logistic regression trained on every original feature.
clf = LogisticRegression(random_state=0, solver='sag', max_iter=100000)
clf = clf.fit(X_training, y_training)

# Mean accuracy on the validation split. The label reads "LR" because the
# model is a logistic regression, not an SVM.
score = clf.score(X_validation, y_validation)
print(f'LR score was {score} with all features')

### Logistic Regression with Random Forest feature reduction

In [None]:
# Logistic regression fitted on the features selected by the Random Forest;
# construction and fitting are chained because fit() returns the estimator.
clf = LogisticRegression(
  max_iter=100000,
  solver='sag',
  random_state=0,
).fit(X_training_rf_reduced, y_training)

# Mean accuracy on the validation split.
score = clf.score(X_validation_rf_reduced, y_validation)
print(f'LR score was {score} with Random Forest feature reduction')

### Logistic Regression with Auto-encoder feature reduction

In [None]:
# Logistic regression trained on the auto-encoder's encoded features.
clf = LogisticRegression(random_state=0, solver='sag', max_iter=100000)
clf = clf.fit(X_training_ae_reduced, y_training)

# Mean accuracy on the validation split. The label names the auto-encoder
# because that is the reduction this cell evaluates.
score = clf.score(X_validation_ae_reduced, y_validation)
print(f'LR score was {score} with Autoencoder feature reduction')