In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
# NOTE(review): the variable names say "wav_energy" but this cell reads the
# "time" feature file; the names are kept because other cells may read them.
data_wav_energy = pd.read_csv("data_feature_time_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=700,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10}
Overall training accuracy: 0.9775
Overall test accuracy: 0.9228571428571428


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
# NOTE(review): the variable names say "wav_energy" but this cell reads the
# "time" feature file; the names are kept because other cells may read them.
data_wav_energy = pd.read_csv("data_feature_time_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=300,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2}
Overall training accuracy: 0.9925925925925926
Overall test accuracy: 0.9466666666666667


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
data_wav_energy = pd.read_csv("data_feature_wav_energy8_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=700,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 5}
Overall training accuracy: 0.94
Overall test accuracy: 0.9228571428571428


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
data_wav_energy = pd.read_csv("data_feature_wav_energy8_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=300,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Overall training accuracy: 1.0
Overall test accuracy: 0.99


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
# NOTE(review): the variable names say "wav_energy" but this cell reads the
# "wav_ent8_shan" feature file; the names are kept because other cells may
# read them.
data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=700,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Overall training accuracy: 1.0
Overall test accuracy: 0.9757142857142858


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the pre-computed feature table; the 'fault' column is the class label.
# NOTE(review): the variable names say "wav_energy" but this cell reads the
# "wav_ent8_shan" feature file; the names are kept because other cells may
# read them.
data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: a column is dropped when its absolute correlation
# with any earlier column exceeds 0.9. Every column except the last is treated
# as a feature (assumes 'fault' is the last column -- TODO confirm).
# NOTE(review): the filter is computed on the full table, before the
# train/test split, so test rows influence which features are kept.
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
# Keep only the strict upper triangle so each pair is inspected once.
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
# Rebind instead of dropping in place, so the frame object is never mutated.
data_wav_energy = data_wav_energy.drop(columns=to_drop)


# Stratified hold-out split; an integer test_size is an absolute row count.
train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy,
    test_size=300,
    stratify=data_wav_energy['fault'],
    random_state=324,
)

# Fit the scaler on the training features only, then apply the same fitted
# transform to the test features.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
# transform() divides by scaler.scale_, which is guarded against zero-variance
# columns; a hand-written (x - mean_) / sqrt(var_) divides by zero there.
test_wav_energy_scaled = scaler.transform(test_wav_energy.iloc[:, :-1])

# Hyper-parameter grid searched exhaustively with 5-fold cross-validation.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed the tree so that repeated runs select the same model; without a fixed
# random_state the feature permutation at each split can change the result.
decision_tree_clf = DecisionTreeClassifier(random_state=324)

grid_search = GridSearchCV(decision_tree_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

# best_estimator_ is the winning configuration refit on the whole training set.
best_decision_tree_clf = grid_search.best_estimator_

train_predictions = best_decision_tree_clf.predict(train_wav_energy_scaled)
test_predictions = best_decision_tree_clf.predict(test_wav_energy_scaled)

# Confusion matrices are computed and kept in the namespace; they are not printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10}
Overall training accuracy: 0.9953703703703703
Overall test accuracy: 0.9833333333333333
