In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_time_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])


# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')

train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 90}
Overall training accuracy: 0.96
Overall test accuracy: 0.9585714285714285


In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_time_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])


# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')

train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 90}
Overall training accuracy: 0.9518518518518518
Overall test accuracy: 0.96


In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_wav_energy8_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])


# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')

train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 90}
Overall training accuracy: 0.9
Overall test accuracy: 0.9042857142857142


In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_wav_energy8_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')


train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 80}
Overall training accuracy: 0.9305555555555556
Overall test accuracy: 0.9333333333333333


In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')

train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 90}
Overall training accuracy: 0.91375
Overall test accuracy: 0.9228571428571428


In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the feature table; the 'fault' column is the class label used below.
data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features: only the strict upper triangle of the absolute
# correlation matrix is inspected, so a column is dropped when it correlates
# above 0.9 with any column that precedes it.
# NOTE(review): the correlations are computed on the full table, i.e. before
# the train/test split below.
corr_matrix = data_wav_energy.drop(columns='fault').corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if (upper_tri[column] > 0.9).any()]
data_wav_energy = data_wav_energy.drop(columns=to_drop)

# Select the features by name instead of by position, so the label cannot end
# up in the feature matrix if the CSV column order ever changes.
feature_cols = data_wav_energy.columns.drop('fault')

train_wav_energy, test_wav_energy = train_test_split(
    data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324
)

# Fit the scaler on the training rows only and reuse it for the test rows.
# transform() divides by scaler.scale_, which scikit-learn sets to 1 for
# zero-variance features; dividing by sqrt(scaler.var_) by hand would produce
# NaN/inf for such a feature.
scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy[feature_cols])
test_wav_energy_scaled = scaler.transform(test_wav_energy[feature_cols])

# 5-fold cross-validated search over the inverse regularisation strength C.
# NOTE(review): if the selected C is the largest value in the grid, the grid
# may be too narrow -- consider extending it.
parameters = {'C': [50,60,70,80,90]}
logistic_clf = LogisticRegression(max_iter=100, n_jobs=-1, random_state=0)
grid_search = GridSearchCV(logistic_clf, parameters, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_model = grid_search.best_estimator_

train_predictions = best_model.predict(train_wav_energy_scaled)
test_predictions = best_model.predict(test_wav_energy_scaled)

# Confusion matrices are kept in the namespace for inspection; only the
# accuracies are printed.
train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'C': 90}
Overall training accuracy: 0.9527777777777777
Overall test accuracy: 0.96
