In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_time_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.890625
Overall test accuracy: 0.89


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_time_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.8564814814814815
Overall test accuracy: 0.8433333333333334


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_energy8_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 10}
Overall training accuracy: 0.77
Overall test accuracy: 0.7771428571428571


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_energy8_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.7083333333333334
Overall test accuracy: 0.7


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 10}
Overall training accuracy: 0.858125
Overall test accuracy: 0.8757142857142857


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = LogisticRegression(max_iter=200, n_jobs=-1)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 10}
Overall training accuracy: 0.8416666666666667
Overall test accuracy: 0.8366666666666667


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_time_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 10}
Overall training accuracy: 0.89375
Overall test accuracy: 0.8842857142857142


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_time_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.7, 'n_estimators': 15}
Overall training accuracy: 0.9027777777777778
Overall test accuracy: 0.91


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_energy8_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.835625
Overall test accuracy: 0.8442857142857143


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_energy8_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.9342592592592592
Overall test accuracy: 0.92


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_48k_2048_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=700, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.9, 'n_estimators': 15}
Overall training accuracy: 0.888125
Overall test accuracy: 0.8971428571428571


In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

data_wav_energy = pd.read_csv("data_feature_wav_ent8_shan_12k_1024_load_1.csv")
data_wav_energy['fault'] = pd.Categorical(data_wav_energy['fault'])

# Remove collinear features
corr_matrix = data_wav_energy.iloc[:, :-1].corr().abs()
upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.9)]
data_wav_energy.drop(to_drop, axis=1, inplace=True)

train_wav_energy, test_wav_energy = train_test_split(data_wav_energy, test_size=300, stratify=data_wav_energy['fault'], random_state=324)

scaler = StandardScaler()
train_wav_energy_scaled = scaler.fit_transform(train_wav_energy.iloc[:, :-1])
test_wav_energy_scaled = (test_wav_energy.iloc[:, :-1].values - scaler.mean_) / np.sqrt(scaler.var_)

base_classifier = SVC(probability=True)

param_grid = {
    'n_estimators': [5, 10, 15],
    'max_samples': [0.5, 0.7, 0.9]
}

bagging_clf = BaggingClassifier(base_estimator=base_classifier)
grid_search = GridSearchCV(bagging_clf, param_grid, cv=5)
grid_search.fit(train_wav_energy_scaled, train_wav_energy['fault'])

best_params = grid_search.best_params_
print("Best parameters:", best_params)

best_bagging_clf = grid_search.best_estimator_

train_predictions = best_bagging_clf.predict(train_wav_energy_scaled)
test_predictions = best_bagging_clf.predict(test_wav_energy_scaled)

train_confu_matrix = confusion_matrix(train_wav_energy['fault'], train_predictions)
test_confu_matrix = confusion_matrix(test_wav_energy['fault'], test_predictions)

train_accuracy = accuracy_score(train_wav_energy['fault'], train_predictions)
print("Overall training accuracy:", train_accuracy)

test_accuracy = accuracy_score(test_wav_energy['fault'], test_predictions)
print("Overall test accuracy:", test_accuracy)


Best parameters: {'max_samples': 0.7, 'n_estimators': 15}
Overall training accuracy: 0.9518518518518518
Overall test accuracy: 0.9633333333333334
