In [6]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report, confusion_matrix
from tabulate import tabulate
import seaborn as sns
import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 1 sekundowe pliki 

In [7]:
param_grid_bagging = {
    "n_estimators": range(1, 101),
    "max_samples": [0.5, 0.7, 1.0]
}

## 13 cech

In [8]:
train_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/train_13_mfcc_features_with_labels_1sec.csv")  
test_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/test_13_mfcc_features_with_labels_1sec.csv")    

X_train = train_data.drop(['Encoded_label'], axis=1) 
y_train = train_data['Encoded_label']  

X_test = test_data.drop(['Encoded_label'], axis=1)  
y_test = test_data['Encoded_label']

In [9]:
results_bagging = []

for n_estimators in param_grid_bagging["n_estimators"]:
    for max_samples in param_grid_bagging["max_samples"]:
        try:
                bagging_model = BaggingClassifier(
                    estimator=DecisionTreeClassifier(),
                    n_estimators=n_estimators,
                    max_samples=max_samples,
                    random_state=42
                )
                start_time = time.time()
                bagging_model.fit(X_train, y_train)
                training_time = time.time() - start_time
                
                y_pred_bagging = bagging_model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred_bagging)
                f1 = f1_score(y_test, y_pred_bagging, average='weighted')
                recall = recall_score(y_test, y_pred_bagging, average='weighted')
                precision = precision_score(y_test, y_pred_bagging, average='weighted', zero_division=0)
                
                results_bagging.append({
                    "n_estimators": n_estimators,
                    "max_samples": max_samples,
                    "Accuracy": accuracy,
                    "F1 Score": f1,
                    "Recall": recall,
                    "Precision": precision,
                    "Training Time (s)": training_time
                })
        except Exception as e:
                print(f"Error with n_estimators={n_estimators}, max_samples={max_samples}, max_features={max_features}: {e}")

metrics_bagging = ["Accuracy", "F1 Score", "Recall", "Precision"]
best_bagging_models = {metric: max(results_bagging, key=lambda x: x[metric]) for metric in metrics_bagging}
best_bagging_models["Training Time"] = min(results_bagging, key=lambda x: x["Training Time (s)"])

all_models_bagging_table = [
    [res["n_estimators"], res["max_samples"], res["max_features"], res["Accuracy"], res["F1 Score"], 
     res["Recall"], res["Precision"], res["Training Time (s)"]]
    for res in results_bagging
]

best_models_bagging_table = [
    [metric, 
     best_bagging_models[metric]["n_estimators"], 
     best_bagging_models[metric]["max_samples"],  
     best_bagging_models[metric]["Accuracy"], 
     best_bagging_models[metric]["F1 Score"], 
     best_bagging_models[metric]["Recall"], 
     best_bagging_models[metric]["Precision"], 
     best_bagging_models[metric]["Training Time (s)"]]
    for metric in metrics_bagging + ["Training Time"]
]


print("Best Models (Bagging):")
print(tabulate(best_models_bagging_table, headers=[
    "Metric", "n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

KeyError: 'max_features'

In [6]:
print(tabulate(best_models_bagging_table, headers=[
"n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

╒══════════╤════════════════╤═════════════════╤════════════╤════════════╤══════════╤═════════════╤═════════════════════╕
│   Metric │   n_estimators │   learning_rate │   Accuracy │   F1 Score │   Recall │   Precision │   Training Time (s) │
╞══════════╪════════════════╪═════════════════╪════════════╪════════════╪══════════╪═════════════╪═════════════════════╡
│        1 │            0.5 │             0.5 │   0.813855 │   0.813178 │ 0.813855 │    0.812744 │            0.105028 │
├──────────┼────────────────┼─────────────────┼────────────┼────────────┼──────────┼─────────────┼─────────────────────┤
│        1 │            0.5 │             1   │   0.877108 │   0.877037 │ 0.877108 │    0.877023 │            0.239047 │
├──────────┼────────────────┼─────────────────┼────────────┼────────────┼──────────┼─────────────┼─────────────────────┤
│        1 │            1   │             0.5 │   0.82244  │   0.821757 │ 0.82244  │    0.821344 │            0.178027 │
├──────────┼────────────────┼───

## 30 cech

In [4]:
train_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/train_30_mfcc_features_with_labels_1sec.csv")  
test_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/test_30_mfcc_features_with_labels_1sec.csv")    

X_train = train_data.drop(['Encoded_label'], axis=1) 
y_train = train_data['Encoded_label']  

X_test = test_data.drop(['Encoded_label'], axis=1)  
y_test = test_data['Encoded_label']

In [None]:
results_bagging = []

for n_estimators in param_grid_bagging["n_estimators"]:
    for max_samples in param_grid_bagging["max_samples"]:
        try:
                bagging_model = BaggingClassifier(
                    estimator=DecisionTreeClassifier(),
                    n_estimators=n_estimators,
                    max_samples=max_samples,
                    random_state=42
                )
                start_time = time.time()
                bagging_model.fit(X_train, y_train)
                training_time = time.time() - start_time
                
                y_pred_bagging = bagging_model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred_bagging)
                f1 = f1_score(y_test, y_pred_bagging, average='weighted')
                recall = recall_score(y_test, y_pred_bagging, average='weighted')
                precision = precision_score(y_test, y_pred_bagging, average='weighted', zero_division=0)
                
                results_bagging.append({
                    "n_estimators": n_estimators,
                    "max_samples": max_samples,
                    "Accuracy": accuracy,
                    "F1 Score": f1,
                    "Recall": recall,
                    "Precision": precision,
                    "Training Time (s)": training_time
                })
        except Exception as e:
                print(f"Error with n_estimators={n_estimators}, max_samples={max_samples}, max_features={max_features}: {e}")

metrics_bagging = ["Accuracy", "F1 Score", "Recall", "Precision"]
best_bagging_models = {metric: max(results_bagging, key=lambda x: x[metric]) for metric in metrics_bagging}
best_bagging_models["Training Time"] = min(results_bagging, key=lambda x: x["Training Time (s)"])

all_models_bagging_table = [
    [res["n_estimators"], res["max_samples"], res["max_features"], res["Accuracy"], res["F1 Score"], 
     res["Recall"], res["Precision"], res["Training Time (s)"]]
    for res in results_bagging
]

best_models_bagging_table = [
    [metric, 
     best_bagging_models[metric]["n_estimators"], 
     best_bagging_models[metric]["max_samples"],  
     best_bagging_models[metric]["Accuracy"], 
     best_bagging_models[metric]["F1 Score"], 
     best_bagging_models[metric]["Recall"], 
     best_bagging_models[metric]["Precision"], 
     best_bagging_models[metric]["Training Time (s)"]]
    for metric in metrics_bagging + ["Training Time"]
]


print("Best Models (Bagging):")
print(tabulate(best_models_bagging_table, headers=[
    "Metric", "n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

In [None]:
print(tabulate(best_models_bagging_table, headers=[
"n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

## 50 cech

In [10]:
train_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/train_50_mfcc_features_with_labels_1sec.csv")  
test_data = pd.read_csv("Sounds_1_seconds/mfcc_1_sec/test_50_mfcc_features_with_labels_1sec.csv")    

X_train = train_data.drop(['Encoded_label'], axis=1) 
y_train = train_data['Encoded_label']  

X_test = test_data.drop(['Encoded_label'], axis=1)  
y_test = test_data['Encoded_label']

In [None]:
results_bagging = []

for n_estimators in param_grid_bagging["n_estimators"]:
    for max_samples in param_grid_bagging["max_samples"]:
        try:
                bagging_model = BaggingClassifier(
                    estimator=DecisionTreeClassifier(),
                    n_estimators=n_estimators,
                    max_samples=max_samples,
                    random_state=42
                )
                start_time = time.time()
                bagging_model.fit(X_train, y_train)
                training_time = time.time() - start_time
                
                y_pred_bagging = bagging_model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred_bagging)
                f1 = f1_score(y_test, y_pred_bagging, average='weighted')
                recall = recall_score(y_test, y_pred_bagging, average='weighted')
                precision = precision_score(y_test, y_pred_bagging, average='weighted', zero_division=0)
                
                results_bagging.append({
                    "n_estimators": n_estimators,
                    "max_samples": max_samples,
                    "Accuracy": accuracy,
                    "F1 Score": f1,
                    "Recall": recall,
                    "Precision": precision,
                    "Training Time (s)": training_time
                })
        except Exception as e:
                print(f"Error with n_estimators={n_estimators}, max_samples={max_samples}, max_features={max_features}: {e}")

metrics_bagging = ["Accuracy", "F1 Score", "Recall", "Precision"]
best_bagging_models = {metric: max(results_bagging, key=lambda x: x[metric]) for metric in metrics_bagging}
best_bagging_models["Training Time"] = min(results_bagging, key=lambda x: x["Training Time (s)"])

all_models_bagging_table = [
    [res["n_estimators"], res["max_samples"], res["max_features"], res["Accuracy"], res["F1 Score"], 
     res["Recall"], res["Precision"], res["Training Time (s)"]]
    for res in results_bagging
]

best_models_bagging_table = [
    [metric, 
     best_bagging_models[metric]["n_estimators"], 
     best_bagging_models[metric]["max_samples"],  
     best_bagging_models[metric]["Accuracy"], 
     best_bagging_models[metric]["F1 Score"], 
     best_bagging_models[metric]["Recall"], 
     best_bagging_models[metric]["Precision"], 
     best_bagging_models[metric]["Training Time (s)"]]
    for metric in metrics_bagging + ["Training Time"]
]


print("Best Models (Bagging):")
print(tabulate(best_models_bagging_table, headers=[
    "Metric", "n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

In [None]:
print(tabulate(best_models_bagging_table, headers=[
"n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

# 3 sekundowe pliki

In [13]:
train_data = pd.read_csv("Sounds_3_seconds/mfcc_3_sec/train_13_mfcc_features_with_labels_3sec.csv")  
test_data = pd.read_csv("Sounds_3_seconds/mfcc_3_sec/test_13_mfcc_features_with_labels_3sec.csv")    

X_train = train_data.drop(['Encoded_label'], axis=1) 
y_train = train_data['Encoded_label']  

X_test = test_data.drop(['Encoded_label'], axis=1)  
y_test = test_data['Encoded_label']

In [None]:
results_bagging = []

for n_estimators in param_grid_bagging["n_estimators"]:
    for max_samples in param_grid_bagging["max_samples"]:
        try:
                bagging_model = BaggingClassifier(
                    estimator=DecisionTreeClassifier(),
                    n_estimators=n_estimators,
                    max_samples=max_samples,
                    random_state=42
                )
                start_time = time.time()
                bagging_model.fit(X_train, y_train)
                training_time = time.time() - start_time
                
                y_pred_bagging = bagging_model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred_bagging)
                f1 = f1_score(y_test, y_pred_bagging, average='weighted')
                recall = recall_score(y_test, y_pred_bagging, average='weighted')
                precision = precision_score(y_test, y_pred_bagging, average='weighted', zero_division=0)
                
                results_bagging.append({
                    "n_estimators": n_estimators,
                    "max_samples": max_samples,
                    "Accuracy": accuracy,
                    "F1 Score": f1,
                    "Recall": recall,
                    "Precision": precision,
                    "Training Time (s)": training_time
                })
        except Exception as e:
                print(f"Error with n_estimators={n_estimators}, max_samples={max_samples}, max_features={max_features}: {e}")

metrics_bagging = ["Accuracy", "F1 Score", "Recall", "Precision"]
best_bagging_models = {metric: max(results_bagging, key=lambda x: x[metric]) for metric in metrics_bagging}
best_bagging_models["Training Time"] = min(results_bagging, key=lambda x: x["Training Time (s)"])

all_models_bagging_table = [
    [res["n_estimators"], res["max_samples"], res["max_features"], res["Accuracy"], res["F1 Score"], 
     res["Recall"], res["Precision"], res["Training Time (s)"]]
    for res in results_bagging
]

best_models_bagging_table = [
    [metric, 
     best_bagging_models[metric]["n_estimators"], 
     best_bagging_models[metric]["max_samples"],  
     best_bagging_models[metric]["Accuracy"], 
     best_bagging_models[metric]["F1 Score"], 
     best_bagging_models[metric]["Recall"], 
     best_bagging_models[metric]["Precision"], 
     best_bagging_models[metric]["Training Time (s)"]]
    for metric in metrics_bagging + ["Training Time"]
]


print("Best Models (Bagging):")
print(tabulate(best_models_bagging_table, headers=[
    "Metric", "n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

In [None]:
print(tabulate(best_models_bagging_table, headers=[
"n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

# 5 sekundowe pliki

In [25]:
train_data = pd.read_csv("Sounds_5_seconds/mfcc_5_sec/train_13_mfcc_features_with_labels_5sec.csv")  
test_data = pd.read_csv("Sounds_5_seconds/mfcc_5_sec/test_13_mfcc_features_with_labels_5sec.csv")    

X_train = train_data.drop(['Encoded_label'], axis=1) 
y_train = train_data['Encoded_label']  

X_test = test_data.drop(['Encoded_label'], axis=1)  
y_test = test_data['Encoded_label']

In [None]:
results_bagging = []

for n_estimators in param_grid_bagging["n_estimators"]:
    for max_samples in param_grid_bagging["max_samples"]:
        try:
                bagging_model = BaggingClassifier(
                    estimator=DecisionTreeClassifier(),
                    n_estimators=n_estimators,
                    max_samples=max_samples,
                    random_state=42
                )
                start_time = time.time()
                bagging_model.fit(X_train, y_train)
                training_time = time.time() - start_time
                
                y_pred_bagging = bagging_model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred_bagging)
                f1 = f1_score(y_test, y_pred_bagging, average='weighted')
                recall = recall_score(y_test, y_pred_bagging, average='weighted')
                precision = precision_score(y_test, y_pred_bagging, average='weighted', zero_division=0)
                
                results_bagging.append({
                    "n_estimators": n_estimators,
                    "max_samples": max_samples,
                    "Accuracy": accuracy,
                    "F1 Score": f1,
                    "Recall": recall,
                    "Precision": precision,
                    "Training Time (s)": training_time
                })
        except Exception as e:
                print(f"Error with n_estimators={n_estimators}, max_samples={max_samples}, max_features={max_features}: {e}")

metrics_bagging = ["Accuracy", "F1 Score", "Recall", "Precision"]
best_bagging_models = {metric: max(results_bagging, key=lambda x: x[metric]) for metric in metrics_bagging}
best_bagging_models["Training Time"] = min(results_bagging, key=lambda x: x["Training Time (s)"])

all_models_bagging_table = [
    [res["n_estimators"], res["max_samples"], res["max_features"], res["Accuracy"], res["F1 Score"], 
     res["Recall"], res["Precision"], res["Training Time (s)"]]
    for res in results_bagging
]

best_models_bagging_table = [
    [metric, 
     best_bagging_models[metric]["n_estimators"], 
     best_bagging_models[metric]["max_samples"],  
     best_bagging_models[metric]["Accuracy"], 
     best_bagging_models[metric]["F1 Score"], 
     best_bagging_models[metric]["Recall"], 
     best_bagging_models[metric]["Precision"], 
     best_bagging_models[metric]["Training Time (s)"]]
    for metric in metrics_bagging + ["Training Time"]
]


print("Best Models (Bagging):")
print(tabulate(best_models_bagging_table, headers=[
    "Metric", "n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))

In [None]:
print(tabulate(best_models_bagging_table, headers=[
"n_estimators", "max_samples", "Accuracy", "F1 Score", "Recall", "Precision", "Training Time (s)"
], tablefmt="fancy_grid"))