<a href="https://colab.research.google.com/github/ShobitKapila/Acoustic-source-identification/blob/main/wind_turbin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import os
import numpy as np
import librosa
import scipy
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.neighbors import KNeighborsClassifier
import joblib

def load_data(folder_path, batch_size=10):
    """Load samples and their ``Speed`` labels from every ``.mat`` file in a folder.

    Every variable whose name starts with ``'AN'`` is treated as a block of
    samples (one sample per row). Each row is labelled with the value of the
    ``'Speed'`` variable stored in the same file.

    Args:
        folder_path: Directory that is scanned (non-recursively) for ``.mat``
            files.
        batch_size: Retained for backward compatibility only. The original
            implementation sliced the data into batches and immediately
            concatenated them again, so the value never affected the result.

    Returns:
        A tuple ``(X, y)``. ``X`` is the row-wise concatenation of all ``AN*``
        arrays. ``y`` holds one copy of the file's ``Speed`` array per row of
        ``X``, i.e. its shape is ``(len(X),) + Speed.shape``.

    Raises:
        OSError: If ``folder_path`` cannot be listed (e.g. it does not exist).
        ValueError: If no labelled ``AN*`` data was found in the folder.
    """
    data_parts = []
    label_parts = []

    # Sorted so that the row order (and any seeded split made later) does not
    # depend on the arbitrary order returned by the file system.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".mat"):
            continue

        file_path = os.path.join(folder_path, file_name)
        try:
            mat_data = loadmat(file_path)
        except Exception as e:
            # Best effort: loadmat raises several unrelated exception types
            # for unreadable files, so report the file and move on.
            print(f"Error loading file {file_path}: {e}")
            continue

        # Read the label up front instead of relying on the key order inside
        # the file; otherwise a 'Speed' key that precedes the 'AN*' keys would
        # be paired with stale (or not yet defined) sample data.
        if 'Speed' not in mat_data:
            print(f"Skipping file {file_path}: no 'Speed' variable found")
            continue
        speed = np.asarray(mat_data['Speed'])

        for key, sample_data in mat_data.items():
            if not key.startswith('AN'):
                continue
            data_parts.append(sample_data)
            # One label per row of *this* channel, so X and y always have the
            # same length even when a file holds several 'AN*' variables.
            label_parts.append(
                np.repeat(speed[np.newaxis, ...], sample_data.shape[0], axis=0)
            )

    if not data_parts:
        raise ValueError(f"No labelled 'AN*' data found in {folder_path}")

    X = np.concatenate(data_parts, axis=0)
    y = np.concatenate(label_parts, axis=0)

    return X, y



def extract_features_from_sound(sound_file_path):
    """Return the transposed 13-coefficient MFCC matrix of an audio file.

    The file is decoded with ``librosa.load`` using its default settings, and
    the resulting waveform and sample rate are passed to
    ``librosa.feature.mfcc`` with ``n_mfcc=13``.

    Args:
        sound_file_path: Path of the audio file to analyse.

    Returns:
        The MFCC array returned by librosa, transposed (``.T``).
    """
    waveform, sample_rate = librosa.load(sound_file_path)
    return librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13).T


def train_and_evaluate(X, y, save_model_path):
    """Train a k-nearest-neighbours classifier, print its metrics and save it.

    The data is split 80/20 (``random_state=42``), a default
    ``KNeighborsClassifier`` is fitted on the training part, and accuracy,
    confusion matrix and F1 score on the test part are printed. The fitted
    model is then written to ``save_model_path`` with ``joblib.dump``.

    Args:
        X: Feature matrix, one sample per row.
        y: Labels, one per sample. Any shape that flattens to one value per
            row of ``X`` is accepted (e.g. ``(n,)``, ``(n, 1)``, ``(n, 1, 1)``).
        save_model_path: Destination of the serialized model.
    """
    # Flatten once, before the split, so the training and the test labels
    # share the same 1-D shape (previously only y_train was ravelled and the
    # metrics received the un-flattened y_test).
    y = np.asarray(y).ravel()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    clf = KNeighborsClassifier()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # f1_score defaults to average='binary' (positive label 1), which raises
    # for multi-class targets. Keep that behaviour where it is valid and fall
    # back to a support-weighted average otherwise.
    classes = np.unique(y)
    average = "binary" if classes.size == 2 and 1 in classes else "weighted"
    f1_acc = f1_score(y_test, y_pred, average=average)
    print(f"F1 Accuracy Score: {f1_acc}")

    # joblib.dump does not create missing directories.
    if isinstance(save_model_path, (str, os.PathLike)):
        model_dir = os.path.dirname(save_model_path)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)

    joblib.dump(clf, save_model_path)
    print(f"Model saved to {save_model_path}")

# Input folders containing the .mat recordings of each turbine condition.
healthy_folder_path = "/content/Healthy"
damaged_folder_path = "/content/Damaged"

# NOTE(review): the data folders are POSIX paths while the model paths look
# like Windows drive paths — confirm which environment this is meant to run in.
healthy_model_path = "D:/wind turbin/healthy_model.joblib"
damaged_model_path = "D:/wind turbin/damaged_model.joblib"

# Train and save one model per dataset.
print("Healthy Dataset:")
X_healthy, y_healthy = load_data(healthy_folder_path)
train_and_evaluate(X_healthy, y_healthy, healthy_model_path)


print("\nDamaged Dataset:")
X_damaged, y_damaged = load_data(damaged_folder_path)
train_and_evaluate(X_damaged, y_damaged, damaged_model_path)

# Optional demo: classify a single recording with the saved "healthy" model.
# The path below is a placeholder, so the step is skipped (instead of
# crashing the whole run) until it points at a real file.
sound_file_path = "path_to_your_sound_file.wav"
if os.path.isfile(sound_file_path):
    sound_features = extract_features_from_sound(sound_file_path)
    model = joblib.load(healthy_model_path)
    # NOTE(review): the flattened MFCC vector's length depends on the clip
    # duration and presumably differs from the feature count the model was
    # trained on; the model is also trained on 'Speed' labels, so comparing
    # the prediction with 1 may not mean "damaged" — confirm intent.
    prediction = model.predict(sound_features.reshape(1, -1))
    print(f"The predicted healthiness is: {'Damaged' if prediction[0] == 1 else 'Healthy'}")
else:
    print(f"Sound file not found, skipping prediction: {sound_file_path}")


Healthy Dataset:


FileNotFoundError: ignored