In [4]:
import os
import numpy as np
import librosa
import librosa.display
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from pydub import AudioSegment
import io
import warnings

In [5]:
# Suppress warnings
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)


In [6]:
# Define folder paths.
# The project root can be overridden with the ASD_APP_DIR environment variable so the
# notebook is not tied to one machine; the original local path remains the default.
dir_path = os.environ.get(
    "ASD_APP_DIR",
    "C:/Users/mahes/OneDrive/Desktop/FinalYearProjects/Projects/ASD-Detection-App/app/",
)
# Sub-folders of the project root used by the cells below:
recordings_folder = os.path.join(dir_path, "recordings")  # input .m4a recordings
features_folder = os.path.join(dir_path, "features")      # stored feature arrays
models_folder = os.path.join(dir_path, "models")          # serialized .pkl models

In [7]:
# Ensure feature and model directories exist.
# exist_ok=True makes each call a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation.
os.makedirs(features_folder, exist_ok=True)
os.makedirs(models_folder, exist_ok=True)

In [8]:

# Map each serialized model file name to the display name used in the printed results
models = dict([
    ('rf.pkl', 'Random Forest'),
    ('ann.pkl', 'Artificial Neural Network'),
    ('svm.pkl', 'Support Vector Machine'),
    ('nb.pkl', 'Naive Bayes'),
])


In [9]:
# MFCC feature extraction helper
def extract_mfcc(audio_data, sample_rate, n_mfcc=20):
    """Compute MFCC features for an audio signal.

    Thin wrapper that forwards its arguments to ``librosa.feature.mfcc``.

    Args:
        audio_data: Audio samples, passed to librosa as ``y``.
        sample_rate: Sampling rate of ``audio_data``, passed to librosa as ``sr``.
        n_mfcc: Number of MFCC coefficients to request (default 20).

    Returns:
        The value returned by ``librosa.feature.mfcc`` unchanged
        (per the librosa docs, an array shaped ``(n_mfcc, n_frames)``).
    """
    return librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)

In [10]:
# Optional: Extract and store MFCC features
# for file in os.listdir(recordings_folder):
#     if file.endswith(".m4a"):
#         audio = AudioSegment.from_file(os.path.join(recordings_folder, file), format='m4a')
#         samples = audio.get_array_of_samples()
#         y = np.array(samples).astype(np.float32) / (2**15 - 1)
#         sr = audio.frame_rate
#         mfcc_features = extract_mfcc(y, sr)
#         np.save(os.path.join(features_folder, file.replace(".m4a", ".npy")), mfcc_features)

# Optional: Train and save models
# (Load MFCC features, train models, and save as .pkl files)

In [11]:
# Load every model file that is present on disk, keyed by its display name
loaded_models = {}
for model_file in models:
    model_name = models[model_file]
    model_path = os.path.join(models_folder, model_file)
    if not os.path.exists(model_path):
        # Missing models are reported and skipped rather than treated as fatal
        print(f"Model {model_name} not found!")
        continue
    # NOTE(review): joblib.load unpickles the file; only load model files you trust
    loaded_models[model_name] = joblib.load(model_path)

In [19]:
# Predict using the loaded models: pick the recordings to run through them.
SAMPLE_SIZE = 5    # maximum number of recordings to sample
RANDOM_SEED = 42   # fixed seed so Restart & Run All selects the same recordings

# Sort the listing: os.listdir order is arbitrary, which would defeat the seed
audio_files = sorted(f for f in os.listdir(recordings_folder) if f.endswith(".m4a"))
predictions = {}
actual_labels = []  # Add actual labels if available

# Clamp the sample size so fewer than SAMPLE_SIZE recordings does not raise ValueError
n_samples = min(SAMPLE_SIZE, len(audio_files))
sample_rng = np.random.default_rng(RANDOM_SEED)
sample_audios = (
    sample_rng.choice(audio_files, size=n_samples, replace=False).tolist()
    if n_samples
    else []
)

In [20]:

# Run every sampled recording through each loaded model
for file in sample_audios:
    recording_path = os.path.join(recordings_folder, file)
    audio = AudioSegment.from_file(recording_path, format='m4a')
    sr = audio.frame_rate

    # Scale the raw integer samples to floats by dividing by 2**15 - 1.
    # NOTE(review): the divisor presumes 16-bit samples, and multi-channel audio is
    # not down-mixed here — confirm this matches how the models were trained.
    y = np.array(audio.get_array_of_samples()).astype(np.float32) / (2**15 - 1)

    # Average each MFCC coefficient over axis 1 and shape it as a single-row input
    mfcc_features = extract_mfcc(y, sr)
    mfcc_avg = np.mean(mfcc_features, axis=1).reshape(1, -1)

    predictions[file] = {}
    for model_name, model in loaded_models.items():
        # predict returns one entry per input row; there is exactly one row here
        predictions[file][model_name] = model.predict(mfcc_avg)[0]

In [21]:
# Print each file's prediction from every model (a prediction of 1 is shown as 'Autistic')
for file in predictions:
    print(f"Results for {file}:")
    for model_name, pred in predictions[file].items():
        label = 'Autistic' if pred == 1 else 'Non-Autistic'
        print(f"  {model_name}: {label}")

Results for autistic_059.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Non-Autistic
Results for autistic_045.m4a:
  Random Forest: Autistic
  Artificial Neural Network: Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic
Results for autistic_066.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Non-Autistic
Results for autistic_044.m4a:
  Random Forest: Autistic
  Artificial Neural Network: Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic
Results for non_autistic_030.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic


In [22]:
# Display results
# NOTE(review): this cell repeats the preceding "Display results" cell and prints the
# same report again; consider removing one of the two.
for file, preds in predictions.items():
    header = f"Results for {file}:"
    print(header)
    for model_name, pred in preds.items():
        if pred == 1:
            verdict = 'Autistic'
        else:
            verdict = 'Non-Autistic'
        print(f"  {model_name}: {verdict}")

Results for autistic_059.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Non-Autistic
Results for autistic_045.m4a:
  Random Forest: Autistic
  Artificial Neural Network: Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic
Results for autistic_066.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Non-Autistic
Results for autistic_044.m4a:
  Random Forest: Autistic
  Artificial Neural Network: Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic
Results for non_autistic_030.m4a:
  Random Forest: Non-Autistic
  Artificial Neural Network: Non-Autistic
  Support Vector Machine: Autistic
  Naive Bayes: Autistic


In [25]:
# # Performance Metrics (if actual labels are available)
# # accuracy, confusion matrix, classification report
# if actual_labels:
#     for model_name in loaded_models.keys():
#         y_pred = [predictions[file][model_name] for file in audio_files]
#         print(f"Performance of {model_name}:")
#         print("Accuracy:", accuracy_score(actual_labels, y_pred))
#         print(classification_report(actual_labels, y_pred))
#         cm = confusion_matrix(actual_labels, y_pred)
#         sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
#         plt.title(f'Confusion Matrix for {model_name}')
#         plt.xlabel('Predicted')
#         plt.ylabel('Actual')
#         plt.show()
