In [4]:
import os
import librosa

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

import librosa.display

import plotly.express as px
import IPython.display as ipd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from tqdm import tqdm, trange
from librosa import feature, amplitude_to_db, load

from tqdm.auto import tqdm
from plotly.subplots import make_subplots

from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

pd.plotting.register_matplotlib_converters()

%matplotlib inline

In [6]:
import librosa
import numpy as np
import pandas as pd
import os

# Function to extract mean MFCC features
def extract_mean_mfcc(audio_file, n_mfcc=20, frame_length_ms=25, hop_length_ms=10):
    """
    Extracts MFCC features from an audio file and computes the mean across frames.
    Returns the mean MFCC features as a list.
    """
    # Load the audio file
    y, sr = librosa.load(audio_file, sr=None)  

    # Compute hop length and FFT window size
    hop_length = int((hop_length_ms / 1000) * sr)  
    n_fft = int((frame_length_ms / 1000) * sr)    

    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)

    # Compute the mean of MFCCs across time frames
    mean_mfccs = np.mean(mfccs, axis=1)

    return mean_mfccs

# Function to process all audio files in a folder for multiple frame lengths
def process_audio_folder(input_folder, hop_lengths=[25, 50, 100, 150, 200, 250, 300], output_csv="mfcc_means.csv"):
    """
    Processes all audio files in the input folder for different frame lengths,
    extracts mean MFCCs, and saves results to a CSV.
    """
    # Get all audio files in the folder
    audio_files = [f for f in os.listdir(input_folder) if f.endswith(('.wav', '.mp3'))]
    total_files = len(audio_files)
    
    if total_files == 0:
        print("No audio files found in the folder!")
        return
    
    # List to store results
    mfcc_results = []

    # Process each audio file
    for i, audio_file in enumerate(audio_files):
        audio_path = os.path.join(input_folder, audio_file)
        
        for frame_length in frame_lengths:
            mean_mfccs = extract_mean_mfcc(audio_path, frame_length_ms=frame_length)
            
            # Append results with filename and frame length
            mfcc_results.append([audio_file, frame_length] + mean_mfccs.tolist())

        # Display progress
        print(f"Processed {i + 1}/{total_files} files ({(i + 1) / total_files * 100:.2f}%)")

    # Convert to DataFrame
    column_names = ["Filename", "Frame_Length_ms"] + [f"MFCC_{i+1}" for i in range(len(mean_mfccs))]
    mfcc_df = pd.DataFrame(mfcc_results, columns=column_names)

    # Save to CSV
    mfcc_df.to_csv(output_csv, index=False)
    print(f"\n✅ All files processed. Mean MFCCs saved to {output_csv}")

# Define input folder containing audio files
input_folder = r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia"  # Replace with your actual folder path
output_file = "mfcc_means_Dementia_new.csv"  # Output file

# Process all files for multiple frame lengths and save mean MFCCs
process_audio_folder(input_folder, output_csv=output_file)



Processed 1/119 files (0.84%)
Processed 2/119 files (1.68%)
Processed 3/119 files (2.52%)
Processed 4/119 files (3.36%)
Processed 5/119 files (4.20%)
Processed 6/119 files (5.04%)
Processed 7/119 files (5.88%)
Processed 8/119 files (6.72%)
Processed 9/119 files (7.56%)
Processed 10/119 files (8.40%)
Processed 11/119 files (9.24%)
Processed 12/119 files (10.08%)
Processed 13/119 files (10.92%)
Processed 14/119 files (11.76%)
Processed 15/119 files (12.61%)
Processed 16/119 files (13.45%)
Processed 17/119 files (14.29%)
Processed 18/119 files (15.13%)
Processed 19/119 files (15.97%)
Processed 20/119 files (16.81%)
Processed 21/119 files (17.65%)
Processed 22/119 files (18.49%)
Processed 23/119 files (19.33%)
Processed 24/119 files (20.17%)
Processed 25/119 files (21.01%)
Processed 26/119 files (21.85%)
Processed 27/119 files (22.69%)
Processed 28/119 files (23.53%)
Processed 29/119 files (24.37%)
Processed 30/119 files (25.21%)
Processed 31/119 files (26.05%)
Processed 32/119 files (26.8

In [4]:
import pandas as pd

# Read CSV file
df = pd.read_csv(r'C:\Users\adity\mfcc_means_control.csv')

# Get column names
columns = df.columns.tolist()
print(columns)


['Filename', 'Frame_Length_ms', 'MFCC_1', 'MFCC_2', 'MFCC_3', 'MFCC_4', 'MFCC_5', 'MFCC_6', 'MFCC_7', 'MFCC_8', 'MFCC_9', 'MFCC_10', 'MFCC_11', 'MFCC_12', 'MFCC_13', 'MFCC_14', 'MFCC_15', 'MFCC_16', 'MFCC_17', 'MFCC_18', 'MFCC_19', 'MFCC_20']


In [3]:
import pandas as pd

# Read both CSV files
df1 = pd.read_csv('mfcc_means_control_new.csv')
df2 = pd.read_csv('mfcc_means_Dementia_new.csv')

# Add an output column: 0 for the first file, 1 for the second
df1['output'] = 0
df2['output'] = 1

# Merge the data
merged_df = pd.concat([df1, df2], ignore_index=True)

# Save the merged CSV
merged_df.to_csv('merged_mfcc_new.csv', index=False)

print("Merged CSV file saved as 'merged_mfcc_new.csv'")


Merged CSV file saved as 'merged_mfcc_new.csv'


In [8]:
import pandas as pd
import os

# File path
file_path = "merged_mfcc_new_hop_length.csv"

# Read CSV file
df = pd.read_csv(file_path)

# Ensure the output folder exists
output_folder = r"C:\Users\adity\OneDrive\Desktop\Hop2length"
os.makedirs(output_folder, exist_ok=True)

# Values to filter
hop_lengths = [10, 25, 50, 100, 200, 300, 400, 500]

# Split and save separate CSV files
for value in hop_lengths:
    subset_df = df[df['Hop_Length_ms'] == value]  # Filter rows
    if not subset_df.empty:
        filename = f"{output_folder}/Hop_Length_{value}.csv"
        subset_df.to_csv(filename, index=False)
        print(f"Saved: {filename}")

print("All files saved successfully!")


Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_10.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_25.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_50.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_100.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_200.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_300.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_400.csv
Saved: C:\Users\adity\OneDrive\Desktop\Hop2length/Hop_Length_500.csv
All files saved successfully!


In [10]:
import pandas as pd
import os

# Folder containing the CSV files
folder_path = r"C:\Users\adity\OneDrive\Desktop\Hop2length"

# Columns to drop
columns_to_drop = ['Filename', 'Hop_Length_ms']  # Change these to actual column names

# Process each CSV file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):  # Ensure it's a CSV file
        file_path = os.path.join(folder_path, filename)
        
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Drop specified columns if they exist
        df = df.drop(columns=[col for col in columns_to_drop if col in df.columns], errors='ignore')
        
        # Save the modified CSV
        df.to_csv(file_path, index=False)
        print(f"Updated: {filename}")

print("All files updated successfully!")


Updated: Hop_Length_10.csv
Updated: Hop_Length_100.csv
Updated: Hop_Length_200.csv
Updated: Hop_Length_25.csv
Updated: Hop_Length_300.csv
Updated: Hop_Length_400.csv
Updated: Hop_Length_50.csv
Updated: Hop_Length_500.csv
All files updated successfully!


In [11]:
import os
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Folder containing the CSV files
folder_path = r"C:\Users\adity\OneDrive\Desktop\Hop2length"

# Iterate through all CSV files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(folder_path, filename)
        
        # Read CSV
        df = pd.read_csv(file_path)
        
        # Ensure the target column 'output' exists
        if 'output' not in df.columns:
            print(f"Skipping {filename} (No 'output' column)")
            continue
        
        # Separate features (X) and target variable (y)
        X = df.drop(columns=['output'])  # Drop target column
        y = df['output']

        # Handle missing values (fill with mean)
        X = X.fillna(X.mean())

        # Standardize features
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

        # Train SVM model
        model = SVC(kernel='rbf')  # Use RBF kernel for non-linear classification
        model.fit(X_train, y_train)

        # Predict and evaluate
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        print(f"Model trained on {filename} - Accuracy: {accuracy:.4f}")

print("SVM applied to all CSV files successfully!")


Model trained on Hop_Length_10.csv - Accuracy: 0.6078
Model trained on Hop_Length_100.csv - Accuracy: 0.6078
Model trained on Hop_Length_200.csv - Accuracy: 0.5882
Model trained on Hop_Length_25.csv - Accuracy: 0.6078
Model trained on Hop_Length_300.csv - Accuracy: 0.6078
Model trained on Hop_Length_400.csv - Accuracy: 0.5882
Model trained on Hop_Length_50.csv - Accuracy: 0.6078
Model trained on Hop_Length_500.csv - Accuracy: 0.6078
SVM applied to all CSV files successfully!
