In [3]:
import os
import librosa

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

import librosa.display

import plotly.express as px
import IPython.display as ipd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from tqdm import tqdm, trange
from librosa import feature, amplitude_to_db, load

from tqdm.auto import tqdm
from plotly.subplots import make_subplots

from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

pd.plotting.register_matplotlib_converters()

%matplotlib inline

In [10]:
import librosa
import numpy as np
import pandas as pd

# Step 4.1: Extract MFCC
def extract_mfcc(audio_file, n_mfcc=13):
    """
    Compute the MFCC matrix of one audio file, one row per analysis frame.

    Conceptually: MFCC(x) = DCT(log(mel-spectrum(x))).

    Parameters
    ----------
    audio_file : path-like
        Audio file to read; it is loaded at its native sample rate (sr=None).
    n_mfcc : int, default 13
        Number of cepstral coefficients to keep per frame.

    Returns
    -------
    numpy.ndarray
        The transposed output of ``librosa.feature.mfcc``, so that the first
        axis indexes frames and the second axis indexes coefficients.
    """
    signal, sample_rate = librosa.load(audio_file, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # librosa lays coefficients out along axis 0; flip so each row is a frame.
    return np.transpose(coefficients)

# Step 4.2: Extract Prosody Features (e.g., pitch, energy)
def extract_prosody_features(audio_file):
    """
    Extract two clip-level prosody descriptors from an audio file.

    Parameters
    ----------
    audio_file : path-like
        Audio file to read; it is loaded at its native sample rate (sr=None).

    Returns
    -------
    dict
        ``{"pitch": <mean of all positive pitch-track bins>,
           "energy": <mean RMS energy over all frames>}``.
        ``"pitch"`` is NaN when the clip contains no positive pitch bin
        (e.g. digital silence).
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Pitch: piptrack yields a grid of pitch candidates; only the strictly
    # positive entries are treated as detected pitches here.
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    # Taking .mean() of an empty selection emits a "Mean of empty slice"
    # RuntimeWarning and yields NaN implicitly; make that case explicit.
    pitch = detected.mean() if detected.size else np.nan

    # Energy: root-mean-square energy per frame, averaged over the clip.
    rms = librosa.feature.rms(y=y)
    mean_energy = np.mean(rms)

    return {"pitch": pitch, "energy": mean_energy}

# Step 4.3: Extract Statistical Features
def extract_statistical_features(features):
    """
    Summarise a feature matrix column-by-column.

    Each statistic is reduced along axis 0, so for a (frames, coefficients)
    matrix the result holds one value per coefficient.

    Parameters
    ----------
    features : array-like
        Values to summarise; reduced over the first axis.

    Returns
    -------
    dict
        Keys ``"mean"``, ``"std_dev"``, ``"min"`` and ``"max"`` (in that
        order), each mapped to the corresponding axis-0 reduction.
    """
    reducers = (
        ("mean", np.mean),
        ("std_dev", np.std),
        ("min", np.min),
        ("max", np.max),
    )
    return {label: reducer(features, axis=0) for label, reducer in reducers}

# Wrapper Function to Process Data
def process_audio_files(audio_files):
    """
    Build a one-row-per-file feature table from a collection of audio files.

    For every file the row contains, in this column order:
    ``MFCC_mean_1..N``, ``MFCC_std_1..N`` (per-coefficient mean and standard
    deviation over frames), followed by the prosody entries returned by
    ``extract_prosody_features`` (``pitch`` and ``energy``).

    Note: ``extract_statistical_features`` also computes min and max, but
    those are not exported to the table.

    Parameters
    ----------
    audio_files : iterable of path-like
        Files to process; a progress line is printed for each one.

    Returns
    -------
    pandas.DataFrame
        One row per input file, in input order.
    """
    rows = []

    for path in audio_files:
        print(f"Processing: {path}")

        # Frame-wise MFCCs collapsed to per-coefficient statistics.
        stats = extract_statistical_features(extract_mfcc(path))

        # Clip-level pitch / energy descriptors.
        prosody = extract_prosody_features(path)

        # Assemble the row; insertion order defines the column order.
        row = {}
        for number, value in enumerate(stats["mean"], start=1):
            row[f"MFCC_mean_{number}"] = value
        for number, value in enumerate(stats["std_dev"], start=1):
            row[f"MFCC_std_{number}"] = value
        row.update(prosody)

        rows.append(row)

    return pd.DataFrame(rows)

# Example usage: run the full feature pipeline on a hand-picked list of files
# and persist the resulting one-row-per-file table as CSV.
# NOTE(review): the path below is an absolute, machine-specific Windows path;
# the cell only runs where that file exists.
audio_files = [
    r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav",  # Add further audio file paths to this list

]

# One row per entry in `audio_files`.
features_df = process_audio_files(audio_files)

# Save to CSV; written relative to the notebook's working directory,
# without the DataFrame index column.
output_csv = "R1audio_features.csv"
features_df.to_csv(output_csv, index=False)
print(f"Features saved to {output_csv}")


Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav
Features saved to R1audio_features.csv


In [8]:
import librosa
import pandas as pd
import numpy as np

# Step 1: Load the audio file at its native sample rate (sr=None).
# NOTE(review): absolute, machine-specific path; replace with your own file.
audio_file = r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav"  # Replace with your audio file path
y, sr = librosa.load(audio_file, sr=None)

# Step 2: Extract features (MFCCs). Axis 0 of `mfccs` indexes the coefficient
# and axis 1 indexes the frame, as relied on by the shape lookups below.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)  # Extract 13 MFCCs

# Convert the MFCCs to a DataFrame: transposed so that each row is one frame
# and each column (MFCC_1 .. MFCC_n) is one coefficient.
mfcc_df = pd.DataFrame(mfccs.T, columns=[f"MFCC_{i+1}" for i in range(mfccs.shape[0])])

# Step 3: Add a timestamp per frame (one per DataFrame row), then save to CSV.
# frames_to_time is called with its default hop length; this assumes it matches
# the hop used by the mfcc call above (both left at defaults) - confirm if
# either call is given an explicit hop_length.
time_stamps = librosa.frames_to_time(range(mfccs.shape[1]), sr=sr)
mfcc_df["Timestamp"] = time_stamps

# Written relative to the notebook's working directory, without the index column.
output_csv = "Raudio_features.csv"  # Desired CSV file name
mfcc_df.to_csv(output_csv, index=False)

print(f"Audio features saved to {output_csv}")


Audio features saved to Raudio_features.csv


In [None]:
# Load the dataset metadata CSV into `df`.
# NOTE(review): this cell has no execution count, i.e. it was not run in the
# saved session; the later cell that iterates over `df` fails with NameError
# until this one has been executed.
# Alternative relative location kept for reference:
# data_path = 'audios/UrbanSound8K.csv'
# NOTE(review): absolute, machine-specific path into a local kagglehub cache.
data_path = r"C:\Users\adity\.cache\kagglehub\datasets\chrisfilo\urbansound8k\versions\1\UrbanSound8K.csv"
df = pd.read_csv(data_path)
# Last expression of the cell: shows the first rows as the cell output.
df.head()

In [4]:
# Build the full on-disk path of every clip listed in the metadata frame `df`:
#   <abs(path)>/fold<fold>/<slice_file_name>
# `df` must already exist (created by the "Load the dataset" cell); running
# this cell on a fresh kernel without it raises NameError.
sample_path = []
# path = 'audios'
# NOTE(review): this root is the "Speech Sample" folder, while `df` is read from
# the UrbanSound8K metadata - confirm the fold<N> directories live under it.
path = r"C:\Users\adity\OneDrive\Desktop\Speech Sample"
base_dir = os.path.abspath(path)  # loop-invariant, resolve once
# total= lets tqdm render real progress; a bare iterrows() generator has no length.
for index_num, row in tqdm(df.iterrows(), total=len(df)):
    # Let os.path.join insert the separator instead of hard-coding '/',
    # which produced mixed separators on Windows.
    file_name = os.path.join(base_dir, 'fold' + str(row["fold"]), str(row['slice_file_name']))
    sample_path.append(file_name)

NameError: name 'df' is not defined