In [1]:
import os
import librosa

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

import librosa.display

import plotly.express as px
import IPython.display as ipd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from tqdm import tqdm, trange
from librosa import feature, amplitude_to_db, load

from tqdm.auto import tqdm
from plotly.subplots import make_subplots

from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

pd.plotting.register_matplotlib_converters()

%matplotlib inline

In [2]:
import librosa
import numpy as np
import pandas as pd
from scipy.fft import fft

# Step 4.1: Extract GFCC (Generalized Frequency Cepstral Coefficients)
def extract_gfcc(audio_file, n_gfcc=13, gamma=1.0):
    """
    Extract GFCC features from an audio file.
    GFCC(x) ← DCT(log(FFT(x))) with a generalized frequency scale.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Apply FFT
    fft_values = np.abs(fft(y))

    # Compute logarithm of the FFT values
    log_fft_values = np.log1p(fft_values)

    # Apply Generalized Frequency Scale (Gamma)
    freq_scale = np.linspace(0, sr / 2, len(log_fft_values))
    generalized_freq = np.power(freq_scale, gamma)

    # Apply DCT to the log of FFT values (GFCC)
    gfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_gfcc)  # MFCC extraction can be adapted to GFCC
    gfccs = np.dot(np.diag(generalized_freq), gfccs)  # Applying frequency scale adjustment

    return gfccs.T  # Transpose to get frame-wise GFCCs

# Step 4.2: Extract Prosody Features (e.g., pitch, energy)
def extract_prosody_features(audio_file):
    """
    Extract prosody features like pitch and energy from an audio file.
    """
    y, sr = librosa.load(audio_file, sr=None)
    
    # Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitch = pitches[pitches > 0].mean()  # Mean pitch
    
    # Energy (Root Mean Square Energy)
    rms = librosa.feature.rms(y=y)
    mean_energy = np.mean(rms)
    
    return {"pitch": pitch, "energy": mean_energy}

# Step 4.3: Extract Statistical Features
def extract_statistical_features(features):
    """
    Extract statistical features (mean, standard deviation, min, max) for given features.
    """
    stats = {
        "mean": np.mean(features, axis=0),
        "std_dev": np.std(features, axis=0),
        "min": np.min(features, axis=0),
        "max": np.max(features, axis=0)
    }
    return stats

# Wrapper Function to Process Data
def process_audio_files(audio_files):
    """
    Process multiple audio files to extract GFCC, prosody, and statistical features.
    """
    all_features = []
    
    for file in audio_files:
        print(f"Processing: {file}")
        
        # Extract GFCC Features
        gfcc = extract_gfcc(file)
        gfcc_stats = extract_statistical_features(gfcc)
        
        # Extract Prosody Features
        prosody = extract_prosody_features(file)
        
        # Combine Features
        combined_features = {
            **{f"GFCC_mean_{i+1}": gfcc_stats["mean"][i] for i in range(len(gfcc_stats["mean"]))},
            **{f"GFCC_std_{i+1}": gfcc_stats["std_dev"][i] for i in range(len(gfcc_stats["std_dev"]))},
            **prosody
        }
        
        all_features.append(combined_features)
    
    return pd.DataFrame(all_features)

# Example Usage
audio_files = [
    r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav",  # Add paths to your audio files
]

features_df = process_audio_files(audio_files)

# Save to CSV
output_csv = "R1audio_gfcc_features.csv"
features_df.to_csv(output_csv, index=False)
print(f"Features saved to {output_csv}")


Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav


MemoryError: Unable to allocate 43.1 TiB for an array with shape (2433938, 2433938) and data type float64

In [3]:
import librosa
import numpy as np
import pandas as pd
from scipy.fft import fft

# Step 4.1: Extract GFCC (Generalized Frequency Cepstral Coefficients)
def extract_gfcc(audio_file, n_gfcc=13, gamma=1.0):
    """
    Extract GFCC features from an audio file.
    GFCC(x) ← DCT(log(FFT(x))) with a generalized frequency scale.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Apply FFT
    fft_values = np.abs(fft(y))

    # Compute logarithm of the FFT values
    log_fft_values = np.log1p(fft_values)

    # Apply Generalized Frequency Scale (Gamma)
    freq_scale = np.linspace(0, sr / 2, len(log_fft_values))
    generalized_freq = np.power(freq_scale, gamma)

    # Apply the frequency scaling element-wise
    scaled_log_fft_values = log_fft_values * generalized_freq

    # Apply DCT to the scaled log FFT values
    gfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_gfcc)  # MFCC extraction can be adapted to GFCC
    gfccs = np.dot(np.diag(generalized_freq[:n_gfcc]), gfccs)  # Scaling for the desired number of GFCCs

    return gfccs.T  # Transpose to get frame-wise GFCCs

# Step 4.2: Extract Prosody Features (e.g., pitch, energy)
def extract_prosody_features(audio_file):
    """
    Extract prosody features like pitch and energy from an audio file.
    """
    y, sr = librosa.load(audio_file, sr=None)
    
    # Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitch = pitches[pitches > 0].mean()  # Mean pitch
    
    # Energy (Root Mean Square Energy)
    rms = librosa.feature.rms(y=y)
    mean_energy = np.mean(rms)
    
    return {"pitch": pitch, "energy": mean_energy}

# Step 4.3: Extract Statistical Features
def extract_statistical_features(features):
    """
    Extract statistical features (mean, standard deviation, min, max) for given features.
    """
    stats = {
        "mean": np.mean(features, axis=0),
        "std_dev": np.std(features, axis=0),
        "min": np.min(features, axis=0),
        "max": np.max(features, axis=0)
    }
    return stats

# Wrapper Function to Process Data
def process_audio_files(audio_files):
    """
    Process multiple audio files to extract GFCC, prosody, and statistical features.
    """
    all_features = []
    
    for file in audio_files:
        print(f"Processing: {file}")
        
        # Extract GFCC Features
        gfcc = extract_gfcc(file)
        gfcc_stats = extract_statistical_features(gfcc)
        
        # Extract Prosody Features
        prosody = extract_prosody_features(file)
        
        # Combine Features
        combined_features = {
            **{f"GFCC_mean_{i+1}": gfcc_stats["mean"][i] for i in range(len(gfcc_stats["mean"]))},
            **{f"GFCC_std_{i+1}": gfcc_stats["std_dev"][i] for i in range(len(gfcc_stats["std_dev"]))},
            **prosody
        }
        
        all_features.append(combined_features)
    
    return pd.DataFrame(all_features)

# Example Usage
audio_files = [
    r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav",  # Add paths to your audio files
]

features_df = process_audio_files(audio_files)

# Save to CSV
output_csv = "R1audio_gfcc_features.csv"
features_df.to_csv(output_csv, index=False)
print(f"Features saved to {output_csv}")


Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav
Features saved to R1audio_gfcc_features.csv


Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\001-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\003-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\005-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\005-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\007-1.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\007-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\010-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\010-1.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\010-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\010-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\010-4.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Dementia\014-2.wav

In [5]:
import librosa
import numpy as np
import pandas as pd
import os

# Step 4.1: Extract GFCC (Generalized Frequency Cepstral Coefficients)
def extract_gfcc(audio_file, n_gfcc=13, gamma=1.0):
    """
    Extract GFCC features from an audio file.
    GFCC(x) ← DCT(log(FFT(x))) with a generalized frequency scale.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # Apply FFT
    fft_values = np.abs(np.fft.fft(y))

    # Compute logarithm of the FFT values
    log_fft_values = np.log1p(fft_values)

    # Apply Generalized Frequency Scale (Gamma)
    freq_scale = np.linspace(0, sr / 2, len(log_fft_values))
    generalized_freq = np.power(freq_scale, gamma)

    # Apply the frequency scaling element-wise
    scaled_log_fft_values = log_fft_values * generalized_freq

    # Apply DCT to the scaled log FFT values
    gfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_gfcc)  # MFCC extraction can be adapted to GFCC
    gfccs = np.dot(np.diag(generalized_freq[:n_gfcc]), gfccs)  # Scaling for the desired number of GFCCs

    return gfccs.T  # Transpose to get frame-wise GFCCs

# Step 4.2: Extract Prosody Features (e.g., pitch, energy)
def extract_prosody_features(audio_file):
    """
    Extract prosody features like pitch and energy from an audio file.
    """
    y, sr = librosa.load(audio_file, sr=None)
    
    # Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitch = pitches[pitches > 0].mean()  # Mean pitch
    
    # Energy (Root Mean Square Energy)
    rms = librosa.feature.rms(y=y)
    mean_energy = np.mean(rms)
    
    return {"pitch": pitch, "energy": mean_energy}

# Step 4.3: Extract Statistical Features
def extract_statistical_features(features):
    """
    Extract statistical features (mean, standard deviation, min, max) for given features.
    """
    stats = {
        "mean": np.mean(features, axis=0),
        "std_dev": np.std(features, axis=0),
        "min": np.min(features, axis=0),
        "max": np.max(features, axis=0)
    }
    return stats

# Wrapper Function to Process All Audio Files in a Folder
def process_audio_files_in_folder(folder_path):
    """
    Process all audio files in a folder to extract GFCC, prosody, and statistical features.
    """
    all_features = []
    
    # Get all .wav files in the folder
    audio_files = [f for f in os.listdir(folder_path) if f.endswith('.wav')]
    
    for file in audio_files:
        file_path = os.path.join(folder_path, file)
        print(f"Processing: {file_path}")
        
        # Extract GFCC Features
        gfcc = extract_gfcc(file_path)
        gfcc_stats = extract_statistical_features(gfcc)
        
        # Extract Prosody Features
        prosody = extract_prosody_features(file_path)
        
        # Combine Features
        combined_features = {
            **{f"GFCC_mean_{i+1}": gfcc_stats["mean"][i] for i in range(len(gfcc_stats["mean"]))},
            **{f"GFCC_std_{i+1}": gfcc_stats["std_dev"][i] for i in range(len(gfcc_stats["std_dev"]))},
            **prosody
        }
        
        all_features.append(combined_features)
    
    return pd.DataFrame(all_features)

# Example Usage
folder_path = r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Control"  # Update to your folder path
features_df = process_audio_files_in_folder(folder_path)

# Save to CSV
output_csv = "audio_gfcc_features_Control.csv"
features_df.to_csv(output_csv, index=False)
print(f"Features saved to {output_csv}")

Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-1.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-4.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-4.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\015-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\015-1.wav
Processing: 

In [6]:
import pandas as pd

# File paths for the two CSV files
file1_path = r"C:\Users\adity\audio_gfcc_features.csv"  # Replace with your first file path
file2_path = r"C:\Users\adity\audio_gfcc_features_Control.csv"  # Replace with your second file path

# Load the CSV files into DataFrames
df1 = pd.read_csv(file1_path)
df2 = pd.read_csv(file2_path)

# Add a new column to each DataFrame
df1['Output'] = 1  # Mark rows from the first file as 1
df2['Output'] = 0  # Mark rows from the second file as 0

# Combine the two DataFrames
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined DataFrame to a new CSV file
output_file_path = 'Combined_GFCC.csv'  # Replace with your desired output file path
combined_df.to_csv(output_file_path, index=False)

print("Files combined successfully!")


Files combined successfully!
