# Data augmentation

In [1]:
import os

import librosa
import numpy as np
import soundfile as sf

# Function to apply Additive White Gaussian Noise (AWGN) to audio file
def apply_awgn(audio, rate, rng=None):
    """Return ``audio`` with additive white Gaussian noise (AWGN) mixed in.

    Parameters
    ----------
    audio : array-like
        Audio samples. Any shape is accepted, e.g. mono ``(n,)`` or
        multi-channel ``(channels, n)``.
    rate : float
        Multiplier applied to zero-mean, unit-variance Gaussian noise before
        it is added to the signal.
    rng : numpy.random.Generator, optional
        Random generator used to draw the noise. When omitted, the global
        ``np.random`` state is used, so existing two-argument calls behave
        exactly as before. Pass ``np.random.default_rng(seed)`` for
        reproducible augmentation.

    Returns
    -------
    numpy.ndarray
        ``audio + rate * noise``, with the same shape as ``audio``.
    """
    audio = np.asarray(audio)
    # Size the noise by the full shape, not len(): len() only counts the first
    # axis, which breaks broadcasting for multi-channel input.
    if rng is None:
        noise = np.random.normal(0, 1, audio.shape)
    else:
        noise = rng.normal(0, 1, audio.shape)
    return audio + rate * noise

# Function to apply time-stretching to audio file
def apply_time_stretch(audio, factor):
    """Time-stretch ``audio`` with ``librosa.effects.time_stretch``.

    ``factor`` is forwarded unchanged as librosa's ``rate`` argument and the
    stretched signal is returned. Per librosa's documentation, a rate below 1
    slows the signal down and a rate above 1 speeds it up.
    """
    return librosa.effects.time_stretch(y=audio, rate=factor)

# Class label -> folder holding that class's .wav recordings.
# The augmentation loop reads from and writes into these folders.
data_folders = dict(
    healthy_voices='D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/healthy voice',
    hyper_dysphonia='D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/hyper dysphonie',
    laryngitis='D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/laryngitis',
    vox_senilis='D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/vox senilis',
)

# Fix the global NumPy seed so the AWGN files come out identical on every run.
np.random.seed(0)

# Suffixes appended to the files this cell writes. They are also used to
# recognise those outputs, because augmented files land in the same folder
# as the originals.
AUGMENTED_SUFFIXES = ('_awgn_1', '_awgn_2', '_time_stretch_1', '_time_stretch_2')

# Create two noisy and two time-stretched copies of every original recording.
for class_label, folder_path in data_folders.items():
    # sorted() makes the processing order (and thus the seeded noise assigned
    # to each file) independent of the OS directory-listing order.
    for file in sorted(os.listdir(folder_path)):
        # Split on the real extension only; str.replace('.wav', ...) would also
        # rewrite a '.wav' that appears earlier in the file name.
        stem, ext = os.path.splitext(file)
        if ext != '.wav':
            continue
        # Skip files produced by a previous run of this cell; otherwise every
        # re-run would augment the already-augmented copies.
        if stem.endswith(AUGMENTED_SUFFIXES):
            continue

        file_path = os.path.join(folder_path, file)
        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(file_path, sr=None)

        augmented_by_suffix = {
            '_awgn_1': apply_awgn(audio, 0.020),
            '_awgn_2': apply_awgn(audio, 0.025),
            '_time_stretch_1': apply_time_stretch(audio, 0.7),
            '_time_stretch_2': apply_time_stretch(audio, 0.8),
        }

        # Write each variant next to its source as <stem><suffix>.wav.
        for suffix, samples in augmented_by_suffix.items():
            sf.write(os.path.join(folder_path, f'{stem}{suffix}{ext}'), samples, sr)


# Feature extraction

In [2]:
import opensmile
import os
import pandas as pd

# Build the openSMILE extractor: eGeMAPS feature set at the Functionals level
# (this cell's printed summary reports 88 feature columns per class CSV).
# NOTE(review): this comment previously claimed "version v01b". In the
# opensmile Python package the plain `FeatureSet.eGeMAPS` member appears to be
# a deprecated alias for v01a, not v01b — confirm against the FeatureSet docs
# and use an explicit member (e.g. `eGeMAPSv01b` or `eGeMAPSv02`) if a
# specific version is intended.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPS,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Class label -> folder of .wav files to extract features from.
# The label becomes the value of the 'disorder' column and part of the CSV name.
directories = dict([
    ('healthy_voices', 'D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/healthy voice'),
    ('hyper_dysphonia', 'D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/hyper dysphonie'),
    ('laryngitis', 'D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/laryngitis'),
    ('vox_senilis', 'D:/SEMESTER 6/SPEECH PROCESSING/pap/augmentation/vox senilis'),
])

# Extract features for every .wav file of each class and write one CSV per class.
for disorder, folder in directories.items():
    # sorted() gives the CSV rows a stable order; os.listdir order is arbitrary.
    wav_names = sorted(name for name in os.listdir(folder) if name.endswith('.wav'))

    # pd.concat raises "No objects to concatenate" on an empty list, so report
    # the empty class explicitly and carry on with the remaining classes.
    if not wav_names:
        print(f'{disorder} - No .wav files found in {folder}; no CSV written')
        continue

    # One feature frame per file, tagged with its file name and class label.
    disorder_features = []
    for filename in wav_names:
        features = smile.process_file(os.path.join(folder, filename))
        features['file'] = filename
        features['disorder'] = disorder
        disorder_features.append(features)

    # Stack the per-file frames; ignore_index replaces openSMILE's own index
    # with a plain 0..n-1 range.
    disorder_features_df = pd.concat(disorder_features, ignore_index=True)

    # Save to CSV (written to the current working directory).
    output_csv = f'extracted_features1_{disorder}.csv'
    disorder_features_df.to_csv(output_csv, index=False)

    # Report the column count; 'file' and 'disorder' are the two non-feature columns.
    num_columns = len(disorder_features_df.columns)
    print(f'{disorder} - Number of columns: {num_columns}')
    print(f'{disorder} - Number of features (excluding file and disorder columns): {num_columns - 2}')


  from pandas.core import (


healthy_voices - Number of columns: 90
healthy_voices - Number of features (excluding file and disorder columns): 88
hyper_dysphonia - Number of columns: 90
hyper_dysphonia - Number of features (excluding file and disorder columns): 88
laryngitis - Number of columns: 90
laryngitis - Number of features (excluding file and disorder columns): 88
vox_senilis - Number of columns: 90
vox_senilis - Number of features (excluding file and disorder columns): 88


# Final dataframe

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Per-class feature tables produced by the feature-extraction cell.
csv_files = [
    'extracted_features1_healthy_voices.csv',
    'extracted_features1_hyper_dysphonia.csv',
    'extracted_features1_vox_senilis.csv',
    'extracted_features1_laryngitis.csv'
]

# Read every table, then stack them into one frame with a fresh 0..n-1 index.
dataframes = list(map(pd.read_csv, csv_files))
all_features_df = pd.concat(dataframes, ignore_index=True)

# Replace missing values with 0 (applied to every column).
all_features_df = all_features_df.fillna(0)

# Turn the textual class label into integer codes; the encoder is kept so the
# codes can be mapped back to names via label_encoder.classes_.
label_encoder = LabelEncoder()
all_features_df['disorder'] = label_encoder.fit_transform(all_features_df['disorder'])

# Target vector and feature matrix ('file' is an identifier, not a feature).
y = all_features_df['disorder']
X = all_features_df.drop(columns=['file', 'disorder'])

# Standardize each feature column.
# NOTE(review): the scaler is fitted on all rows. If a train/test split is made
# later, fit it on the training part only to avoid leaking test statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Rebuild a labelled frame from the scaled matrix, with the encoded label as
# the last column.
final_df = pd.DataFrame(X_scaled, columns=X.columns).assign(disorder=y)

# Persist the prepared data set and show the first rows.
final_df.to_csv('prepared_features1.csv', index=False)
print(final_df.head())

   F0semitoneFrom27.5Hz_sma3nz_amean  F0semitoneFrom27.5Hz_sma3nz_stddevNorm  \
0                           0.288585                                1.314599   
1                           0.685394                               -0.199647   
2                           0.746177                               -0.419337   
3                           0.255968                                1.327056   
4                           0.270849                                1.398371   

   F0semitoneFrom27.5Hz_sma3nz_percentile20.0  \
0                                    0.122994   
1                                    0.621480   
2                                    0.646613   
3                                   -0.204326   
4                                   -0.090093   

   F0semitoneFrom27.5Hz_sma3nz_percentile50.0  \
0                                    0.524206   
1                                    0.596782   
2                                    0.625023   
3                           