In [1]:
import os
import librosa

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

import librosa.display

import plotly.express as px
import IPython.display as ipd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from tqdm import tqdm, trange
from librosa import feature, amplitude_to_db, load

from tqdm.auto import tqdm
from plotly.subplots import make_subplots

from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

# Register pandas' formatters/converters with matplotlib so pandas date/time
# values can be used directly on plot axes.
pd.plotting.register_matplotlib_converters()

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
import librosa
import numpy as np
import pandas as pd
import os

# Step 4.1: Extract MFCC with specified hop length and frame length
def extract_mfcc(audio_file, n_mfcc=13, frame_duration=0.025, hop_duration=0.010):
    """
    Extract frame-wise MFCC features from an audio file.

    The file is loaded at its original sampling rate, and the analysis window
    and hop sizes are derived from durations given in seconds, so the same
    time resolution is requested whatever the sampling rate of the file is.

    Parameters
    ----------
    audio_file : str or path-like
        Path handed to ``librosa.load``.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
    frame_duration : float, default 0.025
        Analysis window length in seconds (25 ms by default).
    hop_duration : float, default 0.010
        Step between consecutive windows in seconds (10 ms by default).

    Returns
    -------
    numpy.ndarray
        The transposed output of ``librosa.feature.mfcc``, i.e. one row per
        frame and one column per coefficient.

    Raises
    ------
    ValueError
        If a duration is not positive, or is so short that it covers less
        than one sample at the file's sampling rate.
    """
    if frame_duration <= 0 or hop_duration <= 0:
        raise ValueError("frame_duration and hop_duration must be positive")

    y, sr = librosa.load(audio_file, sr=None)  # sr=None keeps the original sampling rate

    # Convert the durations to whole sample counts for this file's rate.
    n_fft = int(frame_duration * sr)
    hop_length = int(hop_duration * sr)
    if n_fft < 1 or hop_length < 1:
        raise ValueError(
            f"frame_duration/hop_duration are shorter than one sample at sr={sr}"
        )

    mfccs = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft
    )
    return mfccs.T  # Transpose to get frame-wise MFCCs

# Step 4.2: Extract Prosody Features (Pitch and Energy)
def extract_prosody_features(audio_file):
    """
    Extract two clip-level prosody summaries (pitch and energy) from an audio file.

    Parameters
    ----------
    audio_file : str or path-like
        Path handed to ``librosa.load``; the file is read at its original
        sampling rate.

    Returns
    -------
    dict
        ``{"pitch": ..., "energy": ...}`` where

        * ``pitch`` is the mean of all strictly positive entries of the pitch
          array returned by ``librosa.piptrack``, or ``0`` when there are none;
        * ``energy`` is the mean of ``librosa.feature.rms`` over the clip.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # piptrack also returns the matching magnitudes; they are not used here.
    pitches, _ = librosa.piptrack(y=y, sr=sr)

    # Build the "positive entries" selection once instead of masking twice.
    # NOTE(review): this averages every positive entry of the whole array,
    # not a single fundamental-frequency value per frame -- confirm that this
    # is the intended "pitch" feature before relying on it as F0.
    positive_pitches = pitches[pitches > 0]
    pitch = positive_pitches.mean() if positive_pitches.size else 0

    # Energy: mean root-mean-square value over the clip.
    mean_energy = np.mean(librosa.feature.rms(y=y))

    return {"pitch": pitch, "energy": mean_energy}

# Step 4.3: Extract Statistical Features
def extract_statistical_features(features):
    """
    Summarise a feature array along its first axis.

    Returns a dict with the keys "mean", "std_dev", "min" and "max"; each
    value is the corresponding NumPy reduction of `features` over axis 0.
    """
    # (output key, NumPy reducer) pairs, in the order the keys are emitted.
    reducers = (
        ("mean", np.mean),
        ("std_dev", np.std),
        ("min", np.min),
        ("max", np.max),
    )
    return {label: reduce_fn(features, axis=0) for label, reduce_fn in reducers}

# Process all audio files in a folder
def process_audio_folder(folder_path):
    """
    Build a per-file feature table for every WAV file in a folder.

    For each file the MFCC matrix is summarised with per-coefficient mean and
    standard deviation, and the prosody features are appended.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        One row per audio file with the columns ``filename``,
        ``MFCC_mean_1..N``, ``MFCC_std_1..N`` and the keys returned by
        ``extract_prosody_features``. An empty DataFrame is returned when the
        folder contains no WAV files.
    """
    # Sort so the row order is reproducible (os.listdir gives no ordering
    # guarantee), and match the extension case-insensitively so files named
    # "*.WAV" are not silently skipped.
    audio_files = sorted(
        name for name in os.listdir(folder_path) if name.lower().endswith(".wav")
    )

    if not audio_files:
        print("No .wav files found in the folder!")
        return pd.DataFrame()  # Return empty DataFrame if no files are found

    all_features = []
    for file in audio_files:
        file_path = os.path.join(folder_path, file)
        print(f"Processing: {file_path}")

        # Per-coefficient statistics over all frames of the clip.
        mfcc_stats = extract_statistical_features(extract_mfcc(file_path))

        # Clip-level pitch / energy summaries.
        prosody = extract_prosody_features(file_path)

        # Column order: filename, all means, all standard deviations, prosody.
        combined_features = {"filename": file}
        combined_features.update(
            (f"MFCC_mean_{idx}", value)
            for idx, value in enumerate(mfcc_stats["mean"], start=1)
        )
        combined_features.update(
            (f"MFCC_std_{idx}", value)
            for idx, value in enumerate(mfcc_stats["std_dev"], start=1)
        )
        combined_features.update(prosody)

        all_features.append(combined_features)

    # Build the frame once from the list of row dicts.
    return pd.DataFrame(all_features)

# Run the extraction over the recordings in the "Control" folder.
folder_path = r"C:\Users\adity\OneDrive\Desktop\Speech Sample\Control"  # Change to your folder path
features_df = process_audio_folder(folder_path)

# An empty table means nothing was extracted, so there is nothing to write;
# otherwise persist the features as CSV in the current working directory.
if features_df.empty:
    print("No features extracted.")
else:
    output_csv = "Before_mid_sem_Control_audio_features.csv"
    features_df.to_csv(output_csv, index=False)
    print(f"Features saved to {output_csv}")


Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-1.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\002-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\006-4.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-2.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-3.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\013-4.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\015-0.wav
Processing: C:\Users\adity\OneDrive\Desktop\Speech Sample\Control\015-1.wav
Processing: 

In [5]:
import pandas as pd

# Read the saved feature table back from the working directory.
csv_file = r"Before_mid_sem_Control_audio_features.csv"  # Replace with your file path
df = pd.read_csv(csv_file)

# Show the header line followed by the column names as a plain list.
print("Columns in the CSV file:", list(df.columns), sep="\n")


Columns in the CSV file:
['filename', 'MFCC_mean_1', 'MFCC_mean_2', 'MFCC_mean_3', 'MFCC_mean_4', 'MFCC_mean_5', 'MFCC_mean_6', 'MFCC_mean_7', 'MFCC_mean_8', 'MFCC_mean_9', 'MFCC_mean_10', 'MFCC_mean_11', 'MFCC_mean_12', 'MFCC_mean_13', 'MFCC_std_1', 'MFCC_std_2', 'MFCC_std_3', 'MFCC_std_4', 'MFCC_std_5', 'MFCC_std_6', 'MFCC_std_7', 'MFCC_std_8', 'MFCC_std_9', 'MFCC_std_10', 'MFCC_std_11', 'MFCC_std_12', 'MFCC_std_13', 'pitch', 'energy']


In [9]:
import pandas as pd

# Input: the feature table written by the extraction cell. A relative path is
# used so this cell reads from the same working directory that the other cells
# write to and read from, instead of a machine-specific absolute location.
csv_file = "Before_mid_sem_Control_audio_features.csv"  # Replace with your CSV file path

# Load the CSV file and add a label column "output" with every value set to 1.
# .assign builds a new frame, so re-running the cell always starts from the file.
# NOTE(review): the previous comment said 0 while the code assigned 1; the code's
# value is kept -- confirm that 1 is the intended label for these recordings.
df = pd.read_csv(csv_file).assign(output=1)

# Save the labelled table under a new name in the working directory.
output_csv = "Updated_2_Before_mid_sem_Control_audio_features.csv"
df.to_csv(output_csv, index=False)

print(f"Updated CSV saved to {output_csv}")


Updated CSV saved to Updated_2_Before_mid_sem_Control_audio_features.csv


In [11]:
import pandas as pd

# Inputs: the two labelled feature tables. Relative paths are used so they are
# resolved against the working directory the labelled CSVs are written to,
# instead of a machine-specific absolute location.
csv_file1 = "Updated_2_Before_mid_sem_Control_audio_features.csv"  # Replace with your first CSV file path
csv_file2 = "Updated_Before_mid_sem_audio_features.csv"  # Replace with your second CSV file path

# Load CSV files
df1 = pd.read_csv(csv_file1)
df2 = pd.read_csv(csv_file2)

# pd.concat fills columns missing from one table with NaN without complaint,
# so fail loudly if the two tables do not share exactly the same columns.
mismatched_columns = set(df1.columns) ^ set(df2.columns)
if mismatched_columns:
    raise ValueError(
        f"Cannot combine feature tables with different columns: {sorted(mismatched_columns)}"
    )

# Stack the rows of both tables and renumber the index from 0.
combined_df = pd.concat([df1, df2], ignore_index=True)

# Save the combined data to a new CSV file
output_csv = "Combined_before_mid_sem.csv"
combined_df.to_csv(output_csv, index=False)

print(f"Combined CSV saved to {output_csv}")


Combined CSV saved to Combined_before_mid_sem.csv
