In [None]:
import zipfile
import os

def unzip_file(zip_path, extract_to):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

# Paths to your zip files and extraction directories
zip_paths = [
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master.zip',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master.zip',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master.zip',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master.zip',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Telugu_master.zip'
]

# Extract all zip files to their respective directories
for zip_path in zip_paths:
    extract_to = os.path.splitext(zip_path)[0]  # Extract to a folder with the same name as the zip file
    unzip_file(zip_path, extract_to)


In [None]:
pip install librosa

In [4]:
import librosa
import csv
import os
import numpy as np

def remove_silence(audio, sr, top_db=20):
    """
    Remove silent intervals from the audio signal.
    :param audio: Audio signal (numpy array)
    :param sr: Sampling rate
    :param top_db: The threshold (in decibels) below reference to consider as silence
    :return: Audio signal without silence
    """
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    non_silent_audio = np.concatenate([audio[start:end] for start, end in non_silent_intervals])
    return non_silent_audio

def extract_formants(audio, sr, frame_size=2048, lpc_order=8):
    """
    Extract the first two formants from the audio signal.
    :param audio: Audio signal (numpy array)
    :param sr: Sampling rate
    :param frame_size: Size of the frame for LPC analysis
    :param lpc_order: Order of the LPC analysis
    :return: Formants (first two formants)
    """
    formants = []
    for i in range(0, len(audio) - frame_size, frame_size):
        frame = audio[i:i + frame_size] * np.hamming(frame_size)
        A = librosa.lpc(frame, order=lpc_order)
        roots = np.roots(A)
        roots = [r for r in roots if np.imag(r) >= 0]  # Keep only positive imaginary roots
        angles = np.angle(roots)
        formants.extend(np.abs(np.cos(angles)) * sr / (2 * np.pi))
    
    formants = np.array(formants)
    if formants.size > 0:
        # Handle cases where formants array is not divisible by 2
        num_formants = formants.size // 2 * 2  # Ensure the number is even
        formants = formants[:num_formants]  # Trim excess if any
        formants = np.mean(formants.reshape(-1, 2), axis=0)  # Average formants across frames
    else:
        formants = np.zeros(2)  # Default to zeros if no formants are extracted
    
    return formants



def extract_features(audio, sr, output_label):
    # Remove silence from the audio
    audio = remove_silence(audio, sr)

    frame_size = int(0.03 * sr)
    hop_length = int(0.01 * sr)

    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, hop_length=hop_length, n_fft=frame_size).astype(np.float16)
    delta_mfcc = librosa.feature.delta(mfcc).astype(np.float16)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2).astype(np.float16)

    # Extract Pitch (Fundamental Frequency) and HNR
    pitches, magnitudes = librosa.core.piptrack(y=audio, sr=sr, hop_length=hop_length, n_fft=frame_size)
    pitch = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0.0
    hnr = librosa.effects.harmonic(y=audio).mean()

    # Extract Formants
    formants = extract_formants(audio, sr)

    # Extract Spectral Features
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)
    spectral_flatness = librosa.feature.spectral_flatness(y=audio, hop_length=hop_length).flatten().astype(np.float16)

    # Extract Zero-Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Extract RMS Energy
    rms = librosa.feature.rms(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Extract Chroma Features
    chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)

    num_frames = min(
        mfcc.shape[1],
        delta_mfcc.shape[1],
        delta2_mfcc.shape[1],
        len(zcr),
        len(rms),
        len(spectral_centroid),
        len(spectral_bandwidth),
        len(spectral_contrast),
        len(spectral_flatness),
        len(chroma_stft)
    )

    features = []
    for i in range(num_frames):
        frame_features = np.concatenate((
            mfcc[:, i],
            delta_mfcc[:, i],
            delta2_mfcc[:, i],
            [pitch],
            [hnr],
            formants,
            [zcr[i]],
            [rms[i]],
            [spectral_centroid[i]],
            [spectral_bandwidth[i]],
            spectral_contrast,
            [spectral_flatness[i]],
            chroma_stft
        ))
        features.append([frame_features, output_label])

    return features


def get_speaker_info(file_path):
    speaker_dict = {}
    with open(file_path, 'r') as f:
        next(f)  # Skip header
        for line in f:
            line_parts = line.split()
            gender = np.int8(1 if line_parts[1] == 'Male' else 0)
            age = round(float(line_parts[7]))
            age_group = [np.int8(0)] * 4
            age_index = min(max((age // 10) - 1, 0), 3)
            age_group[age_index] = np.int8(1)
            speaker_dict[line_parts[0]] = [gender] + age_group
    return speaker_dict

def write_header(writer):
    header = ['mfcc_' + str(i+1) for i in range(13)] + \
             ['delta_mfcc_' + str(i+1) for i in range(13)] + \
             ['delta2_mfcc_' + str(i+1) for i in range(13)] + \
             ['pitch', 'hnr', 'formant1', 'formant2', 'zcr', 'rms', 
              'spectral_centroid', 'spectral_bandwidth'] + \
             ['spectral_contrast_' + str(i+1) for i in range(7)] + \
             ['spectral_flatness'] + \
             ['chroma_stft_' + str(i+1) for i in range(12)] + \
             ['gender'] + ['age_group_' + str(i+1) for i in range(4)]
    writer.writerow(header)

def process_audio_files():
    source_dirs = [
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master\Tamil_master\English_Tamil',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master\Tamil_master\Tamil',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master\Malayalam_master\English_Malayalam',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master\Malayalam_master\Malayalam',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Hindi_master\Hindi_master\English_Hindi',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Hindi_master\Hindi_master\Hindi',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Telugu_master\Telugu_master\English_Telugu',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Telugu_master\Telugu_master\Telugu',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Kannada_master\Kannada_master\English_Kannada',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Kannada_master\Kannada_master\Kannada'
    ]
    speaker_info_file = r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\total_spkrinfo.list'
    speaker_dict = get_speaker_info(speaker_info_file)

    csv_file_number = 1
    record_count = 0
    max_records_per_file = 2000000

    csv_file = f'audio_features{csv_file_number}.csv'
    f = open(csv_file, 'a', newline='')
    writer = csv.writer(f)
    write_header(writer)

    for source_dir in source_dirs:
        for root, _, files in os.walk(source_dir):
            for file in files:
                if file.endswith('.wav'):
                    audio_file = os.path.join(root, file)
                    speaker_id = file[:8]
                    output_label = speaker_dict.get(speaker_id, [0] * 5)
                    audio, sr = librosa.load(audio_file, sr=None)
                    features = extract_features(audio, sr, output_label)
                    for frame_features in features:
                        writer.writerow(frame_features[0].tolist() + [frame_features[1][0]] + frame_features[1][1:])
                        record_count += 1

                        if record_count >= max_records_per_file:
                            f.close()
                            csv_file_number += 1
                            csv_file = f'audio_features{csv_file_number}.csv'
                            f = open(csv_file, 'a', newline='')
                            writer = csv.writer(f)
                            write_header(writer)
                            record_count = 0

                    print(f'Processed {file} - {record_count} records in current file')

    f.close()
    print("Feature extraction and CSV writing completed")

if __name__ == '__main__':
    process_audio_files()


ValueError: cannot reshape array of size 265 into shape (2)

In [1]:
import librosa
import csv
import os
import numpy as np
from scipy.stats import skew, kurtosis

def safe_stats(array):
    """Replace inf and nan with zero and ensure valid statistics."""
    array = np.array(array)
    array[np.isnan(array)] = 0
    array[np.isinf(array)] = 0
    return array

def extract_features(audio, sr, output_label):
    frame_size = int(0.03 * sr)
    hop_length = int(0.01 * sr)

    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, hop_length=hop_length, n_fft=frame_size).astype(np.float16)
    delta_mfcc = librosa.feature.delta(mfcc).astype(np.float16)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2).astype(np.float16)

    # Extract Pitch (Fundamental Frequency)
    pitches, _ = librosa.core.piptrack(y=audio, sr=sr, hop_length=hop_length, n_fft=frame_size)
    pitch = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0.0

    # Extract Spectral Features
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)

    # Extract Zero-Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Extract RMS Energy
    rms = librosa.feature.rms(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Statistics for MFCCs
    mfccs_stats = np.hstack([
        safe_stats(np.mean(mfcc, axis=1)),
        safe_stats(np.std(mfcc, axis=1)),
        safe_stats(skew(mfcc, axis=1)),
        safe_stats(kurtosis(mfcc, axis=1))
    ])

    # Ensure consistent lengths
    num_frames = min(
        mfcc.shape[1],
        delta_mfcc.shape[1],
        delta2_mfcc.shape[1],
        len(zcr),
        len(rms),
        len(spectral_centroid),
        len(spectral_bandwidth)
    )

    features = []
    for i in range(num_frames):
        frame_features = np.concatenate((
            mfcc[:, i],
            delta_mfcc[:, i],
            delta2_mfcc[:, i],
            [pitch],
            [zcr[i]],
            [rms[i]],
            [spectral_centroid[i]],
            [spectral_bandwidth[i]],
            mfccs_stats
        ))
        features.append([frame_features, output_label])

    return features

def get_speaker_info(file_path):
    speaker_dict = {}
    with open(file_path, 'r') as f:
        next(f)  # Skip header
        for line in f:
            line_parts = line.split()
            gender = np.int8(1 if line_parts[1] == 'Male' else 0)
            age = round(float(line_parts[7]))
            age_group = [np.int8(0)] * 4
            age_index = min(max((age // 10) - 1, 0), 3)
            age_group[age_index] = np.int8(1)
            speaker_dict[line_parts[0]] = [gender] + age_group
    return speaker_dict

def write_header(writer):
    header = ['mfcc_' + str(i+1) for i in range(13)] + \
             ['delta_mfcc_' + str(i+1) for i in range(13)] + \
             ['delta2_mfcc_' + str(i+1) for i in range(13)] + \
             ['pitch', 'zcr', 'rms', 'spectral_centroid', 'spectral_bandwidth'] + \
             ['mfcc_mean_' + str(i+1) for i in range(13)] + \
             ['mfcc_std_' + str(i+1) for i in range(13)] + \
             ['mfcc_skew_' + str(i+1) for i in range(13)] + \
             ['mfcc_kurt_' + str(i+1) for i in range(13)] + \
             ['gender'] + ['age_group_' + str(i+1) for i in range(4)]
    writer.writerow(header)

def process_audio_files():
    source_dirs = [
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master\Tamil_master\English_Tamil',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master\Tamil_master\Tamil - Copy',
    ]
    speaker_info_file = r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\total_spkrinfo.list'
    speaker_dict = get_speaker_info(speaker_info_file)

    csv_file_number = 1
    record_count = 0
    max_records_per_file = 2000000

    csv_file = f'audio_features{csv_file_number}.csv'
    f = open(csv_file, 'a', newline='')
    writer = csv.writer(f)
    write_header(writer)

    for source_dir in source_dirs:
        for root, _, files in os.walk(source_dir):
            for file in files:
                if file.endswith('.wav'):
                    audio_file = os.path.join(root, file)
                    speaker_id = file[:8]
                    output_label = speaker_dict.get(speaker_id, [0] * 5)
                    audio, sr = librosa.load(audio_file, sr=None)
                    features = extract_features(audio, sr, output_label)
                    for frame_features in features:
                        writer.writerow(frame_features[0].tolist() + [frame_features[1][0]] + frame_features[1][1:])
                        record_count += 1

                        if record_count >= max_records_per_file:
                            f.close()
                            csv_file_number += 1
                            csv_file = f'audio_features{csv_file_number}.csv'
                            f = open(csv_file, 'a', newline='')
                            writer = csv.writer(f)
                            write_header(writer)
                            record_count = 0

                    print(f'Processed {file} - {record_count} records in current file')

    f.close()
    print("Feature extraction and CSV writing completed")

if __name__ == '__main__':
    process_audio_files()


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)
  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)
  s *= a_zero_mean
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  s = s**2


Processed Tam_0001_Eng_m_0000.wav - 666 records in current file
Processed Tam_0001_Eng_m_0001.wav - 1323 records in current file
Processed Tam_0001_Eng_m_0002.wav - 2330 records in current file
Processed Tam_0001_Eng_m_0003.wav - 3759 records in current file
Processed Tam_0001_Eng_m_0004.wav - 4538 records in current file
Processed Tam_0001_Eng_m_0005.wav - 5317 records in current file
Processed Tam_0001_Eng_m_8081.wav - 7264 records in current file
Processed Tam_0001_Eng_m_8082.wav - 9818 records in current file
Processed Tam_0001_Eng_m_8083.wav - 10709 records in current file
Processed Tam_0001_Eng_m_8084.wav - 12300 records in current file
Processed Tam_0001_Eng_m_8085.wav - 13688 records in current file
Processed Tam_0001_Eng_m_8086.wav - 15829 records in current file


  s = a_zero_mean**2


Processed Tam_0001_Eng_m_8087.wav - 16289 records in current file
Processed Tam_0001_Eng_m_8088.wav - 18305 records in current file
Processed Tam_0001_Eng_m_8089.wav - 20349 records in current file
Processed Tam_0001_Eng_m_8090.wav - 23050 records in current file
Processed Tam_0001_Eng_m_8091.wav - 24860 records in current file
Processed Tam_0001_Eng_m_8092.wav - 26689 records in current file
Processed Tam_0001_Eng_m_8093.wav - 29527 records in current file
Processed Tam_0001_Eng_m_8094.wav - 31527 records in current file
Processed Tam_0001_Eng_m_8095.wav - 33584 records in current file
Processed Tam_0001_Eng_m_8096.wav - 35431 records in current file
Processed Tam_0001_Eng_m_8097.wav - 36210 records in current file
Processed Tam_0001_Eng_m_8098.wav - 37954 records in current file
Processed Tam_0001_Eng_m_8099.wav - 39229 records in current file
Processed Tam_0001_Eng_m_8100.wav - 42020 records in current file
Processed Tam_0001_Eng_m_8101.wav - 43233 records in current file
Processed 

  rel_diff = np.max(np.abs(a_zero_mean), axis=axis,


Processed Tam_0006_Eng_m_0002.wav - 194329 records in current file
Processed Tam_0006_Eng_m_0003.wav - 195483 records in current file
Processed Tam_0006_Eng_m_0004.wav - 196159 records in current file
Processed Tam_0006_Eng_m_0005.wav - 196800 records in current file
Processed Tam_0006_Eng_m_8256.wav - 198116 records in current file
Processed Tam_0006_Eng_m_8257.wav - 199632 records in current file
Processed Tam_0006_Eng_m_8258.wav - 201089 records in current file
Processed Tam_0006_Eng_m_8259.wav - 202361 records in current file
Processed Tam_0006_Eng_m_8260.wav - 204633 records in current file
Processed Tam_0006_Eng_m_8261.wav - 205549 records in current file
Processed Tam_0006_Eng_m_8262.wav - 206681 records in current file
Processed Tam_0006_Eng_m_8263.wav - 208778 records in current file
Processed Tam_0006_Eng_m_8264.wav - 210010 records in current file
Processed Tam_0006_Eng_m_8265.wav - 210807 records in current file
Processed Tam_0006_Eng_m_8266.wav - 212604 records in current 

In [3]:
import pandas as pd

def remove_zero_columns_from_files(file_paths, columns_to_remove):
    for file_path in file_paths:
        # Load the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Check if columns to remove exist in the DataFrame
        existing_columns_to_remove = [col for col in columns_to_remove if col in df.columns]
        
        # Drop the columns that are all zeros
        df = df.drop(columns=existing_columns_to_remove)
        
        # Save the modified DataFrame back to CSV
        df.to_csv(file_path, index=False)
        print(f"Updated file saved: {file_path}")

# List of file paths to your CSV files
file_paths = [
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features1.csv'
]

# List of columns that are all zeros across all files
zero_columns_across_all_files = [
    'mfcc_std_1', 'mfcc_skew_1', 'mfcc_kurt_1',
    'mfcc_kurt_2', 'mfcc_kurt_3', 'mfcc_kurt_4', 
    'mfcc_kurt_5', 'mfcc_kurt_6', 'mfcc_kurt_7', 
    'mfcc_kurt_9', 'mfcc_kurt_13','mfcc_skew_3'
]

# Remove the zero columns from all files and save them
remove_zero_columns_from_files(file_paths, zero_columns_across_all_files)


Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features1.csv


In [1]:
import librosa
import csv
import os
import numpy as np
from scipy.stats import skew, kurtosis

def extract_features(audio, sr, output_label):
    frame_size = int(0.03 * sr)
    hop_length = int(0.01 * sr)

    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, hop_length=hop_length, n_fft=frame_size).astype(np.float16)
    delta_mfcc = librosa.feature.delta(mfcc).astype(np.float16)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2).astype(np.float16)

    # Extract Pitch (Fundamental Frequency)
    pitches, _ = librosa.core.piptrack(y=audio, sr=sr, hop_length=hop_length, n_fft=frame_size)
    pitch = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0.0

    # Extract Spectral Features
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, hop_length=hop_length).flatten().astype(np.float16)

    # Extract Zero-Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Extract RMS Energy
    rms = librosa.feature.rms(y=audio, hop_length=hop_length, frame_length=frame_size).flatten().astype(np.float16)

    # Statistics for MFCCs
    mfccs_stats = np.hstack([np.mean(mfcc, axis=1), np.std(mfcc, axis=1),
                              skew(mfcc, axis=1), kurtosis(mfcc, axis=1)])

    # Ensure consistent lengths
    num_frames = min(
        mfcc.shape[1],
        delta_mfcc.shape[1],
        delta2_mfcc.shape[1],
        len(zcr),
        len(rms),
        len(spectral_centroid),
        len(spectral_bandwidth)
    )

    features = []
    for i in range(num_frames):
        frame_features = np.concatenate((
            mfcc[:, i],
            delta_mfcc[:, i],
            delta2_mfcc[:, i],
            [pitch],
            [zcr[i]],
            [rms[i]],
            [spectral_centroid[i]],
            [spectral_bandwidth[i]],
            mfccs_stats
        ))
        features.append([frame_features, output_label])

    return features

def get_speaker_info(file_path):
    speaker_dict = {}
    with open(file_path, 'r') as f:
        next(f)  # Skip header
        for line in f:
            line_parts = line.split()
            gender = np.int8(1 if line_parts[1] == 'Male' else 0)
            age = round(float(line_parts[7]))
            age_group = [np.int8(0)] * 4
            age_index = min(max((age // 10) - 1, 0), 3)
            age_group[age_index] = np.int8(1)
            speaker_dict[line_parts[0]] = [gender] + age_group
    return speaker_dict

def write_header(writer):
    header = ['mfcc_' + str(i+1) for i in range(13)] + \
             ['delta_mfcc_' + str(i+1) for i in range(13)] + \
             ['delta2_mfcc_' + str(i+1) for i in range(13)] + \
             ['pitch', 'zcr', 'rms', 'spectral_centroid', 'spectral_bandwidth'] + \
             ['mfcc_mean_' + str(i+1) for i in range(13)] + \
             ['mfcc_std_' + str(i+1) for i in range(13)] + \
             ['mfcc_skew_' + str(i+1) for i in range(13)] + \
             ['mfcc_kurt_' + str(i+1) for i in range(13)] + \
             ['gender'] + ['age_group_' + str(i+1) for i in range(4)]
    writer.writerow(header)

def process_audio_files():
    source_dirs = [
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master - Copy\Tamil_master\English_Tamil',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Tamil_master - Copy\Tamil_master\Tamil',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master - Copy\Malayalam_master\English_Malayalam',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Malayalam_master - Copy\Malayalam_master\Malayalam',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Hindi_master - Copy\Hindi_master\English_Hindi',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Hindi_master - Copy\Hindi_master\Hindi',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Telugu_master - Copy\Telugu_master\English_Telugu',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Telugu_master - Copy\Telugu_master\Telugu',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Kannada_master - Copy\Kannada_master\English_Kannada',
        r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Kannada_master - Copy\Kannada_master\Kannada'
    ]
    speaker_info_file = r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\total_spkrinfo.list'
    speaker_dict = get_speaker_info(speaker_info_file)

    csv_file_number = 1
    record_count = 0
    max_records_per_file = 2000000

    csv_file = f'audio_features{csv_file_number}.csv'
    f = open(csv_file, 'a', newline='')
    writer = csv.writer(f)
    write_header(writer)

    for source_dir in source_dirs:
        for root, _, files in os.walk(source_dir):
            for file in files:
                if file.endswith('.wav'):
                    audio_file = os.path.join(root, file)
                    speaker_id = file[:8]
                    output_label = speaker_dict.get(speaker_id, [0] * 5)
                    audio, sr = librosa.load(audio_file, sr=None)
                    features = extract_features(audio, sr, output_label)
                    for frame_features in features:
                        writer.writerow(frame_features[0].tolist() + [frame_features[1][0]] + frame_features[1][1:])
                        record_count += 1

                        if record_count >= max_records_per_file:
                            f.close()
                            csv_file_number += 1
                            csv_file = f'audio_features{csv_file_number}.csv'
                            f = open(csv_file, 'a', newline='')
                            writer = csv.writer(f)
                            write_header(writer)
                            record_count = 0

                    print(f'Processed {file} - {record_count} records in current file')

    f.close()
    print("Feature extraction and CSV writing completed")

if __name__ == '__main__':
    process_audio_files()


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)
  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)
  s *= a_zero_mean
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  s = s**2


Processed Tam_0003_Eng_m_0000.wav - 572 records in current file
Processed Tam_0003_Eng_m_0001.wav - 1107 records in current file
Processed Tam_0003_Eng_m_0002.wav - 1889 records in current file
Processed Tam_0003_Eng_m_0003.wav - 3171 records in current file
Processed Tam_0003_Eng_m_0004.wav - 3778 records in current file


  s = a_zero_mean**2


Processed Tam_0003_Eng_m_0005.wav - 4307 records in current file
Processed Tam_0003_Eng_m_8150.wav - 5486 records in current file
Processed Tam_0003_Eng_m_8151.wav - 6021 records in current file
Processed Tam_0003_Eng_m_8152.wav - 8084 records in current file
Processed Tam_0003_Eng_m_8153.wav - 10238 records in current file
Processed Tam_0003_Eng_m_8154.wav - 11504 records in current file
Processed Tam_0003_Eng_m_8155.wav - 12555 records in current file
Processed Tam_0003_Eng_m_8156.wav - 14259 records in current file
Processed Tam_0003_Eng_m_8157.wav - 15641 records in current file
Processed Tam_0003_Eng_m_8158.wav - 18013 records in current file
Processed Tam_0003_Eng_m_8159.wav - 19217 records in current file
Processed Tam_0003_Eng_m_8160.wav - 20833 records in current file
Processed Tam_0003_Eng_m_8161.wav - 23418 records in current file
Processed Tam_0003_Eng_m_8162.wav - 24594 records in current file
Processed Tam_0003_Eng_m_8163.wav - 26535 records in current file
Processed Tam_

  rel_diff = np.max(np.abs(a_zero_mean), axis=axis,


Processed Tam_0011_Eng_f_8455.wav - 170249 records in current file
Processed Tam_0011_Eng_f_8456.wav - 171981 records in current file
Processed Tam_0011_Eng_f_8457.wav - 172835 records in current file
Processed Tam_0011_Eng_f_8458.wav - 174423 records in current file
Processed Tam_0011_Eng_f_8459.wav - 175555 records in current file
Processed Tam_0011_Eng_f_8460.wav - 176259 records in current file
Processed Tam_0011_Eng_f_8461.wav - 177535 records in current file
Processed Tam_0011_Eng_f_8462.wav - 178823 records in current file
Processed Tam_0011_Eng_f_8463.wav - 179383 records in current file
Processed Tam_0011_Eng_f_8464.wav - 180359 records in current file
Processed Tam_0011_Eng_f_8465.wav - 181244 records in current file
Processed Tam_0015_Eng_m_0000.wav - 181798 records in current file
Processed Tam_0015_Eng_m_0001.wav - 182342 records in current file
Processed Tam_0015_Eng_m_0002.wav - 183052 records in current file
Processed Tam_0015_Eng_m_0003.wav - 184131 records in current 

In [2]:
import pandas as pd

def remove_zero_columns_from_files(file_paths, columns_to_remove):
    for file_path in file_paths:
        # Load the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Check if columns to remove exist in the DataFrame
        existing_columns_to_remove = [col for col in columns_to_remove if col in df.columns]
        
        # Drop the columns that are all zeros
        df = df.drop(columns=existing_columns_to_remove)
        
        # Save the modified DataFrame back to CSV
        df.to_csv(file_path, index=False)
        print(f"Updated file saved: {file_path}")

# List of file paths to your CSV files
file_paths = [
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features1.csv',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features2.csv',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features3.csv',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features4.csv',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features5.csv',
    r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features6.csv'
]

# List of columns that are all zeros across all files
zero_columns_across_all_files = [
    'mfcc_std_1', 'mfcc_skew_1', 'mfcc_kurt_1',
    'mfcc_kurt_2', 'mfcc_kurt_3', 'mfcc_kurt_4', 
    'mfcc_kurt_5', 'mfcc_kurt_6', 'mfcc_kurt_7', 
    'mfcc_kurt_9', 'mfcc_kurt_13','mfcc_skew_3'
]

# Remove the zero columns from all files and save them
remove_zero_columns_from_files(file_paths, zero_columns_across_all_files)


Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features1.csv
Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features2.csv
Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features3.csv
Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features4.csv
Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features5.csv
Updated file saved: C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\audio_features6.csv


In [2]:
import librosa
import numpy as np
import soundfile as sf

def remove_silence_and_save(input_file, output_file, top_db=20):
    # Load the audio file
    audio, sr = librosa.load(input_file, sr=None)
    
    # Remove silence from the audio
    non_silent_intervals = librosa.effects.split(audio, top_db=top_db)
    non_silent_audio = np.concatenate([audio[start:end] for start, end in non_silent_intervals])
    
    # Save the non-silent audio to a new file
    sf.write(output_file, non_silent_audio, sr)
    print(f"Processed audio saved to {output_file}")

# Usage example:
input_audio = r'C:\Users\deeps\VSCodeProjects\Jio_internship_project_2\Hindi_master\Hindi_master\English_Hindi\RECS\0002\Hin_0002_Eng_f_0000.wav'
output_audio = 'your_audio_file_no_silence.wav'
remove_silence_and_save(input_audio, output_audio)


Processed audio saved to your_audio_file_no_silence.wav
