In [4]:
import librosa
import numpy as np
import pandas as pd
import os

# Function to extract features from an audio segment
def extract_features(y, sr):
    """Summarise one audio segment as a flat dict of averaged descriptors.

    Args:
        y: Audio samples of the segment, passed straight to ``librosa.feature``.
        sr: Sampling rate of ``y``, passed to the rate-dependent descriptors.

    Returns:
        A dict whose keys are, in order: ``mfcc_0`` .. ``mfcc_19`` (mean of
        each of the 20 MFCC rows along axis 1), then ``chroma_mean``,
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``zero_crossing_rate`` and ``rms_energy`` (each the mean over the
        whole array returned by the corresponding librosa function).
    """
    feature = librosa.feature

    # One averaged value per MFCC coefficient row.
    per_coefficient = np.mean(feature.mfcc(y=y, sr=sr, n_mfcc=20), axis=1)
    features = {f"mfcc_{idx}": coeff for idx, coeff in enumerate(per_coefficient)}

    # Remaining descriptors are each collapsed to a single scalar; chroma is
    # averaged over all of its bins rather than kept per bin.
    scalar_descriptors = (
        ("chroma_mean", feature.chroma_stft(y=y, sr=sr)),
        ("spectral_centroid", feature.spectral_centroid(y=y, sr=sr)),
        ("spectral_bandwidth", feature.spectral_bandwidth(y=y, sr=sr)),
        ("spectral_rolloff", feature.spectral_rolloff(y=y, sr=sr)),
        ("zero_crossing_rate", feature.zero_crossing_rate(y=y)),
        ("rms_energy", feature.rms(y=y)),
    )
    for name, values in scalar_descriptors:
        features[name] = np.mean(values)

    return features

# Function to process an audio file in fixed-length intervals (10 seconds by default)
def process_audio_in_intervals(file_path, type, interval_duration=10):
    """Extract features from consecutive, non-overlapping intervals of a file.

    The file is loaded with ``librosa.load(file_path, sr=None)`` and cut into
    back-to-back segments of ``interval_duration`` seconds. The final segment
    is kept even when it is shorter than a full interval.

    Args:
        file_path: Path of the audio file to load.
        type: Label stored under the ``"type"`` key of every row (the callers
            in this file pass ``"REAL"`` or ``"FAKE"``). The name shadows the
            builtin but is kept for caller compatibility.
        interval_duration: Interval length in seconds; may be fractional.

    Returns:
        A list with one feature dict per interval: the output of
        ``extract_features`` plus ``"type"``, ``"interval_start"`` and
        ``"interval_end"`` (both in seconds).

    Raises:
        ValueError: If ``interval_duration`` corresponds to less than one
            sample (zero, negative, or too small for the sampling rate).
    """
    y, sr = librosa.load(file_path, sr=None)  # Load audio file

    # range() only accepts integers, so a fractional duration (e.g. 0.5 s)
    # must be truncated to a whole number of samples.
    interval_samples = int(sr * interval_duration)
    if interval_samples < 1:
        raise ValueError(
            f"interval_duration must cover at least one sample, got {interval_duration!r}"
        )

    features_list = []
    total_samples = len(y)

    # Iterate over intervals
    for start_sample in range(0, total_samples, interval_samples):
        end_sample = min(start_sample + interval_samples, total_samples)
        y_segment = y[start_sample:end_sample]  # Extract segment

        # Extract features for this segment
        features = extract_features(y_segment, sr)
        features["type"] = type  # Add type (real or fake)
        features["interval_start"] = start_sample / sr  # Start time of the interval (in seconds)
        features["interval_end"] = end_sample / sr  # End time of the interval (in seconds)
        features_list.append(features)

    return features_list

# Function to process all files in a directory
def process_directory(directory_path, type, interval_duration=10):
    """Build a feature table for every audio file directly inside a directory.

    Entries come from ``os.listdir(directory_path)``. Only regular files
    whose lower-cased name ends in ``.wav``, ``.mp3`` or ``.flac`` are
    processed; everything else is skipped. A progress line is printed for
    each processed file.

    Args:
        directory_path: Directory to scan (not recursive).
        type: Label forwarded to ``process_audio_in_intervals``.
        interval_duration: Interval length in seconds, forwarded as well.

    Returns:
        A ``pandas.DataFrame`` with one row per interval. Each row is the
        dict from ``process_audio_in_intervals`` plus a ``"file_name"`` key.
    """
    audio_suffixes = ('.wav', '.mp3', '.flac')
    rows = []

    for entry in os.listdir(directory_path):
        candidate = os.path.join(directory_path, entry)
        is_audio_file = os.path.isfile(candidate) and entry.lower().endswith(audio_suffixes)
        if not is_audio_file:
            continue

        print(f"Processing file: {entry}")
        for row in process_audio_in_intervals(candidate, type, interval_duration):
            row["file_name"] = entry  # Remember which file the interval came from
            rows.append(row)

    return pd.DataFrame(rows)
# NOTE: the triple-quoted string below is never executed. It holds alternative
# versions of process_audio_in_intervals and process_directory that use
# overlapping intervals (interval_duration=3, overlap=0.5 by default) and drop
# a trailing interval that would run past the end of the audio. Kept for
# reference only.
'''
functions used to extract intervals with overlap
def process_audio_in_intervals(file_path, type, interval_duration=3, overlap=0.5):
    y, sr = librosa.load(file_path, sr=None)  # Load audio file
    interval_samples = int(sr * interval_duration)  # Number of samples per interval
    overlap_samples = int(sr * overlap)  # Number of samples for overlap
    step_samples = interval_samples - overlap_samples  # Step size between intervals

    features_list = []
    total_samples = len(y)

    # Iterate over intervals with overlap
    for start_sample in range(0, total_samples, step_samples):
        end_sample = start_sample + interval_samples
        if end_sample > total_samples:
            break  # Stop if the interval exceeds the audio length

        y_segment = y[start_sample:end_sample]  # Extract segment

        # Extract features for this segment
        features = extract_features(y_segment, sr)
        features["type"] = type  # Add type (real or fake)
        features["interval_start"] = start_sample / sr  # Start time of the interval (in seconds)
        features["interval_end"] = end_sample / sr  # End time of the interval (in seconds)
        features_list.append(features)

    return features_list

# Function to process all files in a directory
def process_directory(directory_path, type, interval_duration=3, overlap=0.5):
    all_features = []
    for file_name in os.listdir(directory_path):
        file_path = os.path.join(directory_path, file_name)
        if os.path.isfile(file_path) and file_name.lower().endswith(('.wav', '.mp3', '.flac')):  # Check for audio files
            print(f"Processing file: {file_name}")
            file_features = process_audio_in_intervals(file_path, type, interval_duration, overlap)
            for feature_set in file_features:
                feature_set["file_name"] = file_name  # Add file name for reference
            all_features.extend(file_features)

    return pd.DataFrame(all_features)
    '''

# Example usage: extract features for each labelled directory and write one
# CSV per label. Replace the directory paths with the paths to your own data.
_jobs = (
    (
        "FAKE",
        "/content/drive/MyDrive/KAGGLE/AUDIO/FAKE",
        "FAKE_audio_features_directory_10secNoOverlap.csv",
    ),
    (
        "REAL",
        "/content/drive/MyDrive/KAGGLE/AUDIO/REAL",
        "REAL_audio_features_directory_10secNoOverlap.csv",
    ),
)
_frames = {}
for _label, directory_path, _csv_name in _jobs:
    _frames[_label] = process_directory(directory_path, _label)
    _frames[_label].to_csv(_csv_name, index=False)  # Save to a CSV file

# Keep the notebook-level names for the two result tables.
df = _frames["FAKE"]
df2 = _frames["REAL"]

# Print the DataFrame
print(df2)

Processing file: Obama-to-Biden.wav
Processing file: Obama-to-Trump.wav
Processing file: biden-to-Obama.wav
Processing file: biden-to-Trump.wav
Processing file: biden-to-linus.wav
Processing file: biden-to-margot.wav
Processing file: biden-to-musk.wav
Processing file: biden-to-ryan.wav
Processing file: biden-to-taylor.wav
Processing file: linus-to-biden.wav
Processing file: linus-to-margot.wav
Processing file: linus-to-musk.wav
Processing file: linus-to-obama.wav
Processing file: linus-to-ryan.wav
Processing file: linus-to-taylor.wav
Processing file: margot-to-biden.wav
Processing file: linus-to-trump.wav
Processing file: margot-to-linus.wav
Processing file: margot-to-musk.wav
Processing file: margot-to-obama.wav
Processing file: margot-to-ryan.wav
Processing file: margot-to-taylor.wav
Processing file: margot-to-trump.wav
Processing file: musk-to-biden.wav
Processing file: musk-to-linus.wav
Processing file: musk-to-margot.wav
Processing file: musk-to-obama.wav
Processing file: musk-to-



Processing file: margot-original.wav
Processing file: linus-original.wav
Processing file: musk-original.wav
Processing file: ryan-original.wav
Processing file: obama-original.wav
Processing file: taylor-original.wav
Processing file: trump-original.wav
         mfcc_0      mfcc_1      mfcc_2     mfcc_3     mfcc_4     mfcc_5  \
0   -292.628998  183.571457  -66.835434 -16.946964 -27.256487  -5.341168   
1   -282.826691  185.599487  -59.464340  -8.625219 -27.985434   1.459049   
2   -323.364929  184.040573  -44.738613 -10.211464 -23.815020  -1.249293   
3   -316.016479  177.245499  -47.671532  -3.562074 -21.484310  -1.374686   
4   -352.062042  177.053192  -32.603863 -13.656229  -9.804168   6.783495   
..          ...         ...         ...        ...        ...        ...   
376 -321.708313  149.902298  -62.989723  17.462248 -20.380974   8.208504   
377 -327.140137  172.507156  -63.258026   1.172269 -22.579338 -10.327603   
378 -382.578217  148.488235  -31.333691  18.114008 -13.033998  1