<a href="https://colab.research.google.com/github/Datascientisit/ML_regression/blob/main/extract_feature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa
import pandas as pd
import numpy as np
import os
from scipy.stats import kurtosis, mode
import IPython.display as ipd

In [None]:
# Read the reference voice table from the mounted drive path.
voice = pd.read_csv(filepath_or_buffer="/content/drive/MyDrive/voice.csv")

In [None]:
# Decode one sample clip; 22050 Hz is librosa's default target rate, spelled
# out here because later cells rely on the resulting global ``sr``.
y, sr = librosa.load(
    "/content/audios/Thompson-Advanced-WB-11.2.mp3",
    sr=22050,
)

In [None]:
# Echo the loaded sample array as the cell output.
y

In [None]:
# Quick visual check of the raw waveform (x axis is the sample index).
pd.Series(y).plot(title="Voice Signal", figsize=(10, 5))

In [None]:
# Build an inline player for the same clip that was loaded above, then echo it
# so the notebook renders the widget as the cell output.
audio = ipd.Audio("/content/audios/Thompson-Advanced-WB-11.2.mp3")
audio

In [None]:
def extract_mean_frequency(y, sr=22050):
    """Return the mean spectral centroid of an audio signal.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The mean over all values of the spectral-centroid matrix.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid.mean()

def extract_standard_deviation(y, sr=22050):
    """Return the standard deviation of the spectral centroid of a signal.

    NOTE: a later cell defines another ``extract_standard_deviation`` (based
    on MFCCs); once that cell has run, this definition is no longer reachable
    by name.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The standard deviation over all values of the spectral-centroid matrix.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid.std()

def extract_median(y, sr=22050):
    """Return the median spectral centroid of an audio signal.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The median over all values of the spectral-centroid matrix.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return np.median(centroid)

def extract_kurtosis(y, sr=22050):
    """Return the excess (Fisher) kurtosis of the raw samples in ``y``.

    Unlike the neighbouring helpers this works on the waveform itself, not on
    a spectral feature.

    Args:
        y: Audio samples.
        sr: Unused; kept so the signature matches the other helpers.

    Returns:
        The biased Fisher kurtosis computed along the first axis of ``y``.
    """
    # These keyword values are scipy's defaults, written out for clarity.
    return kurtosis(y, axis=0, fisher=True, bias=True)

def extract_mode(y, sr=22050):
    """Return the mode of the spectral centroid of an audio signal.

    librosa returns the centroid as a one-row matrix. Taking the mode along
    scipy's default axis (axis 0) would therefore treat every frame as its own
    one-element column and hand back the input frames unchanged. Passing
    ``axis=None`` flattens the matrix so a single mode over all frames is
    computed.

    NOTE: a later cell defines another ``extract_mode(y)`` that works on the
    raw samples; once that cell has run, this definition is no longer
    reachable by name.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The ``scipy.stats.mode`` result (modal value and its count) computed
        over all centroid frames.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return mode(centroid, axis=None)

In [None]:
# Summary statistics for the sample clip loaded earlier; every helper is
# called with its default sampling rate.
mean_freq = extract_mean_frequency(y)
std_freq = extract_standard_deviation(y)
median_freq = extract_median(y)
kurtosis_freq = extract_kurtosis(y)
mode_freq = extract_mode(y)

In [None]:
# Show each computed statistic on its own line.
print(mean_freq, std_freq, median_freq, kurtosis_freq, mode_freq, sep="\n")

In [None]:
def extract_extral_flatness(y):
    """Return the spectral flatness of ``y`` as a 1-D sequence.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_flatness``.

    Returns:
        The first row of the spectral-flatness matrix.
    """
    flatness = librosa.feature.spectral_flatness(y=y)
    return flatness[0]

def extract_mode(y):
    """Return the most frequent value among the raw samples in ``y``.

    NOTE: this replaces the earlier ``extract_mode(y, sr)`` (which worked on
    the spectral centroid) once this cell has run.

    Args:
        y: Array of samples.

    Returns:
        The ``mode`` field of the ``scipy.stats.mode`` result.
    """
    result = mode(y)
    return result.mode

def extract_centroid(y, sr=22050):
    """Return the spectral centroid of ``y`` as a 1-D sequence.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_centroid``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The first row of the spectral-centroid matrix.
    """
    return librosa.feature.spectral_centroid(y=y, sr=sr)[0]

def extract_peak(y, sr=22050):
    """Return the spectral roll-off of ``y`` as a 1-D sequence.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_rolloff``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The first row of the spectral roll-off matrix.
    """
    return librosa.feature.spectral_rolloff(y=y, sr=sr)[0]

def extract_mean_function(y, sr=22050):
    """Return the mean over all MFCC values of ``y``.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The mean of the whole MFCC matrix.
    """
    return librosa.feature.mfcc(y=y, sr=sr).mean()

def extract_standard_deviation(y, sr=22050):
    """Return the standard deviation over all MFCC values of ``y``.

    NOTE: this shares its name with the earlier spectral-centroid based
    ``extract_standard_deviation`` and replaces it once this cell has run.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The standard deviation of the whole MFCC matrix.
    """
    return librosa.feature.mfcc(y=y, sr=sr).std()

def extract_min_function(y, sr=22050):
    """Return the smallest MFCC value of ``y``.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The minimum of the whole MFCC matrix.
    """
    return librosa.feature.mfcc(y=y, sr=sr).min()

def extract_max_function(y, sr=22050):
    """Return the largest MFCC value of ``y``.

    The sampling rate is an explicit argument, with the same default as the
    other helpers, so the function does not depend on a notebook-level ``sr``
    variable having been defined.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The maximum of the whole MFCC matrix.
    """
    return librosa.feature.mfcc(y=y, sr=sr).max()

def extract_mean_dom(y, sr):
    """Return the mean spectral bandwidth of ``y``.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The mean of the first row of the spectral-bandwidth matrix.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth[0].mean()

def extract_std_dom(y, sr):
    """Return the standard deviation of the spectral bandwidth of ``y``.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The standard deviation of the first row of the spectral-bandwidth
        matrix.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth[0].std()

def extract_min_dom(y, sr):
    """Return the smallest spectral-bandwidth value of ``y``.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The minimum of the first row of the spectral-bandwidth matrix.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth[0].min()

def extract_max_dom(y, sr):
    """Return the largest spectral-bandwidth value of ``y``.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.spectral_bandwidth``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The maximum of the first row of the spectral-bandwidth matrix.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return bandwidth[0].max()

def dfrange(maxdom, mindom):
    """Return the spread between a maximum and a minimum value.

    Args:
        maxdom: The upper value.
        mindom: The lower value.

    Returns:
        ``maxdom`` minus ``mindom``.
    """
    spread = maxdom - mindom
    return spread

def extract_modindx(y, sr):
    """Return the mean of the tempogram of ``y``.

    Args:
        y: Audio samples, as accepted by ``librosa.feature.tempogram``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        The mean over all values of the tempogram matrix.
    """
    tempogram = librosa.feature.tempogram(y=y, sr=sr)
    return tempogram.mean()

In [None]:
def extract_features(audio_file, sr=sr):
    """Load one audio file and summarise it as a flat dict of scalar features.

    Every value in the returned dict is a single number, so a list of these
    dicts converts directly into a DataFrame/CSV with one row per file.

    Args:
        audio_file: Path of the audio file to load with ``librosa.load``.
        sr: Target sampling rate in Hz passed to ``librosa.load``. The default
            is whatever the notebook-level ``sr`` was when this cell ran.

    Returns:
        A dict with the keys ``meanfreq``, ``sd``, ``median``, ``mode``,
        ``centroid``, ``meanfun``, ``minfun``, ``maxfun``, ``meandom``,
        ``mindom``, ``maxdom`` and ``modindx``.
    """
    # Honour the caller's rate: without ``sr=sr`` the argument would be
    # overwritten by the loader's return value and never take effect.
    y, sr = librosa.load(audio_file, sr=sr)

    # Each spectral representation is computed once and then reduced several
    # ways, rather than being recomputed for every statistic.
    centroid_frames = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    bandwidth_frames = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]

    # The frequency statistics are taken from the centroid directly. Going
    # through ``extract_standard_deviation`` / ``extract_mode`` would pick up
    # the later same-named definitions (MFCC std and raw-sample mode).
    # ``np.ravel(...)[0]`` yields a scalar whether scipy reports the mode as a
    # 0-d value or as a length-1 array.
    mode_freq = np.ravel(mode(centroid_frames)[0])[0]

    featurs = {
        'meanfreq': centroid_frames.mean(),
        'sd': centroid_frames.std(),
        'median': np.median(centroid_frames),
        'mode': mode_freq,
        # Reduced to its mean so the cell holds one number instead of the
        # whole per-frame array; by construction this equals 'meanfreq'.
        'centroid': centroid_frames.mean(),
        'meanfun': mfcc.mean(),
        'minfun': mfcc.min(),
        'maxfun': mfcc.max(),
        'meandom': bandwidth_frames.mean(),
        'mindom': bandwidth_frames.min(),
        'maxdom': bandwidth_frames.max(),
        'modindx': extract_modindx(y, sr),
    }

    return featurs

In [None]:
def process_audio_directory(directory_path, output_path='features.csv'):
    """Extract features from every MP3 in a directory and save them as CSV.

    Args:
        directory_path: Directory whose ``.mp3`` files (matched
            case-insensitively) are processed. Sub-directories are not
            searched.
        output_path: Where the CSV is written. Defaults to ``features.csv``
            in the current working directory.

    Returns:
        The DataFrame that was written, one row per processed file.
    """
    # ``os.listdir`` returns entries in arbitrary order; sorting makes the row
    # order of the CSV reproducible between runs and machines.
    mp3_names = sorted(
        name for name in os.listdir(directory_path)
        if name.lower().endswith(".mp3")
    )
    features_list = [
        extract_features(os.path.join(directory_path, name))
        for name in mp3_names
    ]
    df = pd.DataFrame(features_list)

    df.to_csv(output_path, index=False)
    return df

# Run the batch extraction on the clips folder (path is relative to the
# current working directory).
input_diracotry = 'audios/'
process_audio_directory(directory_path=input_diracotry)

In [None]:
# Read the extracted feature table back in. The batch step writes to a
# relative path, so this absolute path matches only when the working
# directory is /content.
x = pd.read_csv(filepath_or_buffer="/content/features.csv")

In [None]:
# Save the same table as an Excel workbook, without the index column.
x.to_excel(excel_writer="features.xlsx", index=False)