<a href="https://colab.research.google.com/github/ChingizIbnVaxob/NLP/blob/main/audio_feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
import warnings
# Silence every warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides deprecation warnings raised by the libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [63]:
import librosa
import pandas as pd
import os
from scipy.stats import kurtosis, mode

In [64]:
def extract_mean_features(y, sr=22050):
  """Return the mean of the spectral centroid of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa (defaults to 22050).

  Returns:
    The mean over all values returned by
    `librosa.feature.spectral_centroid`.
  """
  centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  return centroid.mean()

In [65]:
# Single clip used by the exploratory cells that follow.
# NOTE(review): this is an absolute "/content/..." path, while the batch run at the end of
# the notebook uses the relative 'clips/' directory — confirm both point at the same data.
audio_file = "/content/clips/download (3).mp4"

In [66]:
# Decode the clip: `data` receives the samples and `r` the sampling rate returned by librosa.
# No `sr` argument is passed, so librosa's default applies — presumably 22050 Hz, matching
# the default of the extract_* helpers; verify.
data, r = librosa.load(audio_file)

In [67]:
data

array([-0.00145933, -0.00230369, -0.00272832, ..., -0.00234083,
       -0.00199465, -0.00194032], dtype=float32)

In [68]:
# Pass the sampling rate returned by librosa.load instead of silently relying on the
# helper's hard-coded default, so the value stays correct if the load call ever changes.
meanfreq = extract_mean_features(data, r)

In [69]:
meanfreq

1520.9061750735104

In [70]:
def extract_standart_deviation(y, sr=22050):
  """Return the standard deviation of the spectral centroid of an audio signal.

  The (misspelled) function name is kept as-is because other cells call it.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa (defaults to 22050).

  Returns:
    The standard deviation over all values returned by
    `librosa.feature.spectral_centroid`.
  """
  centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  return centroid.std()

In [71]:
def exctact_median(y, sr=22050):
  """Return the median of the spectral centroid of an audio signal.

  The (misspelled) function name is kept as-is because other cells call it.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa (defaults to 22050).

  Returns:
    A scalar: the median over all values returned by
    `librosa.feature.spectral_centroid`, mirroring how the mean and
    standard-deviation helpers reduce the same array.
  """
  import numpy as np  # local import: numpy is not imported at notebook level

  centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  # The previous implementation returned `centroid[0]`, which merely selects the
  # first row of the array (one value per frame) instead of computing a median,
  # so the 'median' feature ended up as a whole array rather than a number.
  return np.median(centroid)

In [72]:
def extract_kurtosis(y):
  """Return the kurtosis of `y` as computed by `scipy.stats.kurtosis`.

  Args:
    y: Sequence or array of values; passed to SciPy unchanged, so all of
      SciPy's defaults apply.

  Returns:
    Whatever `scipy.stats.kurtosis(y)` returns for the input.
  """
  result = kurtosis(y)
  return result

In [73]:
def extract_spectral_flatness(y):
  """Return the mean spectral flatness of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.

  Returns:
    The mean over all values returned by
    `librosa.feature.spectral_flatness`.
  """
  flatness = librosa.feature.spectral_flatness(y=y)
  return flatness.mean()

def extract_mode(y):
  """Return the most frequent value of `y` using `scipy.stats.mode`.

  Depending on the installed SciPy version, `mode(y)[0]` for a 1-D input is
  either a scalar or a length-1 array. For 1-D input this function always
  returns a scalar, so the value written to the feature table does not depend
  on the SciPy version.

  Args:
    y: Sequence or array of values. Expected to be 1-D here; for any other
      dimensionality the SciPy result is returned unchanged.

  Returns:
    For 1-D input: the modal value as a NumPy scalar, or NaN when SciPy
    yields no value (empty input). Otherwise: `mode(y)[0]` as returned by
    SciPy.
  """
  import numpy as np  # local import: numpy is not imported at notebook level

  result = mode(y)[0]
  if np.ndim(y) != 1:
    # Multi-dimensional input: keep whatever SciPy returns.
    return result

  # Collapse both the scalar form and the length-1 array form to one scalar.
  flat = np.asarray(result).reshape(-1)
  return flat[0] if flat.size else np.nan

def extract_centroid(y, sr):
  """Return the mean spectral centroid of an audio signal.

  This performs the same computation as `extract_mean_features`, but `sr`
  has no default here.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The mean over all values returned by
    `librosa.feature.spectral_centroid`.
  """
  centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  return centroid.mean()

def extract_peak(y, sr):
  """Return the largest spectral-centroid value of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The maximum over all values returned by
    `librosa.feature.spectral_centroid`.
  """
  centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
  return centroid.max()

def extract_mean_function(y, sr):
  """Return the mean of the first row of the MFCC matrix of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The mean of element `[0]` of the array returned by
    `librosa.feature.mfcc`.
  """
  mfccs = librosa.feature.mfcc(y=y, sr=sr)
  first_row = mfccs[0]
  return first_row.mean()

def extract_min_function(y, sr):
  """Return the smallest value in the MFCC matrix of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The minimum over all values returned by `librosa.feature.mfcc`.
  """
  mfccs = librosa.feature.mfcc(y=y, sr=sr)
  return mfccs.min()

def extract_max_function(y, sr):
  """Return the largest value in the MFCC matrix of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The maximum over all values returned by `librosa.feature.mfcc`.
  """
  mfccs = librosa.feature.mfcc(y=y, sr=sr)
  return mfccs.max()

def extract_mean_dom(y, sr):
  """Return the mean spectral bandwidth of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The mean over all values returned by
    `librosa.feature.spectral_bandwidth`.
  """
  bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
  return bandwidth.mean()

def extract_minimum_dom(y, sr):
  """Return the smallest spectral-bandwidth value of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The minimum over all values returned by
    `librosa.feature.spectral_bandwidth`.
  """
  bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
  return bandwidth.min()

def extract_maximum_dom(y, sr):
  """Return the largest spectral-bandwidth value of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The maximum over all values returned by
    `librosa.feature.spectral_bandwidth`.
  """
  bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
  return bandwidth.max()

def extract_dfrange(maxdom, mindom):
  """Return the difference between `maxdom` and `mindom`.

  Args:
    maxdom: Upper value (minuend).
    mindom: Lower value (subtrahend).

  Returns:
    `maxdom - mindom`.
  """
  spread = maxdom - mindom
  return spread

def extract_modindx(y, sr):
  """Return the mean of the tempogram of an audio signal.

  Args:
    y: Audio signal, forwarded to librosa as `y`.
    sr: Sampling rate forwarded to librosa.

  Returns:
    The mean over all values returned by `librosa.feature.tempogram`.
  """
  tempogram = librosa.feature.tempogram(y=y, sr=sr)
  return tempogram.mean()

In [74]:
def extract_features(audio_file):
  """Load one audio file and compute its feature dictionary.

  Args:
    audio_file: Path handed to `librosa.load`.

  Returns:
    A dict with the keys 'meanfreq', 'std', 'median', 'kurtosis',
    'spectral_flatness', 'mode', 'centroid', 'peakfreq', 'meanfun',
    'minfun', 'maxfun', 'meandom', 'mindom', 'maxdom', 'dfrange' and
    'modindx', each holding the value returned by the corresponding
    extract_* helper for this file.
  """
  signal, rate = librosa.load(audio_file)

  # The bandwidth extremes are used twice (as their own features and as the
  # inputs of 'dfrange'), so compute them once up front.
  lowest_dom = extract_minimum_dom(signal, rate)
  highest_dom = extract_maximum_dom(signal, rate)

  return {
      'meanfreq': extract_mean_features(signal, rate),
      'std': extract_standart_deviation(signal, rate),
      'median': exctact_median(signal, rate),
      'kurtosis': extract_kurtosis(signal),
      'spectral_flatness': extract_spectral_flatness(signal),
      'mode': extract_mode(signal),
      'centroid': extract_centroid(signal, rate),
      'peakfreq': extract_peak(signal, rate),
      'meanfun': extract_mean_function(signal, rate),
      'minfun': extract_min_function(signal, rate),
      'maxfun': extract_max_function(signal, rate),
      'meandom': extract_mean_dom(signal, rate),
      'mindom': lowest_dom,
      'maxdom': highest_dom,
      'dfrange': extract_dfrange(highest_dom, lowest_dom),
      'modindx': extract_modindx(signal, rate),
  }


In [75]:
def process_audio_directory(input_dir, output_dir):
  """Extract features from every ".mp4" file in a directory and save them as CSV.

  Args:
    input_dir: Directory whose direct entries are scanned for names ending
      in ".mp4" (the match is case-sensitive and not recursive).
    output_dir: Destination passed to `DataFrame.to_csv`. Despite the name,
      it is used as the path of the CSV file, not as a directory.

  Returns:
    None. One row per matching file is written to `output_dir`, without the
    index column.
  """
  # os.listdir returns entries in arbitrary order; sorting makes the row order
  # of the resulting CSV reproducible across runs and machines.
  mp4_names = sorted(
      name for name in os.listdir(input_dir) if name.endswith(".mp4")
  )

  features_list = [
      extract_features(os.path.join(input_dir, name)) for name in mp4_names
  ]

  pd.DataFrame(features_list).to_csv(output_dir, index=False)

# Batch run: scan the clips directory and write all extracted features to one CSV file.
input_directory = 'clips/'
output_directory = 'features.csv'  # a file path, despite the variable name

process_audio_directory(input_dir=input_directory, output_dir=output_directory)