In [1]:
%load_ext autoreload
%autoreload 2

import os
import librosa
from crossai.processing import Audio

# Load Instance

Use a package that can load `.wav` files as a NumPy array (such as *librosa*).

In [2]:
# Locate the sample clip at <current working directory>/data_test/audio.wav
# (assumes the kernel is started from the tutorials folder).
data_path = os.path.join(
    os.path.join(os.getcwd(), "data_test"),
    "audio.wav",
)

In [3]:
# Decode the clip with librosa, then report the container type, the shape and
# the sampling rate of what was loaded.
audio, sr = librosa.load(data_path)
for caption, detail in (
    ("Type of audio", type(audio)),
    ("Shape of audio:", audio.shape),
    ("Sampling rate:", sr),
):
    print(caption, detail)

Type of audio <class 'numpy.ndarray'>
Shape of audio: (661794,)
Sampling rate: 22050


# Create a CrossAI Audio object

Provide a list of instances together with their corresponding labels (in our case there is only one instance).

In [4]:
# One record per clip: 'X' carries the loaded waveform, 'Y' its label.
data = [
    {
        'X': audio,
        'Y': "hiphop",
    },
]

In [5]:
# Wrap the labelled records in a CrossAI Audio object.
crossai_audio = Audio(data)
print("Type of CrossAI instance:", type(crossai_audio))

# Inspect the first (and only) instance through the X / Y accessors.
for caption, detail in (
    ("Type of CrossAI audio data:", type(crossai_audio.X[0])),
    ("Label of instance:", crossai_audio.Y[0]),
    ("Shape of audio:", crossai_audio.X[0].shape),
    ("Audio data: \n", crossai_audio.X[0]),
):
    print(caption, detail)

Type of CrossAI instance: <class 'crossai.processing.audio._audio.Audio'>
Type of CrossAI audio data: <class 'numpy.ndarray'>
Label of instance: hiphop
Shape of audio: (661794,)
Audio data: 
 [0.012847900390625 0.020721435546875 0.0194091796875 ... -0.02362060546875
 -0.0714111328125 -0.090911865234375]


# Transform the data

## Filter the signal with a Butterworth filter

In [6]:
# Apply a 12th-order high-pass Butterworth filter (cutoff_freq=[500],
# presumably in Hz). The sampling rate is taken from `sr`, the value librosa
# returned when the clip was loaded, rather than a hard-coded 22050, so the
# filter stays in sync with the audio if the clip or the load settings change.
# NOTE(review): the outputs recorded further down suggest this call updates
# `crossai_audio` in place — confirm against the CrossAI API.
crossai_audio.butterworth_filter(order=12, cutoff_freq=[500], type="highpass", Fs=sr)

# Access the transformed data

In [7]:
# Display the records held by the object after filtering; the recorded output
# is a list of {'X': ..., 'Y': ...} dicts. Kept as the cell's bare last
# expression so the notebook renders its repr.
crossai_audio.data

[{'X': array([[[-2.1501966e-03,  5.4476494e-03,  3.8301658e-03, ...,
            2.9492613e-02, -4.7704084e-03,  1.1275703e-16]]], dtype=float32),
  'Y': 'hiphop'}]

Check the X data (i.e., the training data)

In [8]:
# Shape of the X data after the transform.
# NOTE(review): presumably (instances, channels, samples) — confirm the axis meaning.
crossai_audio.X.shape

(1, 1, 661794)

Check the X instance that was transformed

In [9]:
# Report the shape and the contents of the first instance after filtering.
for caption, detail in (
    ("Transformed audio instance shape", crossai_audio.X[0].shape),
    ("Transformed audio instance data: \n", crossai_audio.X[0]),
):
    print(caption, detail)

Transformed audio instance shape (1, 661794)
Transformed audio instance data: 
 [[-0.0021501965820789337 0.005447649396955967 0.003830165835097432 ...
  0.029492612928152084 -0.004770408384501934 1.1275702593849246e-16]]
