In [1]:
import os
from caits.loading import wav_specs_check, wav_loader
from caits.dataset import CoreArray, DatasetList

In [2]:
# Root of the dataset. The loop below expects one sub-folder per class label,
# each containing the WAV files for that label: <folder>/<label>/<file>.wav
folder = "data/gtzan_small"

X = []    # one CoreArray per audio file
y = []    # label of each file (name of the sub-folder it was found in)
_id = []  # file name of each instance

# WAV specs of the most recently inspected file. They are overwritten on every
# iteration, so after the loop they describe the LAST file read; `fs` is reused
# later as the sampling rate for the whole dataset.
# NOTE(review): this assumes every file shares the same specs -- confirm.
fs = 0
sample_width = 0
signal_length = 0

for d in os.listdir(folder):
    genre_dir = os.path.join(folder, d)
    # Stray files in the dataset root (e.g. .DS_Store, a README) are not label
    # folders; listing them would raise NotADirectoryError.
    if not os.path.isdir(genre_dir):
        continue

    for f in os.listdir(genre_dir):
        wav_path = os.path.join(genre_dir, f)
        # Only hand real .wav files to the WAV reader; anything else in the
        # folder (sub-folders, hidden files, other formats) is skipped.
        if not (os.path.isfile(wav_path) and f.lower().endswith(".wav")):
            continue

        specs = wav_specs_check(wav_path)
        fs = specs["framerate"]
        sample_width = specs["sampwidth"]
        signal_length = specs["nframes"]

        # wav_loader returns an indexable result; its first element exposes
        # `.values` and `.columns`, which become the CoreArray payload and the
        # names of its second axis.
        wav = wav_loader(wav_path)[0]
        X.append(
            CoreArray(
                values=wav.values,
                axis_names={"axis_1": wav.columns}
            )
        )

        y.append(d)
        _id.append(f)


data = DatasetList(X=X, y=y, id=_id)
data


DatasetList object with 15 instances.

In [3]:
# Print the instances stored in `data.X`; the recorded output below is a list
# with one CoreArray repr per loaded file.
print(data.X)

[                     ch_1  
     0  0.012420654296875  
     1   0.07501220703125  
     2  0.058197021484375  
     3   0.01495361328125  
     4      0.02587890625  
   ...                ...  
661789   0.08367919921875  
661790  0.114654541015625  
661791    0.1351318359375  
661792  0.177032470703125  
661793  0.182647705078125  

CoreArray with shape (661794, 1)
,                      ch_1  
     0  0.362396240234375  
     1   0.64947509765625  
     2  0.631744384765625  
     3  0.606781005859375  
     4  0.518157958984375  
   ...                ...  
661789  0.013336181640625  
661790  0.031402587890625  
661791  0.043365478515625  
661792   0.05572509765625  
661793    0.0570068359375  

CoreArray with shape (661794, 1)
,                       ch_1  
     0     -0.033447265625  
     1  -0.054901123046875  
     2  -0.054351806640625  
     3  -0.066619873046875  
     4    -0.0745849609375  
   ...                 ...  
661789      -0.19580078125  
661790   -0.14947509765

In [60]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from caits.transformers import FunctionTransformer, FeatureExtractorSpectrum, DatasetToArray, SklearnWrapper
from caits.fe import melspectrogram
from caits.filtering import filter_butterworth

# Each pipeline stage is built as a named object first and assembled below.
# `fs` is the sampling rate picked up while loading the dataset.

# Stage 1: band-pass Butterworth filtering with cutoffs (100, 4000)
# (presumably in Hz, matching the units of `fs` -- confirm).
bandpass_tr = FunctionTransformer(
    func=filter_butterworth,
    fs=fs,
    filter_type="bandpass",
    cutoff_freq=(100, 4000),
)

# Stage 2: mel-spectrogram feature extraction.
mel_specs_tr = FeatureExtractorSpectrum(func=melspectrogram, sr=fs, power=1.0)

# Stage 3: convert the dataset to a single array-backed dataset
# (flatten=True presumably yields one flat feature vector per instance -- confirm).
data_conv = DatasetToArray(flatten=True)

# Stage 4: standardisation through the scikit-learn wrapper.
scaler_tr = SklearnWrapper(StandardScaler)

pipeline = Pipeline(
    steps=[
        ("filter", bandpass_tr),
        ("mel", mel_specs_tr),
        ("convert", data_conv),
        ("scaler", scaler_tr),
    ]
)

transformed_data = pipeline.fit_transform(data)
transformed_data


DatasetArray object with 15 instances.

In [61]:
from sklearn.cluster import KMeans

# Cluster the transformed feature matrix into 3 groups with a fixed seed.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(transformed_data.X.values)

# Cluster index assigned to each instance, in the same order as the dataset.
res = kmeans.labels_
res



array([2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1], dtype=int32)

In [63]:
# Number of cluster assignments (one per instance in the dataset).
res.shape[0]

15

In [64]:
import pandas as pd

# One record per instance: file name, true genre label and assigned cluster,
# so the clustering can be compared with the genres by eye.
total_res = [
    {"file": data._id[idx], "genre": data.y[idx], "pred": cluster}
    for idx, cluster in enumerate(res)
]

results_table = pd.DataFrame(total_res)
print(results_table)


               file  genre  pred
0    rock.00004.wav   rock     2
1    rock.00001.wav   rock     2
2    rock.00000.wav   rock     2
3    rock.00002.wav   rock     2
4    rock.00003.wav   rock     2
5   blues.00004.wav  blues     0
6   blues.00000.wav  blues     0
7   blues.00001.wav  blues     0
8   blues.00002.wav  blues     2
9   blues.00003.wav  blues     0
10  metal.00002.wav  metal     1
11  metal.00004.wav  metal     1
12  metal.00000.wav  metal     1
13  metal.00001.wav  metal     1
14  metal.00003.wav  metal     1
