In [1]:
import warnings                            # This block suppresses the display of harmless warnings, but should stay
warnings.filterwarnings('ignore')          # commented out until the final version, to avoid missing "real" warnings

import kilroy_was_here                     # Mandatory. Allows access to shared Python code from the repository root
from audace.jupytools import iprint           # Timestamped (to the ms) print with CPU and RAM consumption information
from audace.audiodataset import AudioDataset  # Class for audio dataset handling
from audace import providers
from audace import featurizers


# The dataset name is the master key for dataset addressing.
# Change it according to the dataset you want to process.
DATASET_NAME = 'MAIN1000'

# Initialize the dataset object from its name.
ds = AudioDataset(DATASET_NAME)

# Display the AudioDataset summary (name, paths, sample rate, chunk count, ...)
ds.info()


[2020-08-18/20:50:23.140|13.5%|64.3%|0.26GB] ------------------------------------------------------
[2020-08-18/20:50:23.141|20.0%|64.3%|0.26GB] DATASET NAME          : MAIN1000
[2020-08-18/20:50:23.141|00.0%|64.3%|0.26GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\MAIN1000
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\MAIN1000\MAIN1000.db
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\MAIN1000\samples
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] NB SOURCE AUDIO FILES : 48
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] SAMPLE RATE           : 22050
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] DURATION              : 1.0
[2020-08-18/20:50:23.142|00.0%|64.3%|0.26GB] OVERLAP               : 0.0
[2020-08-18/20:50:23.143|00.0%|64.3%|0.26GB] NB AUDIO CHUNKS       : 24788
[2020-08-18/20:50:23.143|00.0%|64.3%|0.26GB] ------------------------------------------------------


In [2]:
# Query the dataset's `samples` table and get the result as a pandas DataFrame.
# Only rows with nobee = 0 are kept; the selected columns are the sample name,
# file_id, hive, queen, fold, and the array-valued mfcc20, magic05 and
# apidictor columns.
sql = "select name, file_id, hive, queen, mfcc20, magic05, apidictor, fold from samples where nobee = 0"
df = ds.queryDataFrame(sql)
# Bare last expression so the notebook renders the (truncated) frame.
df

Unnamed: 0,name,file_id,hive,queen,mfcc20,magic05,apidictor,fold
0,00-000000,1,CF001,0.0,"[[-452.60522, -455.07095, -466.4225, -480.3195...","[3.9144727e-08, 9.0641564e-08, 6.047873e-06, 5...","[2.6183884e-07, 1.737341e-07, 1.8253529e-07, 2...",1
1,00-000001,1,CF001,0.0,"[[-495.33484, -498.9276, -484.53116, -477.9013...","[4.330297e-08, 2.7317887e-07, 5.098576e-06, 5....","[4.9584986e-07, 1.8042519e-07, 7.9778815e-08, ...",1
2,00-000002,1,CF001,0.0,"[[-445.42633, -443.7818, -444.26807, -442.6415...","[1.0806294e-07, 3.9161785e-07, 8.634005e-06, 4...","[3.484265e-07, 3.1297404e-07, 2.41745e-07, 3.6...",1
3,00-000003,1,CF001,0.0,"[[-460.85968, -462.19055, -463.63702, -465.996...","[6.7095286e-08, 1.6329729e-07, 4.094701e-06, 6...","[3.5953724e-07, 1.9998862e-07, 1.4095741e-07, ...",1
4,00-000004,1,CF001,0.0,"[[-466.56985, -455.9927, -446.66547, -444.0310...","[7.585439e-08, 1.6839813e-07, 4.4799826e-06, 5...","[3.101988e-07, 3.4276513e-07, 3.632166e-07, 2....",1
...,...,...,...,...,...,...,...,...
17200,47-000583,48,Hive3,1.0,"[[-429.1963, -428.889, -427.92667, -422.99686,...","[2.8078146e-09, 4.323107e-09, 1.3884766e-08, 2...","[1.7832702e-08, 2.1633614e-08, 1.1228838e-08, ...",4
17201,47-000584,48,Hive3,1.0,"[[-434.4861, -429.77988, -437.36362, -441.3969...","[5.362719e-09, 8.614295e-09, 1.1138677e-08, 1....","[1.6410093e-08, 1.9566436e-08, 2.5602061e-08, ...",4
17202,47-000585,48,Hive3,1.0,"[[-422.22186, -415.6533, -427.11856, -431.4219...","[6.880176e-09, 1.2994544e-08, 1.1858766e-08, 3...","[1.4815208e-08, 1.3480358e-08, 2.1716842e-08, ...",4
17203,47-000586,48,Hive3,1.0,"[[-439.2309, -434.92932, -441.01767, -440.6773...","[3.7156628e-08, 1.0280094e-07, 8.020571e-08, 8...","[2.5325264e-08, 1.7652539e-08, 1.9103688e-08, ...",4


In [3]:
# Shape of the `mfcc20` entry of the first row. `.iloc[0]` selects by position,
# so the lookup does not depend on the index containing the label 0.
df['mfcc20'].iloc[0].shape

(20, 44)

In [4]:
import csv
from pathlib import Path

# Iterating over one column - `f` is some function that processes your data
# result = [f(x) for x in df['col']]
# Iterating over two columns, use `zip`
# result = [f(x, y) for x, y in zip(df['col1'], df['col2'])]
# Iterating over multiple columns - same data type
#result = [f(row[0], ..., row[n]) for row in df[['col1', ...,'coln']].to_numpy()]
# Iterating over multiple columns - differing data type
# result = [f(row[0], ..., row[n]) for row in zip(df['col1'], ..., df['coln'])]

# Every export below targets the same destination directory.
OUTPUT_DIR = "./output"

# One entry per export, in execution order: (SQL query, label column(s), feature column).
# NOTE(review): the roles of the last two arguments are inferred from the column
# names passed here -- confirm against AudioDataset.exportTSV.
EXPORT_JOBS = [
    # mfcc20 labelled by hive
    ("select hive, queen, mfcc20 from samples where nobee = 0", 'hive', 'mfcc20'),
    # mfcc20 labelled by both hive and queen (list of columns)
    ("select hive, queen, mfcc20 from samples where nobee = 0", ['hive', 'queen'], 'mfcc20'),
    # mfcc20 labelled by a single "<hive>_<queen>" column built in SQL
    ("select hive||'_'||queen as hivequeen, mfcc20 from samples where nobee = 0", 'hivequeen', 'mfcc20'),
    # mfcc20 labelled by queen
    ("select hive, queen, mfcc20 from samples where nobee = 0", 'queen', 'mfcc20'),
    # magic05 labelled by queen
    ("select queen, magic05 from samples where nobee = 0", 'queen', 'magic05'),
    # apidictor labelled by queen
    ("select queen, apidictor from samples where nobee = 0", 'queen', 'apidictor'),
]

# Run the exports one after another with exactly the arguments listed above.
# NOTE(review): any value returned by exportTSV is discarded here -- confirm
# nothing relied on the last call's return being displayed by the notebook.
for export_sql, label_columns, feature_column in EXPORT_JOBS:
    ds.exportTSV(export_sql, OUTPUT_DIR, label_columns, feature_column)