In [1]:
import warnings                            # These two lines hide harmless warnings; keep them commented out
warnings.filterwarnings('ignore')          # until the final version so that "real" warnings are not missed

import kilroy_was_here                     # Mandatory: allows access to shared Python code from the repository root
from audace.jupytools import iprint           # Timestamped (to the ms) print with CPU and RAM consumption information
from audace.audiodataset import AudioDataset  # Class for audio dataset handling
from audace import providers
from audace import featurizers


# The dataset name is the master key for dataset addressing.
# Change it according to the dataset you want to process.
DATASET_NAME = 'DUO1000'

# Initialize the dataset object for DATASET_NAME; `ds` is reused by the cells below.
ds = AudioDataset(DATASET_NAME)

# Display the AudioDataset summary (name, paths, sample rate, chunk count, ...)
ds.info()


[2020-08-21/11:23:47.613|13.9%|65.9%|0.26GB] ------------------------------------------------------
[2020-08-21/11:23:47.614|00.0%|65.9%|0.26GB] DATASET NAME          : DUO1000
[2020-08-21/11:23:47.614|00.0%|65.9%|0.26GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\DUO1000
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\DUO1000\DUO1000.db
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\DUO1000\samples
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] NB SOURCE AUDIO FILES : 4
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] SAMPLE RATE           : 22050
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] DURATION              : 1.0
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] OVERLAP               : 0.0
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] NB AUDIO CHUNKS       : 2374
[2020-08-21/11:23:47.615|00.0%|65.9%|0.26GB] ------------------------------------------------------


In [2]:
# Load identification columns plus the stored feature columns into a pandas
# dataframe, restricted to rows where nobee = 0.
# The query is split over several literals only for readability; the pieces
# concatenate to a single SQL statement.
sql = (
    "select name, file_id, hive, queen, "
    "mfcc20, std_mfcc20, std_mfcc8, std_apidictor "
    "from samples where nobee = 0"
)
df = ds.queryDataFrame(sql)

# Last expression of the cell: rich display of the resulting frame
df

Unnamed: 0,name,file_id,hive,queen,mfcc20,std_mfcc20,std_mfcc8,std_apidictor
0,00-000000,1,Hive1,1.0,"[[-408.86795, -408.73077, -409.20367, -409.398...","[[97.41327, 97.54802, 97.07751, 96.88353, 89.6...","[[97.41327, 97.54802, 97.07751, 96.88353, 89.6...","[0.00071722025, 0.0009952486, 0.00092343683, 0..."
1,00-000001,1,Hive1,1.0,"[[-423.7303, -417.42346, -409.6195, -403.34555...","[[100.600525, 106.9022, 114.70632, 120.97934, ...","[[100.600525, 106.9022, 114.70632, 120.97934, ...","[0.0009066642, 0.00073385256, 0.0005510749, 0...."
2,00-000002,1,Hive1,1.0,"[[-402.01688, -405.7763, -413.01965, -406.1069...","[[116.9854, 113.22777, 105.981575, 112.89249, ...","[[116.9854, 113.22777, 105.981575, 112.89249, ...","[0.00054802553, 0.0009914172, 0.0008397524, 0...."
3,00-000003,1,Hive1,1.0,"[[-415.67346, -411.83347, -410.87616, -403.968...","[[91.93293, 95.777405, 96.73117, 103.64037, 10...","[[91.93293, 95.777405, 96.73117, 103.64037, 10...","[0.0010954693, 0.00047434308, 0.0004931584, 0...."
4,00-000004,1,Hive1,1.0,"[[-410.36508, -402.53772, -407.56488, -404.221...","[[94.70627, 102.5433, 97.51247, 100.85734, 105...","[[94.70627, 102.5433, 97.51247, 100.85734, 105...","[0.00051012536, 0.00071958837, 0.0012351932, 0..."
...,...,...,...,...,...,...,...,...
1505,03-000587,4,Hive3,1.0,"[[-435.4528, -431.11078, -440.1507, -445.8108,...","[[95.949135, 100.29187, 91.251724, 85.591286, ...","[[95.949135, 100.29187, 91.251724, 85.591286, ...","[0.0007336626, 0.001499429, 0.0042691566, 0.00..."
1506,03-000588,4,Hive3,1.0,"[[-462.93552, -454.59076, -456.0919, -458.9897...","[[109.537186, 117.8831, 116.38189, 113.48234, ...","[[109.537186, 117.8831, 116.38189, 113.48234, ...","[0.00063938217, 0.0007473792, 0.00053195737, 0..."
1507,03-000589,4,Hive3,1.0,"[[-456.26517, -455.3929, -456.27307, -448.8443...","[[110.65241, 111.52762, 110.648445, 118.07588,...","[[110.65241, 111.52762, 110.648445, 118.07588,...","[0.00028464478, 0.00054751436, 0.0007417356, 0..."
1508,03-000590,4,Hive3,1.0,"[[-444.8451, -443.43448, -448.0073, -452.25623...","[[119.70333, 121.11477, 116.541245, 112.29247,...","[[119.70333, 121.11477, 116.541245, 112.29247,...","[0.0004038708, 0.00047419884, 0.0005732875, 0...."


In [14]:
# Shape of the mfcc20 array stored in the row labelled 0 (single label-based lookup)
df.loc[0, 'mfcc20'].shape

(20, 44)

In [3]:
import csv
from pathlib import Path

# Reference notes (not executed): idioms for iterating over DataFrame columns.
# Iterating over one column - `f` is some function that processes your data
# result = [f(x) for x in df['col']]
# Iterating over two columns - use `zip`
# result = [f(x, y) for x, y in zip(df['col1'], df['col2'])]
# Iterating over multiple columns - same data type
# result = [f(row[0], ..., row[n]) for row in df[['col1', ...,'coln']].to_numpy()]
# Iterating over multiple columns - differing data type
# result = [f(row[0], ..., row[n]) for row in zip(df['col1'], ..., df['coln'])]

# Export label/feature pairs through ds.exportTSV, one call per entry of
# EXPORT_SPECS, issued in the listed order.
#
# Every export reads the same rows (samples where nobee = 0) and targets the
# same directory; only the label expression and the feature column differ.
# Keeping those two varying parts in a table means the row filter and the
# output directory are each edited in one place instead of eight.
EXPORT_DIR = "./output"
EXPORT_SQL = "select {labels}, {feature} from samples where nobee = 0"

# Each entry: (SQL select expression for the label part,
#              third exportTSV argument - presumably the label column name(s),
#              fourth exportTSV argument - presumably the feature column name).
# NOTE(review): argument meanings are inferred from the values; confirm against
# AudioDataset.exportTSV.
EXPORT_SPECS = [
    ("hive", 'hive', 'mfcc20'),
    ("hive, queen", ['hive', 'queen'], 'mfcc20'),
    ("hive||'_'||queen as hivequeen", 'hivequeen', 'mfcc20'),
    ("hive||'_'||queen as hivequeen", 'hivequeen', 'std_mfcc20'),
    ("queen", 'queen', 'mfcc20'),
    ("queen", 'queen', 'std_mfcc20'),
    ("queen", 'queen', 'std_mfcc8'),
    ("queen", 'queen', 'std_apidictor'),
]

for label_sql, label_columns, feature_column in EXPORT_SPECS:
    # The feature column appears both in the query and as the last argument,
    # exactly as in the hand-written calls this loop replaces.
    ds.exportTSV(
        EXPORT_SQL.format(labels=label_sql, feature=feature_column),
        EXPORT_DIR,
        label_columns,
        feature_column
    )