In [2]:
import warnings                            # This block prevents display of harmless warnings, but should be
warnings.filterwarnings('ignore')          # commented out till the final version, to avoid missing "real" warnings 

import kilroy_was_here                     # Mandatory. Allow access to shared python code from repository root
from audace.jupytools import iprint           # timstamped (to the ms) print with CPU and RAM consumption information
from audace.audiodataset import AudioDataset  # Class for audio dataset handling
from audace import providers
from audace import featurizers


# The dataset name is the master key used to address a dataset.
# Change it according to the dataset you want to process.
DATASET_NAME = 'DUO0500'

# Initialize the dataset object for the chosen dataset name.
# `ds` is the handle used by every following cell of this notebook.
ds = AudioDataset(DATASET_NAME)

# Show some information about the retrieved AudioDataset object
# (the recorded output lists name, paths, sample rate, duration, overlap and chunk count).
ds.info()


#ds.addFeature('mfcc10')
#ds.setFeature('mfcc10', providers.FromSample(ds.samples_path, featurizers.MFCC(n_mfcc=10)))

#ds.addFeature('mfcc05')
#ds.setFeature('mfcc05', providers.FromSample(ds.samples_path, featurizers.MFCC(n_mfcc=5)))

#ds.addFeature('magic05')
#ds.setFeature('magic05', providers.FromSample(ds.samples_path, featurizers.Magic(10,1000,10)))



[2020-08-15/19:59:14.942|12.9%|80.3%|0.26GB] ------------------------------------------------------
[2020-08-15/19:59:14.942|00.0%|80.3%|0.26GB] DATASET NAME          : DUO0500
[2020-08-15/19:59:14.942|00.0%|80.3%|0.26GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\DUO0500
[2020-08-15/19:59:14.942|00.0%|80.3%|0.26GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\DUO0500\database.db
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\DUO0500\samples
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] NB SOURCE AUDIO FILES : 4
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] SAMPLE RATE           : 22050
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] DURATION              : 0.5
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] OVERLAP               : 0.0
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] NB AUDIO CHUNKS       : 4748
[2020-08-15/19:59:14.943|00.0%|80.3%|0.26GB] ------------------------------------------------------


In [16]:
from audace import transformers
# Rebuild the 'fold' attribute: drop it, then add it again before filling it.
# NOTE(review): presumably done so the cell starts from a fresh attribute on
# each run - confirm how dropAttribute behaves when 'fold' does not exist yet.
ds.dropAttribute('fold')
ds.addAttribute('fold')

# Lookup table: hive identifier -> fold number.
e = {
    'CF001': 1,
    'CF003': 1,
    'CJ001': 2,
    'GH001': 2,
    'Hive1': 3,
    'Hive3': 4,
}

# Fill 'fold' from each sample's 'hive' value, decoded through the table above.
hive_decoder = transformers.Decode(e)
fold_provider = providers.FromQuery('hive', hive_decoder)
ds.setAttribute('fold', fold_provider)

4748

In [3]:
# Display sample counts by hive and queen label, restricted to rows where
# nobee = 0 (described by the original author as samples with no external
# perturbation).
# - GROUP BY already yields exactly one row per (hive, queen) pair, so the
#   former DISTINCT keyword was redundant and has been removed.
# - Ordering by both grouping columns makes the row order deterministic;
#   ordering by hive alone left the order of the queen rows unspecified.
sql = """
    select hive, queen, count(*)
    from samples
    where nobee = 0
    group by hive, queen
    order by hive, queen
    """
ds.queryDataFrame(sql)

Unnamed: 0,hive,queen,count(*)
0,Hive1,0.0,864
1,Hive1,1.0,802
2,Hive3,0.0,824
3,Hive3,1.0,621


In [18]:
# Query the rows where nobee = 0 (identifiers, labels, the mfcc20 and magic05
# features, and the fold) into a pandas DataFrame, then show it as the cell result.
sql = (
    "select name, file_id, hive, queen, mfcc20, magic05, fold "
    "from samples "
    "where nobee = 0"
)
df = ds.queryDataFrame(sql)
df

Unnamed: 0,name,file_id,hive,queen,mfcc20,magic05,fold
0,00-000000,1,Hive1,1.0,"[-408.8679504394531, -408.73077392578125, -409...","[3.2136795e-09, 1.9805428e-08, 4.005462e-08, 3...",3
1,00-000001,1,Hive1,1.0,"[-412.67633056640625, -409.9009704589844, -411...","[5.369889e-09, 5.31459e-08, 5.657863e-08, 3.64...",3
2,00-000002,1,Hive1,1.0,"[-423.73028564453125, -417.4234619140625, -409...","[2.8255782e-09, 3.6627576e-08, 4.4421895e-08, ...",3
3,00-000003,1,Hive1,1.0,"[-412.2464294433594, -414.35784912109375, -417...","[3.4870855e-09, 2.8285903e-08, 3.6422378e-08, ...",3
4,00-000004,1,Hive1,1.0,"[-402.0168762207031, -405.77630615234375, -413...","[3.4720469e-09, 3.016427e-08, 5.139529e-08, 4....",3
...,...,...,...,...,...,...,...
3106,03-001179,4,Hive3,1.0,"[-438.46368408203125, -431.82635498046875, -44...","[3.0452059e-09, 7.466517e-09, 1.5205437e-08, 8...",4
3107,03-001180,4,Hive3,1.0,"[-444.8450927734375, -443.4344787597656, -448....","[3.0531373e-09, 7.223166e-09, 1.6578603e-08, 7...",4
3108,03-001181,4,Hive3,1.0,"[-455.8758544921875, -447.3699035644531, -445....","[9.706691e-09, 1.8734587e-08, 3.8384027e-08, 8...",4
3109,03-001182,4,Hive3,1.0,"[-449.1347961425781, -441.712890625, -444.2016...","[6.9079102e-09, 9.217258e-09, 4.1515605e-08, 5...",4


In [19]:
# Run the same selection without the feature columns through ds.query;
# the recorded output shows one tuple per matching row.
ds.query(
    "select name, file_id, hive, queen, fold "
    "from samples where nobee = 0"
)

('00-000000', 1, 'Hive1', 1.0, 3)
('00-000001', 1, 'Hive1', 1.0, 3)
('00-000002', 1, 'Hive1', 1.0, 3)
('00-000003', 1, 'Hive1', 1.0, 3)
('00-000004', 1, 'Hive1', 1.0, 3)
('00-000005', 1, 'Hive1', 1.0, 3)
('00-000006', 1, 'Hive1', 1.0, 3)
('00-000007', 1, 'Hive1', 1.0, 3)
('00-000008', 1, 'Hive1', 1.0, 3)
('00-000009', 1, 'Hive1', 1.0, 3)
('00-000010', 1, 'Hive1', 1.0, 3)
('00-000011', 1, 'Hive1', 1.0, 3)
('00-000012', 1, 'Hive1', 1.0, 3)
('00-000013', 1, 'Hive1', 1.0, 3)
('00-000014', 1, 'Hive1', 1.0, 3)
('00-000015', 1, 'Hive1', 1.0, 3)
('00-000016', 1, 'Hive1', 1.0, 3)
('00-000017', 1, 'Hive1', 1.0, 3)
('00-000018', 1, 'Hive1', 1.0, 3)
('00-000019', 1, 'Hive1', 1.0, 3)
('00-000020', 1, 'Hive1', 1.0, 3)
('00-000021', 1, 'Hive1', 1.0, 3)
('00-000022', 1, 'Hive1', 1.0, 3)
('00-000023', 1, 'Hive1', 1.0, 3)
('00-000024', 1, 'Hive1', 1.0, 3)
('00-000025', 1, 'Hive1', 1.0, 3)
('00-000026', 1, 'Hive1', 1.0, 3)
('00-000027', 1, 'Hive1', 1.0, 3)
('00-000028', 1, 'Hive1', 1.0, 3)
('00-000029', 

In [2]:
import csv
from pathlib import Path



# Reference notes: ways to iterate over DataFrame columns
# (`f` stands for any function that processes your data).
# One column:
# result = [f(x) for x in df['col']]
# Two columns - use `zip`:
# result = [f(x, y) for x, y in zip(df['col1'], df['col2'])]
# Several columns of the same data type:
# result = [f(row[0], ..., row[n]) for row in df[['col1', ...,'coln']].to_numpy()]
# Several columns of differing data types:
# result = [f(row[0], ..., row[n]) for row in zip(df['col1'], ..., df['coln'])]


 

    
# Export several selections of the dataset through ds.exportTSV.
#
# The calls used to be seven copy-pasted statements; they are now driven by a
# table so that the output directory is defined once and adding or removing an
# export is a one-line change. The calls are issued in the listed order with
# exactly the same arguments as before.
#
# Each entry is (query, labels, feature): the query is passed first, then the
# output directory, then `labels` and `feature` as third and fourth arguments.
# NOTE(review): `labels` looks like the label column(s) (a name or a list of
# names) and `feature` like the feature column to export - confirm against
# AudioDataset.exportTSV.
# NOTE(review): the queried feature columns (mfcc20, mfcc10, mfcc05, magic05)
# must already exist in the dataset; the code adding mfcc10/mfcc05/magic05 is
# commented out earlier in this notebook.
OUTPUT_DIR = "./output"

EXPORT_SPECS = [
    ("select hive, queen, mfcc20 from samples where nobee = 0", 'hive', 'mfcc20'),
    ("select hive, queen, mfcc20 from samples where nobee = 0", ['hive', 'queen'], 'mfcc20'),
    ("select hive||'_'||queen as hivequeen, mfcc20 from samples where nobee = 0", 'hivequeen', 'mfcc20'),
    ("select hive, queen, mfcc20 from samples where nobee = 0", 'queen', 'mfcc20'),
    ("select queen, mfcc10 from samples where nobee = 0", 'queen', 'mfcc10'),
    ("select queen, mfcc05 from samples where nobee = 0", 'queen', 'mfcc05'),
    ("select queen, magic05 from samples where nobee = 0", 'queen', 'magic05'),
]

for query, labels, feature in EXPORT_SPECS:
    ds.exportTSV(query, OUTPUT_DIR, labels, feature)
    