In [2]:
import warnings                            # This block suppresses the display of harmless warnings, but should stay
warnings.filterwarnings('ignore')          # commented out until the final version, to avoid missing "real" warnings

import kilroy_was_here                     # Mandatory. Allows access to shared Python code from the repository root
from audace.jupytools import iprint           # Timestamped (to the ms) print with CPU and RAM consumption information
from audace.audiodataset import AudioDataset  # Class for audio dataset handling


# The dataset name is the master key used to address a dataset
DATASET_NAME = 'MONO1000'

# Build the AudioDataset object for the dataset named above.
ds = AudioDataset(DATASET_NAME)

# Print summary information about the retrieved AudioDataset object
# (paths, sample rate, chunk duration/overlap and counts, as shown in the cell output).
ds.info()


[2020-08-11/14:25:58.085|14.6%|73.5%|0.26GB] ------------------------------------------------------
[2020-08-11/14:25:58.085|00.0%|73.5%|0.26GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\MONO1000
[2020-08-11/14:25:58.085|00.0%|73.5%|0.26GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\MONO1000\database.db
[2020-08-11/14:25:58.085|00.0%|73.5%|0.26GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\MONO1000\samples
[2020-08-11/14:25:58.085|00.0%|73.5%|0.26GB] NB SOURCE AUDIO FILES : 2
[2020-08-11/14:25:58.085|00.0%|73.5%|0.26GB] SAMPLE RATE           : 22050
[2020-08-11/14:25:58.086|00.0%|73.5%|0.26GB] DURATION              : 1.0
[2020-08-11/14:25:58.086|00.0%|73.5%|0.26GB] OVERLAP               : 0.5
[2020-08-11/14:25:58.086|00.0%|73.5%|0.26GB] NB AUDIO CHUNKS       : 2366
[2020-08-11/14:25:58.086|00.0%|73.5%|0.26GB] ------------------------------------------------------


In [3]:
# Display cardinalities by hive and queen/noqueen for samples with no external
# perturbation (nobee = 0).
# GROUP BY already yields exactly one row per (hive, queen) pair, so no DISTINCT is
# needed; ordering on both grouping columns keeps the row order stable between runs.
sql = """
    select hive, queen, count(*)
    from samples
    where nobee = 0
    group by hive, queen
    order by hive, queen
    """
ds.queryDataFrame(sql)

Unnamed: 0,hive,queen,count(*)
0,Hive3,0.0,811
1,Hive3,1.0,606


In [7]:
# Load the per-sample records (name, source file id, hive, queen flag and MFCC
# features) into a pandas DataFrame, keeping only the rows where nobee = 0.
# NOTE(review): the query asks for `mfcc`, but the resulting column is displayed as
# `MFCC`; a later cell indexes the frame with that upper-case name — confirm the
# casing returned by queryDataFrame before renaming anything.
sql = "select name, file_id, hive, queen, mfcc from samples where nobee = 0"
df = ds.queryDataFrame(sql)
df

Unnamed: 0,name,file_id,hive,queen,MFCC
0,00-000000,1,Hive3,0.0,"[[-394.88925, -395.52295, -411.27222, -399.069..."
1,00-000001,1,Hive3,0.0,"[[-401.44745, -394.54202, -396.4122, -404.9671..."
2,00-000002,1,Hive3,0.0,"[[-419.504, -409.64413, -407.69608, -416.86414..."
3,00-000003,1,Hive3,0.0,"[[-420.08606, -418.07767, -417.40793, -404.803..."
4,00-000004,1,Hive3,0.0,"[[-372.56345, -371.13123, -384.61908, -378.365..."
...,...,...,...,...,...
1412,01-001178,2,Hive3,1.0,"[[-456.26517, -455.3929, -456.27307, -448.8443..."
1413,01-001179,2,Hive3,1.0,"[[-438.46368, -431.82635, -440.27603, -449.474..."
1414,01-001180,2,Hive3,1.0,"[[-444.8451, -443.43448, -448.0073, -452.25623..."
1415,01-001181,2,Hive3,1.0,"[[-455.87585, -447.3699, -445.45035, -446.2260..."


In [30]:
# Pull the MFCC column out of the DataFrame as a pandas Series (one entry per
# sample; object dtype according to the displayed output) and display it.
mfccs = df['MFCC']
mfccs


0       [[-394.88925, -395.52295, -411.27222, -399.069...
1       [[-401.44745, -394.54202, -396.4122, -404.9671...
2       [[-419.504, -409.64413, -407.69608, -416.86414...
3       [[-420.08606, -418.07767, -417.40793, -404.803...
4       [[-372.56345, -371.13123, -384.61908, -378.365...
                              ...                        
1412    [[-456.26517, -455.3929, -456.27307, -448.8443...
1413    [[-438.46368, -431.82635, -440.27603, -449.474...
1414    [[-444.8451, -443.43448, -448.0073, -452.25623...
1415    [[-455.87585, -447.3699, -445.45035, -446.2260...
1416    [[-449.1348, -441.7129, -444.20163, -447.613, ...
Name: MFCC, Length: 1417, dtype: object

In [33]:
import csv
from pathlib import Path

# Directory receiving the exported file. exist_ok=True makes this cell safe to
# re-run: without it, mkdir raises FileExistsError once the directory exists.
output_path = Path('./output')
output_path.mkdir(parents=True, exist_ok=True)

# Write one tab-separated row per sample: each MFCC array is flattened into a
# single list of values. newline='' is what the csv module expects so that it
# controls line endings itself.
with open(output_path / 'tensors.tsv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter='\t')
    for mfcc in mfccs:
        writer.writerow(mfcc.flatten().tolist())