In [1]:
import warnings                            # This block silences harmless warnings, but it should stay
warnings.filterwarnings('ignore')          # commented out until the final version, to avoid missing "real" warnings

# NOTE(review): presumably this import must stay above the `lib.*` imports below — confirm.
import kilroy_was_here                     # Mandatory. Allows access to shared Python code from the repository root
from lib.jupytools import iprint           # Timestamped (to the ms) print with CPU and RAM consumption information
from lib.audiodataset import AudioDataset  # Class for audio dataset handling


# The dataset name is the master key for dataset addressing
DATASET_NAME = 'MAIN1000'

# Initialize the dataset object.
ds = AudioDataset(DATASET_NAME)
    
# The following line prints some information about the newly created (or retrieved) AudioDataset object
ds.info()



[2020-08-08/09:47:45.732|16.5%|71.3%|0.25GB] ------------------------------------------------------
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\MAIN1000
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\MAIN1000\database.db
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\MAIN1000\samples
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] NB SOURCE AUDIO FILES : 48
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] SAMPLE RATE           : 22050
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] DURATION              : 1.0
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] OVERLAP               : 0.0
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] NB AUDIO CHUNKS       : 24788
[2020-08-08/09:47:45.732|00.0%|71.3%|0.25GB] ------------------------------------------------------


In [2]:
# Display cardinalities by hive and queen/noqueen for samples with no external perturbation
# (nobee = 0).
# GROUP BY already yields a single row per (hive, queen) pair, so DISTINCT is not needed.
# Ordering on both grouping columns keeps the row order deterministic within a hive.
sql = """
    select hive, queen, count(*)
    from samples
    where nobee = 0
    group by hive, queen
    order by hive, queen
    """
ds.queryDataFrame(sql)

Unnamed: 0,hive,queen,count(*)
0,CF001,0.0,15
1,CF003,1.0,3700
2,CJ001,0.0,802
3,GH001,1.0,1400
4,Hive1,0.0,1473
5,Hive1,1.0,2684
6,Hive3,0.0,6546
7,Hive3,1.0,654


In [3]:
# Load the name, file_id, hive and queen columns of every sample where nobee = 0.
sql = "select name, file_id, hive, queen from samples where nobee = 0"
df = ds.queryDataFrame(sql)

# End the cell with the bare expression so Jupyter renders the query result with its
# rich display instead of the plain-text output produced by print().
df

            name  file_id   hive  queen
0      00-000000        1  CF001    0.0
1      00-000001        1  CF001    0.0
2      00-000002        1  CF001    0.0
3      00-000003        1  CF001    0.0
4      00-000004        1  CF001    0.0
...          ...      ...    ...    ...
17269  47-000583       48  Hive3    1.0
17270  47-000584       48  Hive3    1.0
17271  47-000585       48  Hive3    1.0
17272  47-000586       48  Hive3    1.0
17273  47-000587       48  Hive3    1.0

[17274 rows x 4 columns]


In [5]:
from lib.jupytools import predestination

# Fixed value handed to predestination() before sampling.
# NOTE(review): predestination presumably seeds the random generators so that the
# sample below is reproducible — confirm in lib.jupytools.
seed = 23081965
predestination(seed)

# Show 10 randomly drawn rows of df.
df.sample(10)

Unnamed: 0,name,file_id,hive,queen
15319,43-000107,44,Hive3,0.0
13213,37-000566,38,Hive3,0.0
4172,12-000197,13,CF003,1.0
8587,26-000571,27,Hive1,1.0
5680,18-000226,19,CJ001,0.0
3385,10-000002,11,CF003,1.0
12786,37-000089,38,Hive3,0.0
2637,07-000131,8,CF003,1.0
2139,05-000185,6,CF003,1.0
3637,10-000254,11,CF003,1.0


In [6]:
# Print a summary of df: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17274 entries, 0 to 17273
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   name     17274 non-null  object 
 1   file_id  17274 non-null  int64  
 2   hive     17274 non-null  object 
 3   queen    17274 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 539.9+ KB


In [7]:
from lib.attributors import FromIdList
from lib.transformers import AsConstant

# Ids handed to the FromIdList attributor.
test_ids = (1, 2, 3, 24786)

# Create the 'set' attribute on the dataset.
ds.addAttribute('set')

# NOTE(review): presumably this assigns the constant "test" to the 'set' attribute of the
# samples whose ids are listed above — confirm against lib.attributors / lib.transformers.
as_test = AsConstant("test")
ds.setAttribute('set', FromIdList(test_ids, as_test))

# Display the dataset content, which now includes the 'set' column.
ds.dumpDataFrame()


Unnamed: 0,name,file_id,start_t,end_t,nobee,queen,hive,set
0,00-000000,1,0.0,1.0,0.00,0.0,CF001,
1,00-000001,1,1.0,2.0,0.00,0.0,CF001,
2,00-000002,1,2.0,3.0,0.00,0.0,CF001,
3,00-000003,1,3.0,4.0,0.00,0.0,CF001,
4,00-000004,1,4.0,5.0,0.00,0.0,CF001,
...,...,...,...,...,...,...,...,...
24783,47-000587,48,587.0,588.0,0.00,1.0,Hive3,
24784,47-000588,48,588.0,589.0,0.85,1.0,Hive3,
24785,47-000589,48,589.0,590.0,1.00,1.0,Hive3,
24786,47-000590,48,590.0,591.0,1.00,1.0,Hive3,
