Retrieve Dataset

In [2]:
import warnings                            # This block prevents display of harmless warnings, but should be
warnings.filterwarnings('ignore')          # commented out till the final version, to avoid missing "real" warnings 
import kilroy_was_here                     # Mandatory. Allow access to shared python code in the upper 'lib' directory
from lib.audiodataset import AudioDataset  # Class for audio dataset handling
from lib.attributors import addAttr

# The dataset name is the master key used to address a dataset
DATASET_NAME = 'SMALL1005'

# Initialize the dataset object. By NOT providing a source path, you implicitly
# express the intent of RETRIEVING an existing dataset rather than CREATING a new one
ds = AudioDataset(DATASET_NAME)

# Log a summary of the dataset (paths, sample rate, chunk duration/overlap, counts)
ds.info()

[2020-08-05/15:27:12.534|04.2%|70.1%|0.25GB] ------------------------------------------------------
[2020-08-05/15:27:12.534|00.0%|70.1%|0.25GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\SMALL1005
[2020-08-05/15:27:12.535|00.0%|70.1%|0.25GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\SMALL1005\database.db
[2020-08-05/15:27:12.535|00.0%|70.1%|0.25GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\SMALL1005\samples
[2020-08-05/15:27:12.535|00.0%|70.1%|0.25GB] NB SOURCE AUDIO FILES : 4
[2020-08-05/15:27:12.535|00.0%|70.1%|0.25GB] SAMPLE RATE           : 22050
[2020-08-05/15:27:12.535|00.0%|70.1%|0.25GB] DURATION              : 1.0
[2020-08-05/15:27:12.536|00.0%|70.1%|0.25GB] OVERLAP               : 0.5
[2020-08-05/15:27:12.536|00.0%|70.1%|0.25GB] NB AUDIO CHUNKS       : 4744
[2020-08-05/15:27:12.536|00.0%|70.1%|0.25GB] ------------------------------------------------------


In [2]:
# NOTE(review): `fromFileName` is not defined or imported in any cell visible here,
# so this call presumably raises NameError on a fresh kernel -- confirm the intended
# argument (a later cell imports `FromFileName` from lib.labelizers).
# The output recorded under this cell is a dataset summary and looks stale.
addAttr(ds, 'myAttr', fromFileName, )

[2020-08-04/10:10:31.078|08.2%|68.0%|0.25GB] ------------------------------------------------------
[2020-08-04/10:10:31.078|00.0%|68.0%|0.25GB] DATASET PATH          : D:\Jupyter\ShowBees\datasets\SMALL1005
[2020-08-04/10:10:31.079|00.0%|68.0%|0.25GB] DATASET DB PATH       : D:\Jupyter\ShowBees\datasets\SMALL1005\database.db
[2020-08-04/10:10:31.079|00.0%|68.0%|0.25GB] DATASET SAMPLES PATH  : D:\Jupyter\ShowBees\datasets\SMALL1005\samples
[2020-08-04/10:10:31.080|00.0%|68.0%|0.25GB] NB SOURCE AUDIO FILES : 4
[2020-08-04/10:10:31.080|00.0%|68.0%|0.25GB] SAMPLE RATE           : 22050
[2020-08-04/10:10:31.081|00.0%|68.0%|0.25GB] DURATION              : 1.0
[2020-08-04/10:10:31.082|00.0%|68.0%|0.25GB] OVERLAP               : 0.5
[2020-08-04/10:10:31.082|00.0%|68.0%|0.25GB] NB AUDIO CHUNKS       : 4744
[2020-08-04/10:10:31.083|00.0%|68.0%|0.25GB] ------------------------------------------------------


In [1]:
import contextlib
import re
import sqlite3


def setAttr(_ds_, name, attributor, source):
    """Compute an attribute and store it in the ``attributes`` table.

    ``source.format(name)`` must produce a SQL query whose result rows are
    ``(sample_id, name, value)`` triples. The query may call the one-argument
    SQL function ``transform``, which is bound to ``attributor``. Every row
    returned is written with ``INSERT OR REPLACE``.

    Args:
        _ds_: Object exposing the SQLite database location as ``db_path``.
        name: Attribute name, substituted into ``source`` with ``str.format``.
        attributor: One-argument callable registered as SQL ``transform``.
        source: SQL query template containing a placeholder for ``name``.

    Returns:
        None.
    """
    # NOTE(review): ``name`` is formatted straight into the SQL text, so both
    # ``source`` and ``name`` must come from trusted code.
    sql = source.format(name)

    # sqlite3's own context manager only commits or rolls back; it never closes
    # the connection. ``closing`` guarantees the handle is released.
    with contextlib.closing(sqlite3.connect(_ds_.db_path)) as db:
        db.create_function("transform", 1, attributor)
        with db:  # commit on success, roll back if anything below raises
            rows = db.execute(sql).fetchall()
            db.executemany(
                "INSERT OR REPLACE INTO attributes (sample_id, name, value) VALUES (?,?,?)",
                rows)
        

def getAttrNames(_ds_):
    """Return the distinct attribute names stored in the ``attributes`` table.

    Args:
        _ds_: Object exposing the SQLite database location as ``db_path``.

    Returns:
        A list with one entry per distinct ``name`` value.
    """
    # ``closing`` releases the connection; sqlite3's own context manager
    # would only end the transaction and leave the handle open.
    with contextlib.closing(sqlite3.connect(_ds_.db_path)) as db:
        rows = db.execute("SELECT DISTINCT name from attributes").fetchall()
    return [row[0] for row in rows]
  
        
        
def getLabelSerie(_ds_, name, fields):
    """Fetch the requested columns of every ``labels`` row with the given name.

    Args:
        _ds_: Object exposing the SQLite database location as ``db_path``.
        name: Label name to filter on; bound as a SQL parameter, so it may
            contain quotes or any other character.
        fields: Iterable of column expressions placed in the SELECT list.

    Returns:
        A list of tuples, one per matching row, with one item per field.
    """
    # NOTE(review): column expressions cannot be bound as parameters, so
    # ``fields`` is still interpolated into the SQL text and must be trusted.
    sql = f"SELECT {', '.join(fields)} from labels where name = ?"

    # ``closing`` releases the connection; sqlite3's own context manager
    # would only end the transaction and leave the handle open.
    with contextlib.closing(sqlite3.connect(_ds_.db_path)) as db:
        return db.execute(sql, (name,)).fetchall()

    
def getAttrSerie(_ds_, name):
    """Return the ``value`` column of every ``attributes`` row with the given name.

    Args:
        _ds_: Object exposing the SQLite database location as ``db_path``.
        name: Attribute name to filter on; bound as a SQL parameter, so it
            may contain quotes or any other character.

    Returns:
        A list of values, one per matching row.
    """
    # ``closing`` releases the connection; sqlite3's own context manager
    # would only end the transaction and leave the handle open.
    with contextlib.closing(sqlite3.connect(_ds_.db_path)) as db:
        rows = db.execute(
            "SELECT value from attributes where name = ?", (name,)).fetchall()
    return [row[0] for row in rows]
    
      

In [3]:
import warnings
warnings.filterwarnings('ignore')
import sqlite3
import kilroy_was_here
from lib.audiodataset import AudioDataset 
from lib.labelizers import FromFileName, FromAnnotation
from lib.transformers import StringMapper, StringMatcher
 

# The dataset name is the master key used to address a dataset
DATASET_NAME = 'SMALL1005'
# NOTE(review): absolute, machine-specific path -- consider making it configurable
SOURCE_PATH ='D:/datasets/sounds/Nolasco'

ds = AudioDataset(DATASET_NAME)

# (pattern, value) pairs used to derive the 'queen' label from file names.
# NOTE(review): 'NO_QueenBee' is listed before 'QueenBee', which is a substring of it;
# presumably StringMapper returns the first matching entry -- confirm before reordering.
trsfrm_queen = StringMapper(
        [('(?i)active', 1),
         ('(?i)missing queen', 0),
         ('NO_QueenBee', 0),
         ('QueenBee', 1)
        ])

# Raw string: '\w' is not a valid escape sequence in a regular string literal and
# triggers a DeprecationWarning/SyntaxWarning; the pattern text itself is unchanged.
trsfrm_hive = StringMatcher(r"^(\w{5})")

ds.setLabel('queen', FromFileName(trsfrm_queen))


ds.setLabel('nobee', FromAnnotation(SOURCE_PATH))

ds.setAttr('hive', FromFileName(trsfrm_hive))






[2020-08-05/17:36:14.408|05.2%|77.2%|0.26GB] [2] Hive1_12_06_2018_QueenBee_H1_audio___15_00_00.wav
[2020-08-05/17:36:14.652|14.2%|77.2%|0.26GB] [3] Hive1_31_05_2018_NO_QueenBee_H1_audio___15_00_00.wav
[2020-08-05/17:36:14.936|14.1%|77.2%|0.26GB] [4] Hive3_15_07_2017_NO_QueenBee_H3_audio___06_10_00.wav
[2020-08-05/17:36:15.168|12.2%|76.9%|0.26GB] [5] Hive3_20_07_2017_QueenBee_H3_audio___06_10_00.wav


4744

In [3]:
# Drop each label in turn and print what dropLabel returns.
# 'Prout' is presumably a label that was never set (the recorded output shows 0 for it).
for label_name in ('queen', 'nobee', 'Prout'):
    print(ds.dropLabel(label_name))

4744
4744
0
