In [3]:
##Imports
import os
import csv
import math

import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt

import scipy as sp
from scipy import io
from scipy.io import wavfile as wav
from scipy import signal as sgnp
from scipy.interpolate import InterpolatedUnivariateSpline as ius
from scipy import fftpack
from scipy import stats

import h5py
import xlrd

%matplotlib inline

In [2]:
bird_names = set(['blk161', 'Orange222', 'r87', 'White293'])

In [43]:
def get_file_table(bird, file_table_path='C:\\Users\\abre049.UOA\\Documents\\data\\file tables'):
    file_path = file_table_path + '\\' + bird + '.csv'
    file_table_data = {} # the return object
    with open(file_path, 'r') as f: # opens the .csv file
            reader = csv.reader(f)
            row_numb = 0 # counter to keep track of which row as some data must be excluded as it is garbage
            for row in reader: # iterates through the .csv rows
                if row_numb >= 29 and len(row) > 1: # identify the data body, ignoring empty rows
                    print row
                    for val in row[1:3]: # cleans the data as there are useless chars arond the useful info
                        if val[:2] == " '":
                            file_name = val[2:-1] # some info are strings
                        elif len(val) > 0:
                            file_index = int(val[1:]) # convert the numbers from strings to floats
                    file_table_data[file_name] = file_index
                    file_table_data[file_index] = file_name
                row_numb += 1
            f.close()
    return file_table_data

file_table = {}
for bird in bird_names:
    file_table[bird] = get_file_table(bird)
print file_table.keys()
print file_table['Orange222']['v0417t174548v2.wav']

["  ('blk161'", ' 113', " 'v0302t144654v2.wav'", ' 42219.3969038', ' 0)', '']
["  ('blk161'", ' 114', " 'v0303t131839v1.wav'", ' 42219.396907', ' 0)', '']
["  ('blk161'", ' 115', " 'v0305t182122v4.wav'", ' 42219.39691063', ' 0)', '']
["  ('blk161'", ' 116', " 'v0305t182330v8.wav'", ' 42219.39691396', ' 0)', '']
["  ('blk161'", ' 117', " 'v0305t184151v4.wav'", ' 42219.39691714', ' 0)', '']
["  ('blk161'", ' 118', " 'v0305t184151v8.wav'", ' 42219.39692033', ' 0)', '']
["  ('blk161'", ' 119', " 'v0305t185025v3.wav'", ' 42219.39692427', ' 0)', '']
["  ('blk161'", ' 120', " 'v0306t083553v2.wav'", ' 42219.39692755', ' 0)', '']
["  ('blk161'", ' 121', " 'v0306t095351v7.wav'", ' 42219.39693106', ' 0)', '']
["  ('blk161'", ' 122', " 'v0306t101617v3.wav'", ' 42219.39693429', ' 0)', '']
["  ('blk161'", ' 123', " 'v0306t101637v8.wav'", ' 42219.39693772', ' 0)', '']
["  ('blk161'", ' 124', " 'v0306t183809v7.wav'", ' 42219.39694113', ' 0)', '']
["  ('blk161'", ' 125', " 'v0306t184324v7.wav'", ' 4221

#import fearture tables into python

The below code imports the .csv raw feature tables made from SAP2011 and converst them into a dictionary. In ordre to avoid lond load times in the future I manually went through each bird and dumped the dictionaries as pickle files in the same folder. This was the file can be easily read in the main program and will reduce clutter in the main notebook.

In [64]:
def get_feature_table(bird, file_table_path='C:\\Users\\abre049.UOA\\Documents\\data\\raw feature tables'):
    '''
    This function takes the name of a bird and the file path for the feature table 
    (.csv files made by SAP2011 backpup feature) and returns a dict with the format
    feature_table = {'feature_name': [feature_array]}
    
    Note that this function also requires the global variable "file_table" made with the get_file_table function as above
    
    feature names include:
        'amplitude'
        'mean_frequency_amp'
        'pitch'
        'mean_frequency'
        'FM'
        'am'
        'goodness'
        'entropy'
        'peak_frequency'
        'DAS'
        'continuity_t'
    '''
    file_path = file_table_path + '\\' + bird + '.csv'
#     file_table_data = [] # the return object
    feature_table = {}
    with open(file_path, 'r') as f: # opens the .csv file
            reader = csv.reader(f)
            headings = [] # will become the first row
            row_data = [] # used to make the table body
            row_numb = 0 # counter to keep track of which row as some data must be excluded as it is garbage
            for row in reader: # iterates through the .csv rows
                if row_numb < 50:
                    print row_numb, row
                if row_numb >= 39 and len(row) > 1:
                    if file_table[bird][int(row[1][1:])] not in feature_table.keys():
                        feature_table[file_table[bird][int(row[1][1:])]]  = {'amplitude': [int(row[2][1:])], 
                                                       'mean_frequency_amp': [int(row[3][1:])], 
                                                       'pitch': [int(row[4][1:])], 
                                                       'mean_frequency': [int(row[5][1:])], 
                                                       'FM': [int(row[6][1:])], 
                                                       'am': [int(row[7][1:])], 
                                                       'goodness': [int(row[8][1:])], 
                                                       'entropy': [int(row[9][1:])], 
                                                       'peak_frequency': [int(row[10][1:])], 
                                                       'DAS': [int(row[11][1:])], 
                                                       'continuity_t': [int(row[12][1:])]}
                    else:
                        feature_table[file_table[bird][int(row[1][1:])]]['amplitude'].append(int(row[2][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['mean_frequency_amp'].append(int(row[3][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['pitch'].append(int(row[4][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['mean_frequency'].append(int(row[5][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['FM'].append(int(row[6][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['am'].append(int(row[7][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['goodness'].append(int(row[8][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['entropy'].append(int(row[9][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['peak_frequency'].append(int(row[10][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['DAS'].append(int(row[11][1:]))
                        feature_table[file_table[bird][int(row[1][1:])]]['continuity_t'].append(int(row[12][1:]))
                row_numb += 1
            f.close()
    return feature_table

feature_table = get_feature_table('r87')
# print feature_table[:3]

# plt.plot(feature_table[113]['amplitude'])
# plt.show()

0 ['-- MyDAC version: 7.0.1']
1 ['-- MySQL server version: 5.5.16']
2 ['-- MySQL client version: 7.0.0 Direct']
3 ['-- Script date 8/3/2015 11:25:35']
4 ['-- ---------------------------------------------------------------------- ']
5 ['-- Server: localhost']
6 ['-- Database: SAP']
7 []
8 ['/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS', ' FOREIGN_KEY_CHECKS=0 */;']
9 ['/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE', " SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;"]
10 ['-- ']
11 ['-- Table structure for table  raw_r87_3628']
12 ['-- ']
13 []
14 ['DROP TABLE IF EXISTS raw_r87_3628;']
15 ['CREATE TABLE `raw_r87_3628` (']
16 ["  `time` int(9) NOT NULL DEFAULT '0'", '']
17 ["  `file_index` int(7) NOT NULL DEFAULT '0'", '']
18 ["  `amplitude` smallint(6) NOT NULL DEFAULT '0'", '']
19 ["  `mean_frequency_amp` smallint(6) NOT NULL DEFAULT '0'", '']
20 ["  `pitch` int(7) NOT NULL DEFAULT '0'", '']
21 ["  `mean_frequency` int(7) NOT NULL DEFAULT '0'", '']
22 ["  `FM` smallint(6) NOT NULL DEFAULT '0'

In [65]:
print feature_table.keys()
print feature_table['v0115t165350v1.wav']

['v0119t161807v4.wav', 'v0115t164544v2.wav', 'v0117t072632v2.wav', 'v0115t164818v4.wav', 'v0115t164728v1.wav', 'v0119t162730v5.wav', 'v0115t165350v1.wav', 'v0119t162542v5.wav', 'v0115t163019v3.wav', 'v0119t162004v4.wav', 'v0119t162610v9.wav', 'v0117t072739v2.wav', 'v0119t162245v4.wav', 'v0119t162042v6.wav', 'v0117t072453v2.wav', 'v0115t164324v3.wav', 'v0119t162610v8.wav', 'v0115t165330v3.wav', 'v0115t170002v4.wav', 'v0117t073343v2.wav', 'v0115t164410v3.wav', 'v0115t164510v4.wav', 'v0115t165433v3.wav', 'v0119t081425v2.wav', 'v0117t072601v3.wav', 'v0119t162202v2.wav', 'v0117t074159v3.wav', 'v0119t162802v5.wav', 'v0115t164510v3.wav', 'v0119t162447v3.wav', 'v0115t165433v2.wav', 'v0119t162245v8.wav', 'v0119t162610v7.wav', 'v0115t165350v2.wav', 'v0115t163019v5.wav', 'v0119t162730v4.wav', 'v0119t162245v5.wav', 'v0119t162245v3.wav', 'v0115t164818v3.wav', 'v0115t164641v3.wav', 'v0115t165506v3.wav', 'v0115t164544v3.wav', 'v0115t165330v4.wav', 'v0115t165330v5.wav', 'v0119t162512v4.wav', 'v0119t16

Below is the function used to dunp the pickle files

In [69]:
import cPickle as pickle

# pickle.dump(feature_table, open('C:\\Users\\abre049.UOA\\Documents\\data\\raw feature tables\\r87\\'+song[:-3]+'p', "wb" ))

for bird in bird_names:
    feature_table = get_feature_table(bird)
    for song in feature_table.keys():
        pickle.dump(feature_table[song], open('C:\\Users\\abre049.UOA\\Documents\\data\\raw feature tables\\'+bird+'\\'+song[:-3]+'p', "wb" ))

0 ['-- MyDAC version: 7.0.1']
1 ['-- MySQL server version: 5.5.16']
2 ['-- MySQL client version: 7.0.0 Direct']
3 ['-- Script date 8/3/2015 11:28:17']
4 ['-- ---------------------------------------------------------------------- ']
5 ['-- Server: localhost']
6 ['-- Database: SAP']
7 []
8 ['/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS', ' FOREIGN_KEY_CHECKS=0 */;']
9 ['/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE', " SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;"]
10 ['-- ']
11 ['-- Table structure for table  raw_blk161_3616']
12 ['-- ']
13 []
14 ['DROP TABLE IF EXISTS raw_blk161_3616;']
15 ['CREATE TABLE `raw_blk161_3616` (']
16 ["  `time` int(9) NOT NULL DEFAULT '0'", '']
17 ["  `file_index` int(7) NOT NULL DEFAULT '0'", '']
18 ["  `amplitude` smallint(6) NOT NULL DEFAULT '0'", '']
19 ["  `mean_frequency_amp` smallint(6) NOT NULL DEFAULT '0'", '']
20 ["  `pitch` int(7) NOT NULL DEFAULT '0'", '']
21 ["  `mean_frequency` int(7) NOT NULL DEFAULT '0'", '']
22 ["  `FM` smallint(6) NOT NULL DE

In [72]:
import cPickle as pickle

favorite_color = pickle.load( open( 'C:\\Users\\abre049.UOA\\Documents\\data\\raw feature tables\\r87\\v0115t164212v3.p', "rb" ) )
print favorite_color

{'amplitude': [2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -2, -2, -2, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -2, -2, -2, -1, -1, -1, -1, -1, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -3, -3, -3, -2, -2, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -2, -2, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -1, -2, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,

#make pickle files of the syllable tables


[['recnum', 'serial_number', 'bird_ID', 'start_on', 'duration', 'mean_pitch', 'mean_peak_freq', 'mean_FM', 'mean_entropy', 'mean_goodness_of_pitch', 'mean_mean_frequency', 'mean_am2', 'mean_amplitude', 'mean_continuity_f', 'mean_continuity_t', 'mean_DAS', 'min_pitch', 'min_peak_freq', 'min_FM', 'min_entropy', 'min_meanFr', 'min_DAS', 'max_pitch', 'max_peak_freq', 'max_FM', 'max_amplitude', 'max_entropy', 'max_meanFr', 'max_DAS', 'max_continuity_f', 'max_continuity_t', 'var_pitch', 'var_FM', 'var_entropy', 'var_goodness_of_pitch', 'var_mean_frequency', 'var_am', 'month', 'day', 'hour', 'minute', 'second', 'cluster', 'file_name', 'comments'], [1.0, 42216.42456, 'blk161', 779.229, 213.515, 568.0, 3832.0, 47.0, -1.68, 347.0, 3241.0, 0.01478, 39.7, -141.89, 12.96, 40.67, 303.0, 3832.0, 0.6, -2.99, 1457.0, 20.0, 4042.0, 3832.0, 84.4, 48.0, -0.59, 3959.0, 85.0, -104.87, 35.85, 0.0, 564.0, 0.423124, 75300.0, 283000.0, 0.01485, 7.0, 31.0, 11.0, 47.0, 36.0, 0.0, 'v0307t072941v2.wav', "'"], [2.0, 42216.42456, 'blk161', 1008.71, 81.8141, 5596.0, 3832.0, 35.6, -5.1, 71.4, 5735.0, 0.02248, 47.2, 206.49, 7.52, 46.34, 858.0, 3832.0, 0.4, -6.32, 4925.0, 20.0, 7137.0, 3832.0, 89.0, 51.0, -2.26, 6677.0, 71.0, 205.96, 20.0, 0.0, 1056.0, 0.936668, 7000.0, 231000.0, 0.02276, 7.0, 31.0, 11.0, 47.0, 36.0, 0.0, 'v0307t072941v2.wav', "'"]]



In [4]:
def get_syl_data(bird, syllable_table_path='C:\\Users\\abre049.UOA\\Documents\\data\\syllable tables'):
    '''
    Takes the directories of the .cvs file with syllable data and cluster number (produced with SAP2011 backup function).
    Return a 2d list with data
    Parameters:
        syll_data_path - a directory containing the .cvs file produced by SAP2011 backup function
            with cols of song_name, start time, duration, mean_freq etc
    Returns:
        list of lists. first row contains the headings of the columns as strings. The remaining lists are the data as either
            strings or floats
    Exceptions: nil
    '''
    
    file_path = syllable_table_path + '\\' + bird + '.csv'
    syllable_table = []
    with open(file_path, 'r') as f: # opens the .csv file
        reader = csv.reader(f)
        row_data = [] # used to make the table body
        row_numb = 0 # counter to keep track of which row as some data must be excluded as it is garbage
        
        for row in reader: # iterates through the .csv rows
            if row_numb >= 70 and len(row) > 1:
                if len(row[43][1:]) < 2:
                    print row
#                 if row[2][2:-1] not in syllable_table.keys():
                syllable_table.append({'serial_number': float(row[1][1:]), 
                                         'file_name': row[43][2:-1],
                                         'start_on': float(row[3][1:]), 
                                         'duration': float(row[4][1:]), 
                                         'mean_pitch': float(row[5][1:]), 
                                         'mean_peak_freq': float(row[6][1:]), 
                                         'mean_FM': float(row[7][1:]), 
                                         'mean_entropy': float(row[8][1:]), 
                                         'mean_goodness_of_pitch': float(row[9][1:]), 
                                         'mean_mean_frequency': float(row[10][1:]), 
                                         'mean_am2': float(row[11][1:]), 
                                         'mean_amplitude': float(row[12][1:]), 
                                         'mean_continuity_f': float(row[13][1:]), 
                                         'mean_continuity_t': float(row[14][1:]), 
                                         'mean_DAS': float(row[15][1:]), 
                                         'min_pitch': float(row[16][1:]), 
                                         'min_peak_freq': float(row[17][1:]), 
                                         'min_FM': float(row[18][1:]), 
                                         'min_entropy': float(row[19][1:]), 
                                         'min_meanFr': float(row[20][1:]), 
                                         'min_DAS': float(row[21][1:]), 
                                         'max_pitch': float(row[22][1:]), 
                                         'max_peak_freq': float(row[23][1:]), 
                                         'max_FM': float(row[24][1:]), 
                                         'max_amplitude': float(row[25][1:]), 
                                         'max_entropy': float(row[26][1:]), 
                                         'max_meanFr': float(row[27][1:]), 
                                         'max_DAS': float(row[28][1:]), 
                                         'max_continuity_f': float(row[29][1:]), 
                                         'max_continuity_t': float(row[30][1:]), 
                                         'var_pitch': float(row[31][1:]), 
                                         'var_FM': float(row[32][1:]), 
                                         'var_entropy': float(row[33][1:]), 
                                         'var_goodness_of_pitch': float(row[34][1:]), 
                                         'var_mean_frequency': float(row[35][1:]), 
                                         'var_am': float(row[36][1:]), 
                                         'cluster': int(row[42][1:])})
                                                                                        
            row_numb += 1
        f.close()
    return syllable_table

syllable_table = get_syl_data('blk161')
print syllable_table[1].keys()
print syllable_table[1]['start_on']
print syllable_table[1]['duration']

# for syls in syllable_table:
#     print syls['cluster']
                                                     
                                                     


['max_peak_freq', 'mean_mean_frequency', 'max_continuity_t', 'max_entropy', 'max_FM', 'min_DAS', 'var_pitch', 'mean_continuity_t', 'file_name', 'min_meanFr', 'var_entropy', 'min_pitch', 'max_meanFr', 'cluster', 'start_on', 'min_FM', 'duration', 'mean_goodness_of_pitch', 'mean_peak_freq', 'mean_DAS', 'mean_continuity_f', 'var_goodness_of_pitch', 'max_amplitude', 'mean_pitch', 'max_pitch', 'mean_entropy', 'max_DAS', 'var_am', 'max_continuity_f', 'mean_FM', 'var_FM', 'min_entropy', 'var_mean_frequency', 'serial_number', 'min_peak_freq', 'mean_am2', 'mean_amplitude']
1008.71
81.8141


In [89]:
import cPickle as pickle

# for bird in bird_names:
for bird in ['r87']:
    syllable_table = get_syl_data(bird)
    pickle.dump(syllable_table, open('C:\\Users\\abre049.UOA\\Documents\\data\\syllable tables\\'+bird+'.p', "wb" ))

In [60]:
import cPickle as pickle
print pickle.load(open('C:\\Users\\abre049.UOA\\Documents\\data\\raw feature tables\\blk161\\v0308t095713v5.p'), 'rb')

TypeError: load() takes exactly one argument (2 given)

In [5]:
#For blk161 motifs for testing various different delay times

import cPickle as pickle

syllable_table = get_syl_data('blk161', syllable_table_path='C:\\Users\\abre049.UOA\\Documents\\data\\blk161 motif')
pickle.dump(syllable_table, open('C:\\Users\\abre049.UOA\\Documents\\data\\blk161 motif\\blk161_motif.p', "wb" ))