In [38]:
import aifc
import base64
import csv
import io
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import requests
from scipy import signal

# Location of the unpacked whale-detection-challenge data set.
# NOTE: this is an absolute, machine-specific path; adjust it before running elsewhere.
datafolder = "/Users/george/Downloads/whale-detection-challenge/data/"
trainfolder = datafolder+"train/"

# Number of clips drawn at random from each class.
SUBSET_SIZE = 2500

# Split the labelled clip names by class. Each row of train.csv is read through
# its 'clip_name' and 'label' columns; a label of '1' marks a whale clip.
whale_samples = []
not_whale_samples = []
# newline='' lets the csv module do its own newline handling (see the csv docs).
with open(datafolder+"train.csv", newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        filename = row['clip_name']
        if row['label'] == '1':
            whale_samples.append(filename)
        else:
            not_whale_samples.append(filename)
print(f"Number of whale samples: {len(whale_samples)}")
print(f"Number of not-whale samples: {len(not_whale_samples)}")

# Fixed seed so that the same subsets are drawn on every run.
# random.sample raises ValueError if a class has fewer than SUBSET_SIZE clips.
random.seed(2350834243)
whale_subset = random.sample(whale_samples, SUBSET_SIZE)
not_whale_subset = random.sample(not_whale_samples, SUBSET_SIZE)
print(f"Count of random whale subset: {len(whale_subset)}")
print(f"Count of random not-whale subset: {len(not_whale_subset)}")


Number of whale samples: 7027
Number of not-whale samples: 22973
Count of random whale subset: 2500
Count of random not-whale subset: 2500


In [42]:

# Helper class for nyckel API
class Nyckel:
    """Small client for the parts of the Nyckel REST API used in this notebook.

    It fetches an access token with the client-credentials grant, creates an
    image-classification function with a set of labels, and uploads annotated
    image samples to that function. Every HTTP call is checked with
    ``raise_for_status()`` so that a failed request stops the run instead of
    being silently ignored.
    """

    def __init__(self, client_id, client_secret):
        """Store the credentials; no network request is made until one is needed."""
        self.client_id = client_id
        self.client_secret = client_secret
        self.auth_token = ''

    def ensure_auth_token(self):
        """Fetch and cache an access token unless one is already cached.

        Raises:
            requests.HTTPError: if the token endpoint answers with an error status.
        """
        if not self.auth_token:
            data = {'client_id': self.client_id, 'client_secret': self.client_secret,
                    'grant_type': 'client_credentials'}
            result = requests.post(
                'https://www.nyckel.com/connect/token', data=data)
            # Surface bad credentials as an HTTP error instead of a KeyError below.
            result.raise_for_status()
            self.auth_token = result.json()['access_token']

    def _auth_headers(self):
        """Return the bearer-token header, fetching the token first if needed."""
        self.ensure_auth_token()
        return {"authorization": "Bearer " + self.auth_token}

    def create_function(self, name, label_names):
        """Create an Image -> Classification function and add its labels.

        Args:
            name: name given to the new function.
            label_names: iterable of label names to create on the function.

        Returns:
            The id of the new function with its "function_" prefix removed.

        Raises:
            requests.HTTPError: if creating the function or any label fails.
        """
        headers = self._auth_headers()
        result = requests.post(
            "https://www.nyckel.com/v1/functions",
            json={"name": name, "input": "Image", "output": "Classification"},
            headers=headers)
        result.raise_for_status()
        function_id = result.json()['id'].replace("function_", "")
        for label_name in label_names:
            label_result = requests.post(
                f"https://www.nyckel.com/v1/functions/{function_id}/labels",
                json={"name": label_name},
                headers=headers)
            # Samples are annotated by label name, so a missing label must not pass silently.
            label_result.raise_for_status()
        return function_id

    def base64encoded_image(self, img_bytes):
        """Return ``img_bytes`` as a base64 data URI string."""
        encoded_string = base64.b64encode(img_bytes).decode("utf-8")
        # NOTE(review): the registered MIME type is "image/jpeg"; "image/jpg" is kept
        # unchanged because that is what the notebook was run with - confirm with the API.
        return "data:image/jpg;base64," + encoded_string

    def post_image(self, function_id, image_bytes, external_id, label_name):
        """Upload one annotated image sample to a function.

        Args:
            function_id: id as returned by ``create_function``.
            image_bytes: encoded image content as bytes.
            external_id: sent as the sample's "externalId".
            label_name: sent as the sample's annotation "labelName".

        Returns:
            The ``requests`` response of the upload call.

        Raises:
            requests.HTTPError: if the upload is rejected.
        """
        # Named `payload` rather than `json` so the json module is not shadowed.
        payload = {"data": self.base64encoded_image(image_bytes),
                   "externalId": external_id,
                   "annotation": {"labelName": label_name}}
        result = requests.post(
            f"https://www.nyckel.com/v1/functions/{function_id}/samples",
            json=payload,
            headers=self._auth_headers())
        result.raise_for_status()
        return result


# Helper class for creating a spectrogram from an audio signal
class Spectrogram:
    """Turns an AIFF audio clip into the bytes of a JPEG spectrogram image."""

    def get_time_series(self, image_path):
        """Read every frame of an AIFF clip.

        Args:
            image_path: path of the AIFF audio file (the name is historical;
                it is an audio file, not an image).

        Returns:
            ``(values, sample_rate)`` where ``values`` is a native-endian int16
            array and ``sample_rate`` is the clip's frame rate.
        """
        # The context manager closes the file handle even if reading fails.
        with aifc.open(image_path) as s:
            sample_rate = s.getframerate()
            raw_frames = s.readframes(s.getnframes())
        # The frames are decoded as big-endian 16-bit integers explicitly, so the
        # result does not depend on the host byte order.
        # NOTE(review): assumes 16-bit mono samples - confirm for other inputs.
        values = np.frombuffer(raw_frames, dtype='>i2').astype(np.int16)
        return values, sample_rate

    def get_plot_bytes(self, plt):
        """Render ``plt`` as a JPEG and return the encoded bytes.

        Args:
            plt: any object with a ``savefig(file, format=...)`` method, such as
                the pyplot module or a Figure.
        """
        with io.BytesIO() as buffer:  # render in memory, no temporary file
            plt.savefig(buffer, format='jpg')
            return buffer.getvalue()

    def process_audio(self, image_path):
        """Return the JPEG bytes of the spectrogram of one AIFF clip.

        Args:
            image_path: path of the AIFF audio file.
        """
        series, rate = self.get_time_series(image_path)
        f, t, Sxx = signal.spectrogram(series, rate, nperseg=256, noverlap=224)
        # A dedicated figure keeps the plot independent of whatever pyplot figure
        # is current, and the finally block releases it even if rendering fails.
        fig, ax = plt.subplots()
        try:
            ax.pcolormesh(t, f, Sxx)
            return self.get_plot_bytes(fig)
        finally:
            plt.close(fig)


# Insert client_id and client_secret for nyckel below
# Nyckel credentials are read from the environment so that secrets are never
# saved into the notebook: set NYCKEL_CLIENT_ID and NYCKEL_CLIENT_SECRET before
# starting the kernel. Both fall back to an empty string when unset.
client_id = os.environ.get("NYCKEL_CLIENT_ID", "")
client_secret = os.environ.get("NYCKEL_CLIENT_SECRET", "")
nyckel = Nyckel(client_id, client_secret)
spectrogram = Spectrogram()

# Create an image classification function with labels "whale" and "not whale"
function_id = nyckel.create_function("whale_test", ["whale", "not whale"])
print(f"Created function {function_id}")

# Number of clips uploaded per label.
SAMPLES_PER_LABEL = 10

# Each clip is converted to a spectrogram image and uploaded with its label;
# the clip name is used as the sample's external id.
for description, label_name, subset in (
        ("whale", "whale", whale_subset),
        ("not-whale", "not whale", not_whale_subset)):
    print(f"Adding {description} samples")
    for sample in subset[:SAMPLES_PER_LABEL]:
        filename = trainfolder+sample
        image_bytes = spectrogram.process_audio(filename)
        nyckel.post_image(function_id, image_bytes, sample, label_name)

print("Done adding samples")

# TODO:
# - Measure accuracy with ~1000 samples in the test set
# - Show a confusion matrix
# - Show a couple of examples of whale and not-whale spectrograms
# - Add more detailed comments to all the code

Created function p5conud3p4v7570h
Adding whale samples
Adding not-whale samples
Done adding samples
