In [None]:
!pip install xeno-canto

In [None]:
# Download only the metadata
import json

!xeno-canto -m grp:"bats" q:"A"

# Uncomment the next line to get all data (WARNING 10 GiB)
# !xeno-canto -dl grp:"bats" q:"A"

dir = "dataset/metadata/grp_batsq_A"

with open(dir+"/page1.json", 'r') as file:
    data = json.load(file)
    print("API message:", data["message"])
    print("Number of recordings:", data["numRecordings"])
    print("Number of pages:", data["numPages"])

In [None]:
import os
import json
import pandas as pd

# Convert metadata into frames
# Collect the JSON page files in a deterministic (sorted) order; os.scandir
# yields entries in arbitrary order, and anything that is not a .json file
# (e.g. OS-generated hidden files) would make json.load fail.
page_paths = sorted(
    entry.path
    for entry in os.scandir(dir)
    if entry.is_file() and entry.name.endswith(".json")
)

frames = []
for page_path in page_paths:
    with open(page_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    # One DataFrame per page, built from that page's "recordings" list.
    df = pd.DataFrame(data["recordings"])
    frames.append(df)

# ignore_index=True gives the combined frame a unique 0..n-1 index. Without
# it every page restarts at 0, leaving duplicate labels that break
# label-based alignment in later operations.
all_data = pd.concat(frames, ignore_index=True)

# Print some metadata
print(all_data.columns.values.tolist())


# all_data.head()

# print(all_data)


In [None]:
def to_seconds(x):
    """Convert a colon-separated duration string to seconds.

    Accepts any number of base-60 fields, most significant first, so both
    ``"m:ss"`` and ``"h:mm:ss"`` are handled (a bare ``"ss"`` works too).

    Args:
        x: Duration string such as ``"2:30"`` or ``"1:02:03"``.

    Returns:
        The duration in seconds as a float.

    Raises:
        ValueError: If any field cannot be parsed as a number.
    """
    total = 0.0
    # Fold left to right: each additional field shifts the running total
    # up by a factor of 60 (hours -> minutes -> seconds).
    for field in x.split(':'):
        total = total * 60 + float(field)
    return total

# Convert the "length" strings to seconds and give the columns readable names.
# Guarded on the presence of the raw "length" column so that re-running this
# cell is a no-op: after the first run the column has already been converted
# and renamed to "audio_length", and converting again would fail.
if "length" in all_data.columns:
    all_data = all_data.assign(
        length=all_data["length"].apply(to_seconds)
    ).rename(columns={"gen": "genus", "en": "english_name", "cnt": "country", "type": "call_type", "length": "audio_length", "dvc": "device", "mic": "microphone"})

# Keep only the columns used below; .copy() makes `data` an independent frame
# so later modifications cannot trigger SettingWithCopyWarning.
data = all_data[["id", "genus", "english_name", "country", "call_type", "sex", "audio_length", "device", "microphone"]].copy()
data.head()


In [None]:
import matplotlib.pyplot as plt
import seaborn

def _plot_histogram(frame, column, title, xlabel=None, rotate_ticks=True):
    """Draw a histogram of one column of ``frame`` on a new 10x3 figure.

    Args:
        frame: DataFrame containing ``column``.
        column: Name of the column to plot on the x axis.
        title: Title of the axes.
        xlabel: Optional x-axis label; when None the label set by seaborn
            is left in place.
        rotate_ticks: Rotate x tick labels by 90 degrees (useful for
            categorical columns with many values).

    Returns:
        The ``(fig, ax)`` pair that was created.
    """
    fig, ax = plt.subplots(figsize=(10, 3))
    if rotate_ticks:
        ax.tick_params(axis='x', labelrotation=90)
    ax.set_title(title)
    # Always pass the axes explicitly instead of relying on the current axes.
    seaborn.histplot(x=column, data=frame, ax=ax)
    if xlabel is not None:
        # Must come after histplot, which sets its own x label.
        ax.set_xlabel(xlabel)
    return fig, ax


# Categorical columns: one histogram each, with rotated tick labels.
for column, title in [
    ("genus", "Histogram of bat genus"),
    ("sex", "Histogram of bat sex"),
    ("country", "Histogram of country of observation"),
    ("call_type", "Histogram of call type"),
]:
    fig, ax = _plot_histogram(data, column, title)

# Numeric column: horizontal tick labels and an explicit unit on the x axis.
fig, ax = _plot_histogram(
    data,
    "audio_length",
    "Histogram of audio fragment duration",
    xlabel="Audio duration (s)",
    rotate_ticks=False,
)