In [1]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.8.0-cp38-cp38-win_amd64.whl (438.0 MB)
Collecting gast>=0.2.1
  Downloading gast-0.5.3-py3-none-any.whl (19 kB)
Collecting tensorboard<2.9,>=2.8
  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
Collecting keras<2.9,>=2.8.0rc0
  Downloading keras-2.8.0-py2.py3-none-any.whl (1.4 MB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.24.0-cp38-cp38-win_amd64.whl (1.5 MB)
Collecting keras-preprocessing>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
Collecting astunparse>=1.6.0
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting absl-py>=0.4.0
  Downloading absl_py-1.0.0-py3-none-any.whl (126 kB)
Collecting protobuf>=3.9.2
  Downloading protobuf-3.19.4-cp38-cp38-win_amd64.whl (895 kB)
Collecting libclang>=9.0.1
  Downloading libclang-13.0.0-py2.py3-none-win_amd64.whl (13.9 MB)
Collecting grpcio<2.0,>=1.24.3
  Downloading grpcio-1.44.0-cp38-cp38

In [2]:
!pip install solos

Collecting solos
  Downloading solos-0.4.1-py3-none-any.whl (65 kB)
Collecting youtube-dl
  Downloading youtube_dl-2021.12.17-py2.py3-none-any.whl (1.9 MB)
Collecting googledrivedownloader
  Downloading googledrivedownloader-0.4-py2.py3-none-any.whl (3.9 kB)
Collecting Fire
  Downloading fire-0.4.0.tar.gz (87 kB)
Building wheels for collected packages: Fire
  Building wheel for Fire (setup.py): started
  Building wheel for Fire (setup.py): finished with status 'done'
  Created wheel for Fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115928 sha256=6e08bcb59399806145c16cf0e7d2fd73e505fe50711c178a67878f03719e534f
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\1f\10\06\2a990ee4d73a8479fe2922445e8a876d38cfbfed052284c6a1
Successfully built Fire
Installing collected packages: youtube-dl, googledrivedownloader, Fire, solos
Successfully installed Fire-0.4.0 googledrivedownloader-0.4 solos-0.4.1 youtube-dl-2021.12.17


In [3]:
!pip install soundfile

Collecting soundfile
  Downloading SoundFile-0.10.3.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl (689 kB)
Installing collected packages: soundfile
Successfully installed soundfile-0.10.3.post1


In [7]:
from tensorflow import keras
import tensorflow as tf
import json
from pathlib import Path
import random
import numpy as np
import os
import soundfile as sf

In [None]:
class DataManager:
    """Index and load per-instrument ``.wav`` recordings into memory.

    On construction the manager globs ``<data_dir>/<source>/*.wav`` for every
    instrument in ``self.sources``, keeps either the leading (training) or the
    trailing (evaluation) share of each sorted file list, and reads every
    selected file into a ``tf.constant``.

    Args:
        data_dir: Root directory holding one sub-directory per instrument.
        mix_no_min: Stored as ``self.mix_no_min``.
        training: ``True`` selects the training share of the files, ``False``
            the remainder. Stored as ``self.type``.
        mix_sources_max_no: Stored as ``self.mix_sources_max_no``; the cap
            checked by ``increase_n_mix``.
        mix_no_max: Stored as ``self.mix_no_max``; the counter bumped by
            ``increase_n_mix``.
        train_test_split: Fraction of each instrument's files assigned to the
            training share.
    """

    def __init__(self, data_dir, mix_no_min=2, training=True, mix_sources_max_no=4, mix_no_max=7, train_test_split=0.8):

        self.data_dir = data_dir
        self.type = training  # Boolean: True means training
        self.multimodal = False
        self.mix_no_min = mix_no_min
        self.mix_no_max = mix_no_max
        self.mix_sources_max_no = mix_sources_max_no
        self.train_test_split = train_test_split

        self.n_instruments = 13
        self.sources = ['Bassoon', 'Cello', 'Clarinet', 'DoubleBass', 'Flute',
                        'Horn', 'Oboe', 'Saxophone', 'Trombone', 'Trumpet', 'Tuba', 'Viola', 'Violin']

        self.source_weights = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]  # One weight per entry in self.sources
        self.down_freq = 8000  # Downsample to this frequency
        self.audio_len = 48000  # No of audio samples in each 'snapshot'
        self.ft_window_size = 1022
        self.ft_hop_size = 256
        self.epsilon = 1e-9
        self.log_sample_n = 256  # TODO No idea what this does, I'll figure it out later
        self.segment_len = 256
        self.energy_predicted_sum = 1e-4
        self.dummy_spectrogram_size = (14, 2, 512, 256)  # For tests

        self.metadata = self.load_meta()
        # NOTE(review): the window length is the hop size (256), not
        # ft_window_size (1022) -- confirm this is intended.
        self.window = tf.signal.hann_window(self.ft_hop_size)

        self.data = {}
        self.load_data()

        # We will be taking the results seen in papers at face value,
        # and using normalised linearised Fourier Transform Spectrogram, and Wiener post processing, alongside ratio masks and L2 loss

    def increase_n_mix(self):
        """Raise ``mix_no_max`` by one while it is below ``mix_sources_max_no``.

        Returns:
            ``True`` if the counter was incremented, ``False`` if it had
            already reached the cap.
        """
        # NOTE(review): the original referenced ``self.n_mix_max``, which is
        # never assigned anywhere in this class; ``mix_no_max`` is presumably
        # the attribute that was meant -- confirm.
        if self.mix_no_max < self.mix_sources_max_no:
            self.mix_no_max += 1
            return True
        return False

    def load_meta(self):
        """Build the file index for the active (training or evaluation) share.

        Returns:
            A dict mapping each source name to a list of
            ``(path, frame_count)`` tuples, where ``path`` is a ``pathlib.Path``
            and ``frame_count`` comes from ``soundfile.info(...).frames``.
        """
        suffix = "*.wav"  # Audio file type
        root = Path(self.data_dir)

        meta = {}
        for source in self.sources:
            # Sorted so the split below always selects the same files.
            paths = sorted((root / source).glob(suffix))
            split_at = int(self.train_test_split * len(paths))
            # Training takes the leading share, evaluation takes the rest.
            subset = paths[:split_at] if self.type else paths[split_at:]
            meta[source] = [(path, sf.info(path.as_posix()).frames) for path in subset]

        return meta

    def load_data(self):
        """Read every indexed file into ``self.data``.

        After the call ``self.data[source]`` is a list with one ``tf.constant``
        per file listed in ``self.metadata[source]``, in the same order.
        """
        for source, entries in self.metadata.items():
            # sf.read returns (data, samplerate); only the sample data is kept.
            self.data[source] = [
                tf.constant(sf.read(path.as_posix())[0])
                for path, _frames in entries
            ]

    def __len__(self):
        """Return the nominal dataset size: 8000 when training, 2000 otherwise."""
        return 8000 if self.type else 2000

