- Repeat m times: run the FFT-based convolution of every dictionary sound with the target sound
    - Take the highest value in the convolution result and add the corresponding dictionary sound to the solution
    - Repeat until the full target sound is covered
    - Subtract the solution from the target sound

In [None]:
# Toy dictionary of three short integer "sounds" for prototyping.
# NOTE(review): `np` and `fft` are imported in a later cell, so that cell has
# to be executed before this one.
sound1 = np.array([1, 2])
sound2 = np.array([2, 2, 3, 4])
sound3 = np.array([1, 3, 2, 4])
sound_dict = [sound1, sound2, sound3]
D = len(sound_dict)
l_max = 4
# Length of a full linear convolution of two signals of length l_max.
conv_matrix_width = 2 * l_max - 1

# Spectra of every sound and of its time-reversed copy, keyed by dictionary
# index and zero-padded to conv_matrix_width samples.
sound_fts, sound_fts_reverse = {}, {}
for i, sound in enumerate(sound_dict):
    reversed_sound = sound[::-1]
    sound_fts[i] = fft(sound, conv_matrix_width)
    sound_fts_reverse[i] = fft(reversed_sound, conv_matrix_width)

# NOTE(review): generate_convolution_dict_parallelized is not defined in the
# visible part of the notebook - confirm where it is supposed to come from.
generate_convolution_dict_parallelized()

In [76]:
import numpy as np
import time
import matplotlib.pyplot as plt
from scipy.fftpack import fft, ifft
from scipy.io import wavfile
from scipy.signal import stft
import sounddevice as sd
from scipy.signal import istft
import os

# Seed NumPy's global RNG so the np.random.randint draws made by later cells
# (random sample lengths, simulated data) are repeatable.
np.random.seed(0)

In [77]:
def fft_convolution(T, d):
    """Return the full linear convolution of ``T`` and ``d`` via the FFT.

    Both inputs are transformed with ``len(T) + len(d) - 1`` points (shorter
    inputs are zero-padded by ``fft``), the spectra are multiplied, and the
    real part of the inverse transform is returned.

    Args:
        T: First 1-D sequence.
        d: Second 1-D sequence.

    Returns:
        Real array of length ``len(T) + len(d) - 1``.
    """
    full_length = len(T) + len(d) - 1
    spectrum_product = fft(T, full_length) * fft(d, full_length)
    return np.real(ifft(spectrum_product))


def generate_convolution(target):
    """Convolve every dictionary sound with ``target`` and stack the results.

    Reads the module-level ``sound_dict``, ``D``, ``N`` and ``l_max``. Row
    ``i`` of the returned ``(D, N + l_max - 1)`` array holds
    ``fft_convolution(sound_dict[i], target)``; rows whose result is shorter
    than the matrix width keep zeros on the right.

    NOTE(review): entry ``j`` is ``sum_k sound[k] * target[j - k]``, i.e. a
    true convolution, which time-reverses the sound relative to a sliding dot
    product. ``algo`` places the un-reversed sound ending at column ``j`` -
    confirm whether ``sound[::-1]`` was intended here.
    """
    n_columns = N + l_max - 1
    scores = np.zeros((D, n_columns))
    for row, atom in enumerate(sound_dict):
        atom_scores = fft_convolution(atom, target)
        # Left-align the result; the remaining columns of the row stay zero.
        scores[row, : atom_scores.shape[0]] = atom_scores
    return scores


def get_largest_value_index(matrix):
    """Locate the largest entry of a 2-D ``matrix``.

    Returns:
        ``(row, column, value)`` of the maximum; when the maximum occurs more
        than once, the first occurrence in row-major order is reported.
    """
    flat_position = np.argmax(matrix)
    row, column = np.unravel_index(flat_position, matrix.shape)
    return row, column, matrix[row, column]


def generate_simultaneity_mask(i):
    """Build the boolean overlap mask for dictionary sound ``i``.

    Reads the module-level ``lengths`` and ``l_max``. The result has one row
    per entry of ``lengths`` and ``lengths[i] + l_max - 1`` columns, with
    ``mask[k, c]`` True exactly when ``c < lengths[k] + lengths[i] - 1``.

    Read with column 0 aligned to the first sample of sound ``i`` and columns
    indexing the last sample of a candidate (the convention ``algo`` uses),
    the True entries of row ``k`` are the placements of sound ``k`` that
    would share at least one sample with sound ``i``.

    Args:
        i: Index into ``lengths`` of the sound that was just placed.

    Returns:
        Boolean array of shape ``(len(lengths), lengths[i] + l_max - 1)``.
    """
    placed_length = lengths[i]
    # Column vector of all sound lengths, broadcast against the column index.
    len_broadcast = np.asarray(lengths)[:, np.newaxis]
    range_array = np.arange(placed_length + l_max - 1)[np.newaxis, :]
    return range_array < (len_broadcast + placed_length - 1)


def remove_simultaneous_sounds(matrix, i, j):
    """Zero, in place, the entries of ``matrix`` that overlap sound ``i`` at ``j``.

    The overlap mask from ``generate_simultaneity_mask(i)`` is aligned so that
    its column 0 sits at column ``j - lengths[i] + 1`` of ``matrix``; every
    entry under a True mask value is multiplied by zero.

    Args:
        matrix: 2-D score matrix with one row per dictionary sound; modified
            in place.
        i: Row (dictionary index) of the sound that was just placed.
        j: Column of ``matrix`` at which that sound was selected.

    Returns:
        The same ``matrix`` object, after masking.
    """
    mask = generate_simultaneity_mask(i)
    first_column = j - lengths[i] + 1
    # When the placed sound would start before column 0, the leading mask
    # columns fall outside the matrix. Skip them instead of sliding the whole
    # mask to the right, which would blank columns that do not overlap.
    mask_offset = max(-first_column, 0)
    starting_point = max(first_column, 0)
    end_point = min(
        starting_point + mask.shape[1] - mask_offset, matrix.shape[1]
    )
    width = max(end_point - starting_point, 0)
    matrix[:, starting_point:end_point] *= ~mask[
        :, mask_offset : mask_offset + width
    ]
    return matrix

In [78]:
# load target
N = 105000
_, target = wavfile.read("samples/target.wav")
# NOTE(review): the code below treats `target` and every sound as 1-D (mono)
# arrays - confirm the sample files are not multi-channel.
target = target[:N].astype(np.float64)
# The file may hold fewer than the requested samples; keep N in sync with the
# data so the buffers sized from N downstream match the target.
N = len(target)
target_norm = np.linalg.norm(target)
# Same zero guard as for the dictionary sounds: an all-zero target would
# otherwise become NaN.
if target_norm > 0:
    target /= target_norm

# load a slice of every other sound in the sample folder: it starts at sample
# 10500 and is between 8500 and 15499 samples long (random per file)
# os.listdir returns names in arbitrary order; sorting ties each seeded random
# length to the same file on every run.
files = sorted(os.listdir("samples/"))
sound_dict = []
for file in files:
    if file == "target.wav":
        continue
    _, sound = wavfile.read("samples/" + file)
    sound = sound[10500 : 21000 + np.random.randint(-2000, 5000)]
    if len(sound) == 0:
        # File shorter than 10500 samples: an empty sound cannot be matched.
        continue
    sound = sound.astype(np.float64)
    norm = np.linalg.norm(sound)
    if norm > 0:
        sound /= norm
    sound_dict.append(sound)

D = len(sound_dict)
# Shortest sound first; `lengths` follows the same order as `sound_dict`.
sound_dict = sorted(sound_dict, key=len)
lengths = [len(sound) for sound in sound_dict]
l_max = max(lengths)

In [67]:
def algo(original_target, sound_dict, iterations=10):
    """Greedily rebuild ``original_target`` from scaled dictionary sounds.

    Each outer iteration scores every (sound, column) pair with
    ``generate_convolution`` on the current residual. It then repeatedly
    takes the best remaining score, adds the scaled sound to the result,
    subtracts it from the residual, and blanks every placement that would
    overlap it, until no score above 0.01 is left.

    Reads the module-level ``N``, ``D``, ``l_max``, ``lengths`` and
    ``target_norm``. Note that ``generate_convolution`` scores against the
    module-level ``sound_dict``, which is not necessarily the argument passed
    here.

    Args:
        original_target: 1-D target signal of length ``N``; it is copied, not
            modified.
        sound_dict: Sequence of 1-D sounds, indexed consistently with
            ``lengths``.
        iterations: Maximum number of outer passes.

    Returns:
        The accumulated reconstruction (length ``N``) multiplied by
        ``target_norm``.
    """
    # Residual: what is still unexplained after the sounds placed so far.
    target = original_target.copy()
    result = np.zeros(N)
    # Placements (sound row, column) that must never be chosen again.
    perm_mask = np.zeros((D, l_max + N - 1), dtype=bool)
    convolution_matrix_no_mask = generate_convolution(target)
    for iteration in range(iterations):
        print("iteration ", iteration)
        convolution_matrix = convolution_matrix_no_mask.copy()
        placed_any = False
        while np.count_nonzero(convolution_matrix) > 0:
            i, j, max_val = get_largest_value_index(convolution_matrix)
            # Decide on the score just found. A stale value from an earlier
            # pass must not end this pass before it starts, and a match below
            # the threshold must not be applied.
            if max_val <= 0.01:
                break
            length = lengths[i]
            first_sample = j - length + 1
            # clipping to avoid out of bounds:
            start_time, end_time = max(first_sample, 0), min(j + 1, N)
            clip_start = max(-first_sample, 0)
            clip_end = min(clip_start + end_time - start_time, length)
            matched_sound = max_val * sound_dict[i][clip_start:clip_end]
            result[start_time:end_time] += matched_sound
            target[start_time:end_time] -= matched_sound
            perm_mask[i, start_time:end_time] = True
            placed_any = True
            convolution_matrix = remove_simultaneous_sounds(convolution_matrix, i, j)
        if not placed_any:
            # Nothing changed, so every further pass would see the same
            # scores and also place nothing.
            break
        # Score the residual directly. Subtracting the convolution of the
        # cumulative `result` on every pass would count earlier placements
        # again and again.
        convolution_matrix_no_mask = generate_convolution(target)
        convolution_matrix_no_mask *= ~perm_mask
    return result * target_norm

In [None]:
# Decompose the loaded target with up to 200 outer passes.
result = algo(target, sound_dict, iterations=200)
# save result
# wavfile.write("result200.wav", 44100, result.astype(np.int16))

In [13]:
# Print NumPy arrays with two decimal places and without scientific notation.
np.set_printoptions(precision=2, suppress=True)

In [93]:
# use small simulated data with randomness
N = 4
target = np.random.randint(0, 100, N)
sound_dict = [np.random.randint(0, 100, 4 - i) for i in [1, 2, 3]]
sound_dict = [sound / np.linalg.norm(sound) for sound in sound_dict]

D = len(sound_dict)
# Shortest sound first; `lengths` follows the same order as `sound_dict`.
sound_dict = sorted(sound_dict, key=len)
lengths = [len(sound) for sound in sound_dict]
l_max = max(lengths)
result = np.zeros(N)

print("target: ", target)
print("sound_dict: ", sound_dict)

# One greedy step: score all placements, pick the best, blank the overlaps.
convolution_matrix = generate_convolution(target)
print("convolution_matrix: \n", convolution_matrix)
i, j, max_val = get_largest_value_index(convolution_matrix)
l = lengths[i]
simultaneity_mask = generate_simultaneity_mask(i)
convolution_matrix = remove_simultaneous_sounds(convolution_matrix, i, j)
print("convolution_matrix: \n", convolution_matrix)
print("i: ", i)
print("j: ", j)
print("max_val: ", max_val)

# Clip the matched sound to the part that lies inside [0, N). The clipping
# depends on the time index j (exactly as in algo), not on the dictionary
# index i.
start_time, end_time = max(j - l + 1, 0), min(j + 1, N)
clip_start = max(-(j - l + 1), 0)
clip_end = min(clip_start + end_time - start_time, l)
matched_sound = max_val * sound_dict[i][clip_start:clip_end]

print("target: ", target)
# NOTE(review): remove_sound_from_target is not defined in the visible part of
# the notebook; it is assumed to subtract `matched_sound` from `target`
# starting at `start_time` and return the updated target - confirm.
target = remove_sound_from_target(target, matched_sound, start_time)
print("target: ", target)
result[start_time:end_time] += matched_sound
print("result: ", result)

target:  [59 78 15  4]
sound_dict:  [array([1.]), array([0.7 , 0.71]), array([0.25, 0.6 , 0.76])]
convolution_matrix: 
 [[59.   78.   15.    4.    0.    0.  ]
 [41.43 96.78 66.07 13.49  2.85  0.  ]
 [14.94 55.08 95.33 69.26 13.79  3.04]]
convolution_matrix: 
 [[ 0.    0.   15.    4.    0.    0.  ]
 [ 0.    0.    0.   13.49  2.85  0.  ]
 [ 0.    0.    0.    0.   13.79  3.04]]
i:  1
j:  1
max_val:  96.77731149169159
target:  [59 78 15  4]
target:  [-8  9 15  4]
result:  [67.95 68.91  0.    0.  ]


In [77]:
# 3x3 matrix of random integers drawn from [0, 100)
m = np.random.randint(0, 100, size=(3, 3))
# all-False boolean block matching the last two columns of m
z = np.zeros(shape=(3, 2), dtype=bool)
print(z)
print(m)
# in-place multiplication by the boolean block zeroes those columns
m[:, 1:] *= z
m

[[False False]
 [False False]
 [False False]]
[[78 18 96]
 [91 36 20]
 [ 3 18 77]]


array([[78,  0,  0],
       [91,  0,  0],
       [ 3,  0,  0]])

In [58]:
"""# Generate some test data
D = 5
N = 10
matrix1 = np.random.rand(D, N)
lengths = np.array([1, 2, 2, 4, 5, 5])

# Parameters for remove_simultaneous_sounds functions
i = 2
j = 5

print(matrix1)
print("lengths: ", lengths)
print("i: ", i)
print("j: ", j)
print(remove_simultaneous_sounds(matrix1, i, j))"""

'# Generate some test data\nD = 5\nN = 10\nmatrix1 = np.random.rand(D, N)\nlengths = np.array([1, 2, 2, 4, 5, 5])\n\n# Parameters for remove_simultaneous_sounds functions\ni = 2\nj = 5\n\nprint(matrix1)\nprint("lengths: ", lengths)\nprint("i: ", i)\nprint("j: ", j)\nprint(remove_simultaneous_sounds(matrix1, i, j))'