In [2]:
import numpy as np
from scipy.io import wavfile

# =======================================
# Constants
# =======================================

t_per_sample = 0.00002267573

sample_T = 1 / 44100 # Period of sample (in s)

# =======================================
# Parameters to tune for audio processing
# =======================================

g = 0.5 # Amplitude multiplier

# We use cartesian coordinates to model the motion of a point audio source
# around a listener centered around the origin

# The left ear is located at (-10, 0), the right ear at (10, 0)

left_ear_pos = np.array([-10, 0])
right_ear_os = np.array([10, 0])

'''
Adds a spatial characteristic to an input .wav audio file

param[in] input_path : path to the input audio file
param[out] output_path : paht to the output audio file
param[in] span : [0, 100] with 100 representing an audio signal coming from the right and 0 representing an audio signal
                coming from the left
'''
# TO DO: Change parameter to accept position of audio signal in space 
# Frequency attenuation 
# Hardcode left and right channel positions relative to origin

def head_transfer_function(input_path : str, output_path : str, span : int):
    span = 0 if span < 0 else min(span, 100)

    sample_rate, data = wavfile.read(input_path)
    audio_data = np.array(data, dtype=np.float32)

    if data.dtype == np.int16:
        audio_data = audio_data / 32768.0  # 16-bit audio normalization
    elif data.dtype == np.int32:
        audio_data = audio_data / 2147483648.0  # 32-bit audio normalization
    elif data.dtype == np.uint8:
        audio_data = (audio_data - 128) / 128.0  # 8-bit audio normalization

    #sample_T = 1 / sample_rate # Period of sample (in s)

    # -100 = 0.0003
    # 0 = 0
    # 100 = 0.0003

    time_delay = 0.0003 # Time delay to imitate audio traveling from one ear to the other
    sample_shift = int(time_delay / sample_T) # Number of samples we have to shift by

    right_span = span
    left_span = 100 - span

    # Channel shifts are inversely proportional to their respective channel spans
    # thus directly proportional with the span of the opposite channel

    if span >= 50:
        right_sample_shift = 0
        left_sample_shift = int(0.0003 * ((span - 50) / 50) / sample_T)
    else:
        right_sample_shift = int(int(0.0003 * ((50 - span) / 50) / sample_T))
        left_sample_shift = 0

    left_sample_padding = right_sample_shift
    right_sample_padding = left_sample_shift

    left_amp_mult = g + (1 - g) * (left_span / 100)
    right_amp_mult = g + (1 - g) * (right_span / 100)

    '''left_amp_mult = 1 if span <= 100 else (g * (100 / (span - 100)))
    right_amp_mult = 1 if span >= 100 else (g * (100 / (100 - span)))'''

    print(f"span : left shift : right shift | {span, left_sample_shift, right_sample_shift}")
    print(f"left_amp_mult {left_amp_mult}, right_amp_mult {right_amp_mult}")

    left_channel_padded = np.array([0. for _ in range(left_sample_shift)] + [audio_data[i] for i in range(len(audio_data))] + [0. for _ in range(left_sample_padding)], 
                                   dtype=np.float32)
    left_channel_padded *= left_amp_mult
    right_channel_padded = np.array([0. for _ in range(right_sample_shift)] + [audio_data[i] for i in range(len(audio_data))] + [0. for _ in range(right_sample_padding)], 
                                   dtype=np.float32)
    right_channel_padded *= right_amp_mult

    return (left_channel_padded, right_channel_padded, sample_rate)
    

def get_span_from_point(point : np.ndarray):
    point = np.array(point)
    #dist = np.linalg.norm(left_ear_pos - point)
    r_dist = np.linalg.norm(right_ear_os - point)
    l_dist = np.linalg.norm(left_ear_pos - point)

    diff = abs(r_dist - l_dist)
    diff = min(diff, 20)

    if r_dist <= l_dist:
        span = 50 + 50 * (diff / 20)
    else:
        span = 50 - (50 * (diff / 20))
    
    return span


In [9]:
# Returns a list of points representing the motion of a particle based on the length of time spanned by an input audio file
def model_particle_orbit(input_path : str):
    t = 0
    pos = []

    _, data = wavfile.read(input_path)
    wav = np.array(data, dtype=np.float32)

    for _ in wav:
        # Our point does a complete rotation in 2 second
        point = (-10 * np.sin(2 * np.pi * t / 2), 10 * np.cos(2 * np.pi * t / 2))
        pos.append(point)
        t += t_per_sample  
    return pos

In [5]:
def spatialize_from_point(input_path : str, output_path : str, point : list):
    try:
        _, _ = point
    except ValueError:
        print(f"ERROR: Point passed into spatialize_from_point is of invalid format. Expected (x, y). Received: {point}")
        return
    
    span = get_span_from_point(point)

    left_channel_padded, right_channel_padded, sample_rate = head_transfer_function(input_path, output_path, span)

    tone_y_stereo=np.vstack((left_channel_padded, right_channel_padded))
    tone_y_stereo=tone_y_stereo.transpose()
    wavfile.write(output_path, sample_rate, tone_y_stereo)

def spatialize_from_points(input_path : str, output_path : str, points : list[list]):
    if len(points) == 0:
        print("ERROR: List of points contains no entries")
        return

    left_channel = []
    right_channel = []
    for point in points:
        try:
            _, _ = point
        except ValueError:
            print(f"ERROR: Point passed into spatialize_from_point is of invalid format. Expected (x, y). Received: {point}")
            return
        
        span = get_span_from_point(point)

        left_channel_padded, right_channel_padded, sample_rate = head_transfer_function(input_path, output_path, span)


        #left_channel = np.concatenate((left_channel, left_channel_padded))
        #right_channel = np.concatenate((right_channel, right_channel_padded))

        left_channel.append(left_channel_padded)
        right_channel.append(right_channel_padded)

        #print(sample_rate)

        print(f"Len of both channels (left, right) : {len(left_channel)}, {len(right_channel)}")
    
    #print(right_channel)

    # TO DO: Figure out why accumulating into two channels with np.concatenate was not working

    left_channel_final = []
    for audio in left_channel:
        for sample in audio:
            left_channel_final.append(sample)

    right_channel_final = []
    for audio in right_channel:
        for sample in audio:
            right_channel_final.append(sample)
    
    left_channel_final = np.array(left_channel_final)
    right_channel_final = np.array(right_channel_final)
    
    tone_y_stereo=np.vstack((left_channel_final, right_channel_final))
    tone_y_stereo=tone_y_stereo.transpose()
    wavfile.write(output_path, 44100, tone_y_stereo)

In [77]:
class RingBuf:
    # Constructor (initializer)
    def __init__(self):
        self.buf = [0. for _ in range(14)]
        self.l_ptr = 0
        self.r_ptr = 0
        self.w_ptr = 0

    def write(self, val : float):
        self.buf[self.w_ptr] = val
        self.w_ptr = (self.w_ptr + 1) % len(self.buf)
        self.r_ptr = (self.r_ptr + 1) % len(self.buf)
        self.l_ptr = (self.l_ptr + 1) % len(self.buf)
    
    def read(self, span):
        #span = get_span_from_point(pos)
        left_sample_shift = 0
        right_sample_shift = 0
        if span >= 50:
            left_sample_shift = int(0.0003 * ((span - 50) / 50) / sample_T)
            self.r_ptr = (self.w_ptr - 1) % len(self.buf)
            self.l_ptr = (self.w_ptr - left_sample_shift - 1) % len(self.buf)
        else:
            right_sample_shift = int(0.0003 * ((50 - span) / 50) / sample_T)
            self.l_ptr = (self.w_ptr - 1) % len(self.buf)
            self.r_ptr = (self.w_ptr - right_sample_shift - 1) % len(self.buf)
        
        #print(f"L ({left_sample_shift}) | R ({right_sample_shift})")
        #print(f"L_ptr ({self.l_ptr}) | R_ptr ({self.r_ptr})")
        return (self.buf[self.l_ptr], self.buf[self.r_ptr])



        

In [76]:
rb = RingBuf()
for i in range(14):
    rb.write(float(i))
rb.read(0)


L_ptr (13) | R_ptr (0)


(13.0, 0.0)

In [79]:
def spatialize_over_time(input_path : str, output_path : str, pos_lst : list):
    sample_rate, data = wavfile.read(input_path)
    audio_data = np.array(data, dtype=np.float32)

    if data.dtype == np.int16:
        audio_data = audio_data / 32768.0  # 16-bit audio normalization
    elif data.dtype == np.int32:
        audio_data = audio_data / 2147483648.0  # 32-bit audio normalization
    elif data.dtype == np.uint8:
        audio_data = (audio_data - 128) / 128.0  # 8-bit audio normalization

    sample_T = 1 / sample_rate # Period of sample (in s)

    # -100 = 0.0003
    # 0 = 0
    # 100 = 0.0003

    #print(audio_data.shape)

    # Known Issue: Sometimes audio data will duplicate samples into two columns

    if(len(audio_data.shape) > 1):
        print("whoopsy daisy")
        audio_data = audio_data[:, 0:1]
        audio_data = audio_data.flatten()

    #print(data)
    #print(audio_data.shape)
    #return
    #return

    channel_buf = RingBuf()
    left = []
    right = []

    for i in range(len(audio_data)):
        pos = pos_lst[i]
        span = get_span_from_point(pos)
        right_span = span
        left_span = 100 - span

        #print(audio_data[i])

        channel_buf.write(audio_data[i])
        l, r = channel_buf.read(span)

        #left.append(audio_data[i])
        #right.append(audio_data[i])
    
        left_amp_mult = g + (1 - g) * (left_span / 100)
        right_amp_mult = g + (1 - g) * (right_span / 100)

        #print(f"{l} | {r}")
        left.append(float(l * left_amp_mult))
        right.append(float(r * right_amp_mult))
        # iterate through middle channel
        # Add middle channel to rin buffer
    

    left_channel = np.array(left, dtype=np.float32)
    right_channel = np.array(right, dtype=np.float32)

    assert len(left_channel) == len(right_channel)

    tone_y_stereo=np.vstack((left_channel, right_channel))
    tone_y_stereo=tone_y_stereo.transpose()
    wavfile.write(output_path, 44100, tone_y_stereo)

    '''time_delay = 0.0003 # Time delay to imitate audio traveling from one ear to the other
    sample_shift = int(time_delay / sample_T) # Number of samples we have to shift by

    right_span = span
    left_span = 100 - span

    # Channel shifts are inversely proportional to their respective channel spans
    # thus directly proportional with the span of the opposite channel

    if span >= 50:
        right_sample_shift = 0
        left_sample_shift = int(0.0003 * ((span - 50) / 50) / sample_T)
    else:
        right_sample_shift = int(int(0.0003 * ((50 - span) / 50) / sample_T))
        left_sample_shift = 0

    left_sample_padding = right_sample_shift
    right_sample_padding = left_sample_shift

    left_amp_mult = g + (1 - g) * (left_span / 100)
    right_amp_mult = g + (1 - g) * (right_span / 100)'''

In [15]:
points = model_particle_orbit("long_tone.wav")

In [16]:
print(len(points))

2725888


In [62]:
spatialize_over_time("long_tone.wav", "tone_over_time.wav", points)

whoopsy daisy


In [81]:
head_transfer_function("drum.wav", "experiment.wav", 20)

span : left shift : right shift | (20, 2, 10)
left_amp_mult 0.9, right_amp_mult 0.6


(array([ 0.        ,  0.        , -0.00101624, ...,  0.        ,
         0.        ,  0.        ], dtype=float32),
 array([ 0.        ,  0.        ,  0.        , ..., -0.00032959,
         0.        ,  0.        ], dtype=float32),
 44100)

In [169]:
spatialize_from_point("drum.wav", "experiment.wav", [100, 10])

spatialize_from_point("drum.wav", "left.wav", [-20, 0])
spatialize_from_point("drum.wav", "slightly_right.wav", [1, 10])

spatialize_from_point("drum.wav", "righter.wav", [3,10])

spatialize_from_point("drum.wav", "distant.wav", [0,-20])


span : left shift : right shift | (99.74939697624609, 13, 0)
left_amp_mult 0.5012530151187695, right_amp_mult 0.9987469848812305
span : left shift : right shift | (0.0, 0, 13)
left_amp_mult 1.0, right_amp_mult 0.5
span : left shift : right shift | (53.53111175061199, 0, 0)
left_amp_mult 0.73234444124694, right_amp_mult 0.76765555875306
span : left shift : right shift | (60.48665962780756, 2, 0)
left_amp_mult 0.6975667018609621, right_amp_mult 0.8024332981390379
span : left shift : right shift | (50.0, 0, 0)
left_amp_mult 0.75, right_amp_mult 0.75


In [170]:
spatialize_from_point("defaultAudio.wav", "beep_right.wav", [10,0])

span : left shift : right shift | (100.0, 13, 0)
left_amp_mult 0.5, right_amp_mult 1.0


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (31555,) + inhomogeneous part.

In [142]:
points = [[10, 0], [0,0], [-10,0], [0,0]]
    
spatialize_from_points("drum.wav", "concat_sounds.wav", points)

span : left shift : right shift | (100.0, 13, 0)
left_amp_mult 0.5, right_amp_mult 1.0
Len of both channels (left, right) : 1, 1
span : left shift : right shift | (50.0, 0, 0)
left_amp_mult 0.75, right_amp_mult 0.75
Len of both channels (left, right) : 2, 2
span : left shift : right shift | (0.0, 0, 13)
left_amp_mult 1.0, right_amp_mult 0.5
Len of both channels (left, right) : 3, 3
span : left shift : right shift | (50.0, 0, 0)
left_amp_mult 0.75, right_amp_mult 0.75
Len of both channels (left, right) : 4, 4


In [177]:
def hrt_test(input_path : str, output_path : str):
    span = 100


    sample_rate, data = wavfile.read(input_path)
    audio_data = np.array(data, dtype=np.float32)

    if data.dtype == np.int16:
        audio_data = audio_data / 32768.0  # 16-bit audio normalization
    elif data.dtype == np.int32:
        audio_data = audio_data / 2147483648.0  # 32-bit audio normalization
    elif data.dtype == np.uint8:
        audio_data = (audio_data - 128) / 128.0  # 8-bit audio normalization

    sample_T = 1 / sample_rate # Period of sample (in s)

    # -100 = 0.0003
    # 0 = 0
    # 100 = 0.0003

    time_delay = 0.0003 # Time delay to imitate audio traveling from one ear to the other
    sample_shift = int(time_delay / sample_T) # Number of samples we have to shift by

    right_span = span
    left_span = 100 - span

    # Channel shifts are inversely proportional to their respective channel spans
    # thus directly proportional with the span of the opposite channel

    if span >= 50:
        right_sample_shift = 0
        left_sample_shift = int(0.0003 * ((span - 50) / 50) / sample_T)
    else:
        right_sample_shift = int(int(0.0003 * ((50 - span) / 50) / sample_T))
        left_sample_shift = 0

    left_sample_padding = right_sample_shift
    right_sample_padding = left_sample_shift

    left_amp_mult = g + (1 - g) * (left_span / 100)
    right_amp_mult = g + (1 - g) * (right_span / 100)

    '''left_amp_mult = 1 if span <= 100 else (g * (100 / (span - 100)))
    right_amp_mult = 1 if span >= 100 else (g * (100 / (100 - span)))'''

    print(f"span : left shift : right shift | {span, left_sample_shift, right_sample_shift}")
    print(f"left_amp_mult {left_amp_mult}, right_amp_mult {right_amp_mult}")

    left_channel_padded = np.array([0. for _ in range(left_sample_shift)] + [audio_data[i] for i in range(len(audio_data))] + [0. for _ in range(left_sample_padding)], 
                                   dtype=np.float32)
    left_channel_padded *= left_amp_mult
    right_channel_padded = np.array([0. for _ in range(right_sample_shift)] + [audio_data[i] for i in range(len(audio_data))] + [0. for _ in range(right_sample_padding)], 
                                   dtype=np.float32)
    right_channel_padded *= right_amp_mult

    # Experimenting around with low pass filter on left channel

    #bin_size = int(10 * (span - 50) / 50)
    bin_size = 5
    left_channel_final = []


    for i in range(len(left_channel_padded)):
        left_i = max(0, i - bin_size)
        s = sum(left_channel_padded[left_i : i + 1])
        left_channel_final.append(s / (i - left_i) + 1)        
        
    left_channel_final = np.array(left_channel_final, dtype=np.float32)
    
    print(f"len of left channel {len(left_channel_final)} : len of right channel {len(right_channel_padded)}")

    tone_y_stereo=np.vstack((left_channel_final, right_channel_padded))
    tone_y_stereo=tone_y_stereo.transpose()
    wavfile.write(output_path, 44100, tone_y_stereo)

In [178]:
hrt_test("drum.wav", "low_pass.wav")
#hrt_test("defaultAudio.wav", "low_pass_beep.wav")

span : left shift : right shift | (100, 13, 0)
left_amp_mult 0.5, right_amp_mult 1.0
len of left channel 74899 : len of right channel 74899


In [57]:
spatialize_over_time("beep-02.wav", "beep_out.wav", points)

[23045 23036 23042 ...  -513  -255     0]
(13416,)


In [80]:
spatialize_over_time("mid_tone.wav", "mid_ex_out.wav", points)

whoopsy daisy
