In [1]:
%pip install torchaudio

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.11 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
import render
import rooms.dataset
import evaluate
import torch
import sounddevice as sd
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import metrics

In [2]:
torch.set_default_dtype(torch.float32)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
D_0 = rooms.dataset.dataLoader("classroomBase")

In [4]:
D_1 = rooms.dataset.dataLoader("classroomAddedPanel")

## Original audio

In [6]:
sample_rate = 48000

sd.play(D_0.music_dls[0][0], sample_rate)

sd.wait()

## Base

In [7]:
print("Source position: ", D_0.speaker_xyz)
print("Listener position: ", D_0.xyzs[0])

Source position:  [3.5838 5.723  1.2294]
Listener position:  [2.4241125 0.581025  0.32     ]


In [4]:
L_0 = render.get_listener(source_xyz=D_0.speaker_xyz, listener_xyz = D_0.xyzs[0], surfaces=D_0.all_surfaces, 
                                                speed_of_sound=D_0.speed_of_sound,
                                                parallel_surface_pairs=D_0.parallel_surface_pairs,
                                                max_order=D_0.max_order, max_axial_order=D_0.max_axial_order)

Considered Paths:	59050
Total Considered Paths, after Axial:	59080
Valid Paths:	851


In [9]:
# Cuda check
print(torch.version.cuda)
print(torch.cuda.is_available())


12.1
False


In [5]:
R_0 = render.Renderer(n_surfaces=len(D_0.all_surfaces))

In [6]:
# Use many GPUs if available
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs")
    R_0 = nn.DataParallel(R_0).module

In [7]:
pt_file = torch.load('./models/classroomBase/weights.pt', map_location=device)
R_0.energy_vector = nn.Parameter(pt_file['model_state_dict']['energy_vector'])
R_0.source_response = nn.Parameter(pt_file['model_state_dict']['source_response'])
R_0.directivity_sphere = nn.Parameter(pt_file['model_state_dict']['directivity_sphere'])
R_0.decay = nn.Parameter(pt_file['model_state_dict']['decay'])
R_0.RIR_residual = nn.Parameter(pt_file['model_state_dict']['RIR_residual'])
R_0.spline_values = nn.Parameter(pt_file['model_state_dict']['spline_values'])

R_0.bp_ord_cut_freqs.to(device) 

tensor([  70.,  400.,  800., 1000., 1300., 2000.], device='cuda:0',
       grad_fn=<ToCopyBackward0>)

In [None]:
R_0.mic_180_loss = {1000: 0, 5000:2, 10000: 5, 15000:8, 20000:14 } #how many decibels are lost at 180 degrees
#R_0.mic_180_loss = {1000: 0, 2000:0, 4000: 0, 6000:0, 16000:0, 20000:0 }

R_0.mic_0_gain = {1000: 1, 5000: 1, 10000: 1, 15000: 0.8, 20000: 0.5} #amplitude at 0 degrees (normalized)
# R_0.mic_0_gain = {1000: 1, 2000: 1, 4000: 0.9, 6000: 0.87, 16000: 1.07, 20000: 1.48}

R_0.mic_direction = np.array([0,1,0]) # direction where the head opf the microphone is oriented

R_0.render_early_microphone_response(L_0)

mic_response tensor([[1.0000, 1.0000, 1.0000,  ..., 0.4333, 0.4333, 0.4333],
        [1.0000, 1.0000, 1.0000,  ..., 0.1122, 0.1122, 0.1122],
        [1.0000, 1.0000, 1.0000,  ..., 0.4622, 0.4622, 0.4622],
        ...,
        [1.0000, 1.0000, 1.0000,  ..., 0.2325, 0.2325, 0.2325],
        [1.0000, 1.0000, 1.0000,  ..., 0.2316, 0.2316, 0.2316],
        [1.0000, 1.0000, 1.0000,  ..., 0.2319, 0.2319, 0.2319]],
       device='cuda:0')


tensor([-1.9782e-09,  2.2161e-09,  1.4603e-09,  ..., -1.1596e-18,
         3.4669e-18,  1.1294e-18], device='cuda:0', grad_fn=<MulBackward0>)

In [None]:
R_0.cardioid_exponents = {125: 0.3, 1000:0.4, 8000: 0.4, 16000:1.2} #exponent that describes the cardioid at each frequency
#R_0.cardioid_exponents = {125: 0.3, 1000:0.4, 8000: 0.4, 16000:1.2} 

R_0.mic_0_gain = {125: 0.84, 1000: 1, 8000: 1.25, 16000: 0.97} #amplitude at 0 degrees (normalized)
# R_0.mic_0_gain = {1000: 1, 2000: 1, 4000: 0.9, 6000: 0.87, 16000: 1.07, 20000: 1.48}

R_0.mic_direction = np.array([-0.2168, -0.9613  , -0.1700]) # direction where the head opf the microphone is oriented


R_0.render_early_cardioid(L_0)

direction tensor([-0.2168, -0.9613, -0.1700], dtype=torch.float64)
direction tensor([-0.1791,  0.9737, -0.1405], dtype=torch.float64)
direction tensor([-0.1201, -0.9883, -0.0942], dtype=torch.float64)
direction tensor([-0.2111, -0.9359,  0.2820], dtype=torch.float64)
direction tensor([-0.1763, -0.7816, -0.5984], dtype=torch.float64)
direction tensor([ 0.7548, -0.6460, -0.1142], dtype=torch.float64)
direction tensor([-0.8447, -0.5270, -0.0932], dtype=torch.float64)
direction tensor([-0.0551, -0.9975, -0.0432], dtype=torch.float64)
direction tensor([-0.1073,  0.9907, -0.0841], dtype=torch.float64)
direction tensor([-0.1191, -0.9801,  0.1591], dtype=torch.float64)
direction tensor([-0.1116, -0.9187, -0.3789], dtype=torch.float64)
direction tensor([ 0.5309, -0.8436, -0.0804], dtype=torch.float64)
direction tensor([-0.6518, -0.7550, -0.0719], dtype=torch.float64)
direction tensor([-0.1759,  0.9560,  0.2350], dtype=torch.float64)
direction tensor([-0.1399, -0.6204, -0.7717], dtype=torch.floa

tensor([-2.1748e-10,  3.3536e-10,  1.5356e-10,  ...,  1.5602e-18,
         1.7649e-18, -3.4684e-19], device='cuda:0', grad_fn=<MulBackward0>)

In [9]:
Early = R_0.render_early(L_0)

In [10]:
RIR_0 = R_0.render_RIR(L_0)

In [11]:
loss_decay= metrics.training_loss_with_decay(Early, RIR_0)
loss_decay

D: tensor([1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0002e+00, 1.2574e+00,
        3.0009e+00, 1.9871e+00, 1.3204e+00, 1.1892e+00, 1.3008e+00, 1.2835e+00,
        1.2619e+00, 1.2247e+00, 1.0958e+00, 1.1185e+00, 1.1818e+00, 1.1485e+00,
        1.1001e+00, 1.0912e+00, 1.1496e+00, 1.1543e+00, 1.1415e+00, 1.6828e+00,
        1.5202e+00, 1.4540e+00, 1.2486e+00, 1.1415e+00, 1.0566e+00, 1.0690e+00,
        1.0987e+00, 1.0728e+00, 1.2703e+00, 1.1677e+00, 1.4523e+00, 1.3839e+00,
        1.4068e+00, 1.7123e+00, 1.3204e+00, 1.9973e+00, 1.6056e+00, 1.6719e+00,
        1.4616e+00, 1.2978e+00, 1.1333e+00, 1.1107e+00, 1.1738e+00, 1.0620e+00,
        1.1036e+00, 1.2398e+00, 1.5529e+00, 1.8105e+00, 1.5034e+00, 2.4998e+00,
        1.5107e+00, 2.8437e+00, 2.0318e+00, 1.1298e+00, 1.1830e+00, 1.1331e+00,
        1.0575e+00, 1.0166e+00, 1.0248e+00, 1.0460e+00, 1.0115e+00, 1.0378e+00,
        1.1868e+00, 1.4673e+00, 1.5330e+00, 1.1439e+00, 1.3134e+00, 1.2357e+00,
        1.2372e+00, 1.2893e+00, 1.113

Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


tensor(2.5757, grad_fn=<AddBackward0>)

In [12]:
loss = metrics.training_loss(Early, RIR_0)
loss

tensor(1.7715, grad_fn=<AddBackward0>)

In [None]:
RIR_0

In [14]:
torch.cuda.empty_cache()

In [13]:
import fibonacci_utilities as fib
azimuths, elevations = fib.fibonacci_azimuths_and_elevations(3)

In [14]:
RIR_directional = R_0.render_early_directional(L_0, azimuths, elevations)
RIR_directional

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 

In [None]:
len(RIR_directional[4])

In [None]:
loss_directional = metrics.training_loss_directional(RIR_directional, RIR_directional)
loss_directional

In [13]:
RIR_by_direction_oldVersion = R_0.render_RIR_by_directions_oldVersion(L_0)

In [None]:
len(RIR_by_direction_oldVersion[1]['responses'])

In [None]:
RIR_by_direction_oldVersion[1]

In [13]:
angular_sensitivities_em64=[{'frequency_range': (20, 1000), 'angle': 90}, {'frequency_range': (1000, 5000), 'angle': 60}, {'frequency_range': (5000, 20000), 'angle': 45}]

In [None]:
loss_with_zeros = metrics.training_loss_considering_directionality(render.initialize_directional_list(angular_sensitivities_em64, 96000, device), RIR_by_direction_oldVersion)
loss_with_zeros

In [None]:
loss_with_itself = metrics.training_loss_considering_directionality(RIR_by_direction_oldVersion, RIR_by_direction_oldVersion)
loss_with_itself

In [None]:
print(RIR_0.unsqueeze(0).shape)
print(D_0.music_dls[0:1,...].shape)

In [None]:
music_0 = evaluate.render_music(RIR_0.unsqueeze(0), D_0.music_dls[0:1,...], device=device)[0][0]

In [None]:
print(music_0.shape)

In [None]:
plt.plot(music_0)
plt.title('Music')
plt.show()

In [None]:
sample_rate = 48000

sd.play(music_0, sample_rate)

sd.wait()

## added panel

In [None]:
print("Source position: ", D_1.speaker_xyz)
print("Listener position: ", D_1.xyzs[0])

In [None]:
L_1 = render.get_listener(source_xyz=D_1.speaker_xyz, listener_xyz = D_1.xyzs[0], surfaces=D_1.all_surfaces, 
                                                speed_of_sound=D_1.speed_of_sound,
                                                parallel_surface_pairs=D_1.parallel_surface_pairs,
                                                max_order=D_1.max_order, max_axial_order=D_1.max_axial_order )

In [22]:
R_1 = render.Renderer(n_surfaces=len(D_1.all_surfaces))

In [23]:
modified_energy_vector = torch.tensor([[[-0.5305, -0.4278, -0.2579, -0.3598, -1.6096, -0.7284, -1.8826,
          -1.6226, -1.5753, -1.9941],
         [ 0.5305,  0.4278,  0.2579,  0.3598,  1.6096,  0.7284,  1.8826,
           1.6226,  1.5753,  1.9941]],

        [[-2.4384,  0.8858, -0.2965, -1.5742, -0.2930, -1.9829, -1.7962,
          -1.9131, -0.0154,  0.2472],
         [ 2.4384, -0.8858,  0.2965,  1.5742,  0.2930,  1.9829,  1.7962,
           1.9131,  0.0154, -0.2472]],

        [[-0.9421, -0.6755, -3.6006, -0.7103, -0.8803, -1.3149, -2.2866,
          -2.6399, -2.8685, -2.1082],
         [ 0.9421,  0.6755,  3.6006,  0.7103,  0.8803,  1.3149,  2.2866,
           2.6399,  2.8685,  2.1082]],

        [[-4.0914, -2.7232, -0.8506, -2.3887, -3.6858, -1.3391, -2.6380,
          -1.8897, -2.1227, -3.5193],
         [ 4.0914,  2.7232,  0.8506,  2.3887,  3.6858,  1.3391,  2.6380,
           1.8897,  2.1227,  3.5193]],

        [[-1.1539, -0.4459, -0.3624, -0.7596, -0.7480, -1.5066, -0.9996,
          -2.1747, -0.6960, -2.0290],
         [ 1.1539,  0.4459,  0.3624,  0.7596,  0.7480,  1.5066,  0.9996,
           2.1747,  0.6960,  2.0290]],

        [[-1.7243, -1.1099, -0.0423, -1.1507, -1.2209, -0.4600, -2.0697,
          -2.2589, -2.4818, -1.5450],
         [ 1.7243,  1.1099,  0.0423,  1.1507,  1.2209,  0.4600,  2.0697,
           2.2589,  2.4818,  1.5450]],

        [[-7.8657, -7.9064, -4.2665, -2.9527, -0.6743, -0.7566,  1.5479,
          -1.3406, -2.4598, -2.1531],
         [ 7.8657,  7.9064,  4.2665,  2.9527,  0.6743,  0.7566, -1.5479,
           1.3406,  2.4598,  2.1531]],

        [[-4.5917, -5.2178, -4.6895, -3.5287, -3.0246,  0.2444, -1.6953,
          -1.4810, -1.9807, -1.9190],
         [ 4.5917,  5.2178,  4.6895,  3.5287,  3.0246, -0.2444,  1.6953,
           1.4810,  1.9807,  1.9190]],

        [[-6.7398, -2.8415, -3.7433, -0.8007, -2.0138, -1.9061, -2.0395,
          -2.8121, -3.1212, -2.7330],
         [ 6.7398,  2.8415,  3.7433,  0.8007,  2.0138,  1.9061,  2.0395,
          2.8121,  3.1212,  2.7330]],
           
        [[-6.7398, -2.8415, -3.7433, -0.8007, -2.0138, -1.9061, -2.0395,
          -2.8121, -3.1212, -2.7330],
         [ 6.7398,  2.8415,  3.7433,  0.8007,  2.0138,  1.9061,  2.0395,
          2.8121,  3.1212,  2.7330]]])

In [24]:
pt_file = torch.load('./models/classroomBase/weights.pt', map_location=device)
R_1.energy_vector = nn.Parameter(modified_energy_vector)
R_1.source_response = nn.Parameter(pt_file['model_state_dict']['source_response'])
R_1.directivity_sphere = nn.Parameter(pt_file['model_state_dict']['directivity_sphere'])
R_1.decay = nn.Parameter(pt_file['model_state_dict']['decay'])
R_1.RIR_residual = nn.Parameter(pt_file['model_state_dict']['RIR_residual'])
R_1.spline_values = nn.Parameter(pt_file['model_state_dict']['spline_values'])

R_1.bp_ord_cut_freqs.to(device) 

In [25]:
torch.cuda.empty_cache()

In [None]:
RIR_1 = R_1.render_RIR(L_1)

In [None]:
print(RIR_1.unsqueeze(0).shape)
print(D_1.music_dls[0:1,...].shape)

In [None]:
music_1 = evaluate.render_music(RIR_1.unsqueeze(0), D_1.music_dls[0:1,...], device=device)[0][0]

In [None]:
plt.plot(music_1)
plt.title('Music')
plt.show()

In [None]:
sample_rate = 48000

sd.play(music_1, sample_rate)

sd.wait()