In [4]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import torch
import librosa

from network import NAF
from sound_loader import sound_samples
from utilities import to_audio

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Loading data...')
# `dataset.mean` / `dataset.std` are used later to rescale the network output.
dataset = sound_samples(num_samples=20)
# Load NAF with selected configuration
net = NAF(input_dim = 288, min_xy=np.array([-1, -1]), max_xy=np.array([1, 1])).to(device)
# map_location remaps the checkpoint's tensors onto `device`, so weights that
# were saved from a GPU run still load on a CPU-only machine.
state_dict = torch.load('saved/net_20230910_172523_loss_0.0013.pth', map_location=device)
net.load_state_dict(state_dict)
# Switch the module to evaluation mode; this notebook only runs inference.
net.eval()

# Sample rate (Hz) used to build the time and frequency axes of the plots.
sr = 48000

# Module-level coordinates for the blue and red points.
# Note: the sliders carry their own initial values and do not read these.
x_blue = 0
y_blue = 0
x_red = 0
y_red = 0

def update_plot(x_blue, y_blue, x_red, y_red):
    """Redraw the figure for the given blue and red point positions.

    Queries ``net.spectrogram_at`` with the blue point as ``src`` and the red
    point as ``mic``, rescales the two output channels, converts them to a
    signal with ``to_audio`` and draws five panels: the XY scatter of both
    points, an image of each output channel, the signal over time, and the
    magnitude of its FFT.

    Args:
        x_blue: X coordinate of the blue point.
        y_blue: Y coordinate of the blue point.
        x_red: X coordinate of the red point.
        y_red: Y coordinate of the red point.

    Returns:
        None. Prints the min and max of the rescaled channel 0 and shows the
        figure.
    """
    # The third component is 1 for the blue point and 0 for the red point.
    # NOTE(review): its meaning is defined by NAF and not visible here - confirm.
    s = np.array([x_blue, y_blue, 1])
    m = np.array([x_red, y_red, 0])
    src = torch.tensor(s).unsqueeze(1).to(device)
    mic = torch.tensor(m).unsqueeze(1).to(device)
    # Pure inference: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        out = net.spectrogram_at(src, mic).cpu()
    # Undo the dataset normalisation on channel 0 and scale channel 1 by 180.
    out[:,:,0] = (out[:,:,0] * dataset.std) + dataset.mean
    print(np.min(out[:,:,0].numpy()), np.max(out[:,:,0].numpy()))
    out[:,:,1] = out[:,:,1] * 180

    plt.clf()

    # 3 rows x 3 columns; the first column (scatter plot) is twice as wide
    # as each of the two image columns.
    gs = gridspec.GridSpec(3, 3, width_ratios=[4, 2, 2])

    # Scatter plot of the two points, with equal scaling on both axes
    ax1 = plt.subplot(gs[0,0])
    ax1.set_aspect('equal', adjustable='box')

    # Blue point
    ax1.scatter(x_blue, y_blue, color='blue', marker='o', label='Blue Point')

    # Red point
    ax1.scatter(x_red, y_red, color='red', marker='o', label='Red Point')

    ax1.set_xlim(-1, 1)
    ax1.set_ylim(-1, 1)
    ax1.set_xlabel('X')
    ax1.set_ylabel('Y')
    ax1.grid()
    ax1.legend()
    ax1.set_title('Interactive XY Scatter Plot')

    # Image of output channel 0, with a colour bar
    ax2 = plt.subplot(gs[0,1])
    im = ax2.imshow(out[:,:,0], cmap='viridis', aspect='auto')
    ax2.set_title('Right Plot')
    cbar = plt.colorbar(im, ax=ax2)
    cbar.set_label('Color Bar Label')

    # Image of output channel 1
    ax3 = plt.subplot(gs[0,2])
    ax3.imshow(out[:,:,1], cmap='viridis', aspect='auto')
    ax3.set_title('Right Plot')

    # Second row, spanning all columns: the reconstructed signal over time
    ax4 = plt.subplot(gs[1, :])
    signal = to_audio(out[:,:,0], out[:,:,1])
    t_axis = np.arange(0, len(signal)) / sr

    ax4.plot(t_axis, signal)
    ax4.set_xlabel('Time (s)')
    ax4.set_ylabel('Amplitude')

    # Third row, spanning all columns: one-sided magnitude spectrum
    ax5 = plt.subplot(gs[2,:])
    fft = np.fft.fft(signal)
    freq_sample = np.fft.fftfreq(len(signal), 1.0 / sr)
    half_n = len(signal) // 2  # Only plot one side, assuming real input
    # Draw on ax5 explicitly instead of relying on pyplot's current axes.
    ax5.plot(freq_sample[:half_n], np.abs(fft[:half_n]), label='Result')
    ax5.set_xlabel('Frequency (Hz)')
    ax5.set_ylabel('|FFT|')

    plt.gcf().set_size_inches(10,10)

    plt.tight_layout()
    plt.show()

# Range, step and update policy shared by all four coordinate sliders.
# continuous_update=False means the plot is only redrawn when a slider is released.
_slider_opts = dict(min=-1, max=1, step=0.01, continuous_update=False)

# Blue point: X and Y sliders
x_blue_slider = widgets.FloatSlider(value=0, description='Blue X:', **_slider_opts)
y_blue_slider = widgets.FloatSlider(value=0, description='Blue Y:', **_slider_opts)

# Red point: X and Y sliders (X starts at 0.9)
x_red_slider = widgets.FloatSlider(value=0.9, description='Red X:', **_slider_opts)
y_red_slider = widgets.FloatSlider(value=0, description='Red Y:', **_slider_opts)

# Bind each slider to the matching update_plot argument
interactive_plot = widgets.interactive(
    update_plot,
    x_blue=x_blue_slider,
    y_blue=y_blue_slider,
    x_red=x_red_slider,
    y_red=y_red_slider,
)

# Render the sliders together with the plot output
display(interactive_plot)

Loading data...


interactive(children=(FloatSlider(value=0.0, continuous_update=False, description='Blue X:', max=1.0, min=-1.0…

In [14]:
from IPython.display import Audio
import numpy as np

from scipy.signal import fftconvolve

# Sanity-check tone: 5 seconds of a 440 Hz sine sampled at 44.1 kHz
sample_rate = 44100
duration = 5  # Duration in seconds
t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
audio_data = np.sin(2 * np.pi * 440.0 * t)

# Only the last expression of a cell is rendered automatically, so every
# player in this cell is shown explicitly with display().
display(Audio(data=audio_data, rate=sample_rate))

# Load the melody resampled to `sr`, the rate the first cell uses for the
# time axis of the network signal, so both convolution inputs share one rate.
audio1, sr1 = librosa.load('melody.wav', sr=sr)  # Replace 'melody.wav' with your audio file

# Query the network at the module-level point coordinates.
# Note: these are x_blue/y_blue/x_red/y_red, not the current slider values.
s = np.array([x_blue, y_blue, 1])
m = np.array([x_red, y_red, 0])
src = torch.tensor(s).unsqueeze(1).to(device)
mic = torch.tensor(m).unsqueeze(1).to(device)
# Pure inference: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    out = net.spectrogram_at(src, mic).cpu()
# Undo the dataset normalisation on channel 0 and scale channel 1 by 180.
out[:,:,0] = (out[:,:,0] * dataset.std) + dataset.mean
print(np.min(out[:,:,0].numpy()), np.max(out[:,:,0].numpy()))
out[:,:,1] = out[:,:,1] * 180

signal = to_audio(out[:,:,0], out[:,:,1])

# Convolution does not need equal-length inputs; truncating both to the
# shorter length would cut the melody down to the length of the network
# signal, so the full signals are used.
audio2 = np.asarray(signal)

# FFT-based convolution matches np.convolve(mode='full') up to
# floating-point rounding and stays fast for long inputs.
convolved_audio = fftconvolve(audio1, audio2, mode='full')

# Play the melody and the convolved result at the rate they were produced at.
display(Audio(data=audio1, rate=sr1))
display(Audio(data=convolved_audio, rate=sr1))



-80.35128 -1.1548843
