In [25]:

# Configuration
FPS = 24
FFT_WINDOW_SECONDS = 0.25 # how many seconds of audio make up an FFT window

# Note range to display
FREQ_MIN = 10
FREQ_MAX = 1000

# Notes to display
TOP_NOTES = 3

# Names of the notes
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

# Output size. Generally use SCALE for higher res, unless you need a non-standard aspect ratio.
RESOLUTION = (640, 360)
SCALE = 1 # 0.5=QHD(960x540), 1=HD(1920x1080), 2=4K(3840x2160)

In [26]:
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.io import wavfile # get the api
import pandas as pd
import os


AUDIO_FILE = "./content/sample_audio.wav"

fs, data = wavfile.read(AUDIO_FILE) # load the data
audio = data.T[0] # this is a two channel soundtrack, get the first track

audio_quant = np.array(audio)
audio_hw = np.int32(audio_quant/np.max(abs(audio_quant)) * (2**31 - 1))



# audio_to_csv = pd.DataFrame(audio)
# audio_to_csv.to_csv("./audio_data.csv")

FRAME_STEP = (fs / FPS) # audio samples per video frame
FFT_WINDOW_SIZE = int(fs * FFT_WINDOW_SECONDS)
AUDIO_LENGTH = len(audio)/fs
audio_hw

array([29937577, 29358674, 29358674, ...,        0,        0,        0])

Several utility functions.

In [27]:
import plotly.graph_objects as go

def plot_fft(p, xf, fs, notes, dimensions=(360,180)):
  layout = go.Layout(
      title="frequency spectrum",
      autosize=False,
      width=dimensions[0],
      height=dimensions[1],
      xaxis_title="Frequency (note)",
      yaxis_title="Magnitude",
      font={'size' : 12}
  )

  fig = go.Figure(layout=layout,
                  layout_xaxis_range=[FREQ_MIN,FREQ_MAX],
                  layout_yaxis_range=[0,1]
                  )
  
  fig.add_trace(go.Scatter(
      x = xf,
      y = p))
  
  for note in notes:
    fig.add_annotation(x=note[0]+10, y=note[2],
            text=note[1],
            font = {'size' : 20},
            showarrow=False)
  return fig

def extract_sample(audio, frame_number):
  end = frame_number * FRAME_OFFSET
  begin = int(end - FFT_WINDOW_SIZE)

  if end == 0:
    # We have no audio yet, return all zeros (very beginning)
    return np.zeros((np.abs(begin)),dtype=float)
  elif begin<0:
    # We have some audio, padd with zeros
    return np.concatenate([np.zeros((np.abs(begin)),dtype=float),audio[0:end]])
  else:
    # Usually this happens, return the next sample
    return audio[begin:end]

def find_top_notes(fft,num):
  if np.max(fft.real)<0.001:
    return []

  lst = [x for x in enumerate(fft.real)]
  lst = sorted(lst, key=lambda x: x[1],reverse=True)

  idx = 0
  found = []
  found_note = set()
  while( (idx<len(lst)) and (len(found)<num) ):
    f = xf[lst[idx][0]]
    y = lst[idx][1]
    n = freq_to_number(f)
    n0 = int(round(n))
    name = note_name(n0)

    if name not in found_note:
      found_note.add(name)
      s = [f,note_name(n0),y]
      found.append(s)
    idx += 1
    
  return found

Run the FFT on individual samples of the audio and generate video frames of the frequency chart.

In [28]:
from pynq import Overlay
overlay = Overlay("./overlay/fft.bit")
fft_overlay = overlay.fft_warp_0


In [29]:
from pynq import allocate
input_buffer = allocate(shape=(FRAME_OFFSET*FRAME_COUNT,), dtype='i4')
output_buffer = allocate(shape=(2**14 *FRAME_COUNT,), dtype='i4')

In [33]:
import numpy as np
import tqdm
import time
import plotly.offline as py

!rm ./content/*.png


def freq_to_number(f): return 69 + 12*np.log2(f/440.0)
def number_to_freq(n): return 440 * 2.0**((n-69)/12.0)
def note_name(n): return NOTE_NAMES[n % 12] + str(int(n/12 - 1))

# Hanning window function
audio_hw = audio_hw[0:FRAME_COUNT*FRAME_OFFSET]
np.copyto(input_buffer, np.int32(audio_hw))

input_buffer[10000:10009]

fft_overlay.s_axi_control.write(0x10,input_buffer.physical_address)
fft_overlay.s_axi_control.write(0x1c,output_buffer.physical_address)



rm: cannot remove './content/*.png': No such file or directory


In [None]:
import time

fft_overlay.s_axi_CTRL.write(0x00, 0x01)
start_time = time.time()
while True:
    reg = fft_overlay.s_axi_CTRL.read(0x00)
    if reg != 1:
        break
end_time = time.time()

print("耗时：{}s".format(end_time - start_time))
xf = np.fft.rfftfreq(16384, 1/fs)


for frame_number in range(FRAME_COUNT):
    fig = plot_fft(output_buffer,xf,fs,s,RESOLUTION)
    

In [None]:
output_buffer[10000:10009]

--- 
### 0718

In [4]:

import numpy as np
import tqdm
import time
import plotly.offline as py

!rm ./content/*.png


def freq_to_number(f): return 69 + 12*np.log2(f/440.0)
def number_to_freq(n): return 440 * 2.0**((n-69)/12.0)
def note_name(n): return NOTE_NAMES[n % 12] + str(int(n/12 - 1))

# Hanning window function
start_time = time.time()
window = 0.5 * (1 - np.cos(np.linspace(0, 2*np.pi, FFT_WINDOW_SIZE, False)))
end_time = time.time()

print("window running time: %.2f." %(end_time - start_time))
# np_to_csv = pd.DataFrame(data = window)

# np_to_csv.to_csv("./window.csv")

start_time = time.time()
xf = np.fft.rfftfreq(16384, 1/fs)

# xf_to_csv = pd.DataFrame(data = xf)
# xf_to_csv.to_csv("./xf.csv")

FRAME_COUNT = int(AUDIO_LENGTH*FPS)
FRAME_OFFSET = int(len(audio)/FRAME_COUNT)
end_time = time.time()
print("fft running time: %.2f." %(end_time - start_time))

# Pass 1, find out the maximum amplitude so we can scale.
mx = 0
start_time = time.time()
for frame_number in range(FRAME_COUNT):
  sample = extract_sample(audio, frame_number)

  fft = np.fft.rfft(sample * window)
  fft = np.abs(fft).real 
  mx = max(np.max(fft),mx)

end_time = time.time()
print(f"Max amplitude: {mx}")
print("loop running time: %.2f." %(end_time - start_time))
# Pass 2, produce the animation
# for frame_number in tqdm.tqdm(range(FRAME_COUNT)):

frames_data = []

for frame_number in range(FRAME_COUNT):
    
  sample = extract_sample(audio, frame_number)

  fft = np.fft.rfft(sample * window)
  fft = np.abs(fft) / mx 
     
  s = find_top_notes(fft,TOP_NOTES)
#   s_to_csv = pd.DataFrame(data = s)
#   s_to_csv.to_csv(f"./notes/s{frame_number}.csv")

#   fft_real_to_csv = pd.DataFrame(data = fft.real)
#   fft_real_to_csv.to_csv(f"./fft/fft_real{frame_number}.csv")

  frame_data = pd.DataFrame({'Frequency (note)': xf, 'Magnitude': fft.real})
  frames_data.append(frame_data)
    
  fig = plot_fft(fft.real,xf,fs,s,RESOLUTION)

#   start_time = time.time()
#   fig.write_image(f"./content/frame{frame_number}.png",scale=1)

#   py.iplot(fig)

#   end_time = time.time()
#   print("write running time: %.2f." %(end_time - start_time))  


rm: cannot remove './content/*.png': No such file or directory
window running time: 0.01.
fft running time: 0.13.
Max amplitude: 34071601.562644176
loop running time: 2.50.


Use dash to create an interactive app.

In [12]:
from pynq import allocate
sample_len = len(aud_in)
input_buffer = allocate(shape=(sample_len,), dtype='i4')
output_buffer = allocate(shape=(sample_len,), dtype='i4')
coef_buffer = allocate(shape=(99,), dtype='i4')

NameError: name 'aud_in' is not defined

In [None]:
np.copyto(input_buffer, np.int32(aud_in))
np.copyto(coef_buffer, hpf_coeffs_hw)

In [5]:
# import requirements
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash

def plot_fft2(p, xf, fs, dimensions=(360,180)):
  layout = go.Layout(
      title="frequency spectrum",
      autosize=False,
      width=dimensions[0],
      height=dimensions[1],
      xaxis_title="Frequency (note)",
      yaxis_title="Magnitude",
      font={'size' : 12}
  )

  fig = go.Figure(layout=layout,
                  layout_xaxis_range=[FREQ_MIN,FREQ_MAX],
                  layout_yaxis_range=[0,1]
                  )
  
  fig.add_trace(go.Scatter(
      x = xf,
      y = p))
  
  return fig


# define app name

app = dash.Dash(__name__)
# app = JupyterDash(__name__)

total_frames = FRAME_COUNT
# define chart and slider 
app.layout = html.Div([
    dcc.Graph(id='frequency-spectrum'),
    dcc.Slider(
        id='frame-slider',
        min=0,  # 最小帧索引
        max=total_frames - 1,  # 最大帧索引
        value=0,  # 默认帧索引
        marks={i: f'Frame {i}' for i in range(total_frames)},  # 滑块标记
        step=1  # 滑块步长
    )
])

# write callback functions.
@app.callback(
    Output('frequency-spectrum', 'figure'),
    [Input('frame-slider', 'value')]
)
def update_figure(selected_frame):
    selected_frame_data = frames_data[selected_frame]

    # 在这里使用 plot_fft 函数来绘制频谱图，并返回绘制的图像对象
    # 假设您已经有了 fs 和 notes 的定义
    p_frame = selected_frame_data['Frequency (note)'].values
    xf_frame = selected_frame_data['Magnitude'].values
    fig = plot_fft2(p_frame, xf_frame, fs)
    
# run app.

if __name__ == '__main__':
#     app.run_server(mode='inline')
    app.run_server(host='192.168.137.166', port=8110, debug=False)
    

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


OSError: [Errno 99] Cannot assign requested address

Use [ffmpeg](https://ffmpeg.org/) to combine the input audio WAV and the individual frame images into a MP4 video.

In [None]:
!ffmpeg -y -r {FPS} -f image2 -s 1920x1080 -i /home/xilinx/jupyter_notebooks/fft_prjs/content/frame%d.png -i {AUDIO_FILE} -c:v libx264 -pix_fmt yuv420p movie.mp4

Download the generated movie.

In [None]:
# from google.colab import files
# files.download('movie.mp4')