In [19]:
# --- Configuration ---------------------------------------------------------
FPS = 24                    # video frames per second of audio
FFT_WINDOW_SECONDS = 0.25   # seconds of audio covered by one FFT window

# Frequency span used for the x-axis of the spectrum plot
FREQ_MIN = 10
FREQ_MAX = 1000

# Number of strongest notes to look for in each frame
TOP_NOTES = 3

# The twelve note names of an octave, starting at C
NOTE_NAMES = "C C# D D# E F F# G G# A A# B".split()

# Frame size in pixels. Prefer changing SCALE for higher resolution and keep
# RESOLUTION as is, unless a non-standard aspect ratio is needed.
RESOLUTION = (1920, 1080)
SCALE = 1  # 0.5=QHD(960x540), 1=HD(1920x1080), 2=4K(3840x2160)

In [20]:
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.io import wavfile # get the api
import pandas as pd
import os
import numpy as np

AUDIO_FILE = "./content/sample_audio.wav"

# Load the WAV file: `fs` is the sample rate, `data` holds the samples.
fs, data = wavfile.read(AUDIO_FILE)

# Use the first channel of a multi-channel file. A mono file is already 1-D,
# and indexing it like a stereo file would return a single sample instead.
audio = data[:, 0] if data.ndim > 1 else data

# Rescale to the full int32 range. The peak is computed in float64 because
# abs() on a narrow integer dtype wraps around for the most negative value
# (e.g. int16 -32768), which would understate the peak and overflow the cast.
audio_quant = np.asarray(audio, dtype=np.float64)
peak = np.max(np.abs(audio_quant)) if audio_quant.size else 0.0
if peak > 0:
    audio_hw = np.int32(audio_quant / peak * (2**31 - 1))
else:
    # Empty or completely silent track: nothing to scale, avoid dividing by 0.
    audio_hw = np.zeros(audio_quant.shape, dtype=np.int32)

FRAME_STEP = (fs / FPS) # audio samples per video frame
FFT_WINDOW_SIZE = int(fs * FFT_WINDOW_SECONDS)  # audio samples per FFT window
AUDIO_LENGTH = len(audio_hw)/fs  # duration of the track in seconds

array([29937577, 29358674, 29358674, ...,        0,        0,        0])

Utility functions for plotting a frequency spectrum and for picking out its strongest notes.

In [21]:
import plotly.graph_objects as go

def plot_fft(p, xf, fs, notes, dimensions=(960,540)):
  """Build a plotly figure of one spectrum together with its note labels.

  Args:
    p: y-values of the trace.
    xf: x-values of the trace.
    fs: accepted for interface compatibility; not used by this function.
    notes: iterable of entries indexable as [frequency, label, magnitude];
      each becomes a text annotation placed 10 x-units to the right of its
      frequency, at the height of its magnitude.
    dimensions: (width, height) of the figure in pixels.

  Returns:
    The assembled plotly figure. The x-axis spans FREQ_MIN..FREQ_MAX and the
    y-axis spans 0..1.
  """
  figure_layout = go.Layout(
      title="frequency spectrum",
      xaxis_title="Frequency (note)",
      yaxis_title="Magnitude",
      font={'size' : 18},
      autosize=False,
      width=dimensions[0],
      height=dimensions[1],
  )

  spectrum_trace = go.Scatter(x=xf, y=p)
  fig = go.Figure(
      data=[spectrum_trace],
      layout=figure_layout,
      layout_xaxis_range=[FREQ_MIN, FREQ_MAX],
      layout_yaxis_range=[0, 1],
  )

  # One text label per detected note, nudged right so it clears the peak.
  for entry in notes:
    fig.add_annotation(
        x=entry[0] + 10,
        y=entry[2],
        text=entry[1],
        font={'size' : 24},
        showarrow=False,
    )

  return fig

def find_top_notes(fft,num):
  """Return up to `num` of the strongest distinct notes in a spectrum.

  Bin i of the spectrum is looked up at frequency xf[i], where `xf` is the
  module-level frequency axis.

  Args:
    fft: 1-D array of spectrum values; only its `.real` part is used.
    num: maximum number of notes to return.

  Returns:
    A list of [frequency, note_name, magnitude] lists ordered from strongest
    to weakest, with every note name appearing at most once. The list is
    empty when the spectrum is empty or its largest value is below 0.001.
  """
  magnitudes = np.asarray(fft.real)
  if magnitudes.size == 0 or np.max(magnitudes) < 0.001:
    return []

  # Strongest bins first; the sort is stable, so ties keep ascending bin order.
  ranked = sorted(enumerate(magnitudes), key=lambda pair: pair[1], reverse=True)

  found = []
  seen_names = set()
  for bin_index, magnitude in ranked:
    if len(found) >= num:
      break

    # Bins past the end of the frequency axis have no known frequency.
    if bin_index >= len(xf):
      continue

    f = xf[bin_index]
    # The 0 Hz (DC) bin has no pitch: log2(0) is -inf, which cannot be
    # rounded to a note number.
    if f <= 0:
      continue

    name = note_name(int(round(freq_to_number(f))))
    if name not in seen_names:
      seen_names.add(name)
      found.append([f, name, magnitude])

  return found

Run the FFT on successive windows of the audio and generate one video frame of the frequency chart for each window.

In [22]:
from pynq import Overlay
# Load the bitstream and take a handle to its `fft_warp_0` block, which the
# later cells drive through its register interfaces.
overlay = Overlay("./overlay/fft.bit")
fft_overlay = overlay.fft_warp_0
# (debugging aid) inspect `overlay` interactively to see what it exposes
# Number of whole video frames that fit in the audio track.
FRAME_COUNT = int(AUDIO_LENGTH*FPS)
# Audio samples advanced per video frame (truncated to an integer).
# NOTE(review): FRAME_COUNT is 0 for audio shorter than one frame, in which
# case this division raises ZeroDivisionError.
FRAME_OFFSET = int(len(audio_hw)/FRAME_COUNT)

In [23]:
from pynq import allocate

# Allocate the buffers shared with the FFT IP; their physical addresses are
# written to the IP's registers in a later cell.
# Input: FRAME_OFFSET samples for each of the FRAME_COUNT frames, as int32.
input_buffer = allocate(shape=(FRAME_OFFSET*FRAME_COUNT,), dtype='i4')
# Output: 2**14 int32 values reserved per frame.
# NOTE(review): 2**14 presumably matches the FFT length built into the
# bitstream -- confirm against the IP configuration.
output_buffer = allocate(shape=(2**14 *FRAME_COUNT,), dtype='i4')

In [24]:
import numpy as np
import plotly.offline as py

!rm ./content/*.png

# Functions to find the notes
def freq_to_number(f):
  """Convert a frequency in Hz to a (fractional) note number.

  440 Hz maps to 69 and each semitone adds 1. `f` must be positive;
  zero yields -inf.
  """
  return 69 + 12*np.log2(f/440.0)


def number_to_freq(n):
  """Convert a note number back to its frequency in Hz (69 -> 440.0)."""
  return 440 * 2.0**((n-69)/12.0)


def note_name(n, names=None):
  """Return the name of integer note number `n`, e.g. 69 -> "A4".

  Args:
    n: integer note number (may be negative).
    names: optional sequence of the twelve note names starting at C;
      defaults to the module-level NOTE_NAMES.

  Returns:
    The note name followed by its octave number. Note 0 is "C-1".
  """
  if names is None:
    names = NOTE_NAMES
  # Floor division keeps the octave right below note 12: truncating
  # n/12 - 1 toward zero would label notes 1..11 as octave 0 instead of -1.
  return names[n % 12] + str(int(n // 12 - 1))

# Process audio data: trim the track to a whole number of frames so that it
# has exactly as many samples as input_buffer, then copy it in as int32.
audio_hw = audio_hw[0:FRAME_COUNT*FRAME_OFFSET]
np.copyto(input_buffer, np.int32(audio_hw))

# Leftover inspection of a few input samples; as a bare expression in the
# middle of a cell it displays nothing.
input_buffer[10000:10009]

# Write the physical addresses of the input and output buffers to the IP's
# registers at offsets 0x10 and 0x1c.
# NOTE(review): the register offsets come from the IP's register map, which
# is not visible here -- confirm against the generated driver header.
fft_overlay.s_axi_control.write(0x10,input_buffer.physical_address)
fft_overlay.s_axi_control.write(0x1c,output_buffer.physical_address)


rm: cannot remove './content/*.png': No such file or directory


In [None]:
import time

# Number of output values the IP produces per frame (matches the per-frame
# stride used when output_buffer was allocated).
FFT_POINTS = 2**14

# Start running IP
fft_overlay.s_axi_CTRL.write(0x00, 0x01)
start_time = time.time()
# Busy-wait until the control register no longer reads back exactly 1.
# NOTE(review): there is no timeout, so a stalled IP hangs this cell.
while fft_overlay.s_axi_CTRL.read(0x00) == 1:
    pass
end_time = time.time()

# Report how long the IP took (the message text means "elapsed time").
print("耗时：{}s".format(end_time - start_time))

# Frequency of each bin from 0 Hz up to fs/2.
xf = np.fft.rfftfreq(FFT_POINTS, 1/fs)

#  Drawing and saving locally
for frame_number in range(FRAME_COUNT):
    frame = output_buffer[frame_number*FFT_POINTS:(frame_number+1)*FFT_POINTS]
    # Keep only the bins that have an entry in xf, so that every value lines
    # up with a frequency.
    # NOTE(review): assumes the first len(xf) values of each frame are the
    # bins from 0 Hz to fs/2 -- confirm against the IP's output layout.
    spectrum = frame[:len(xf)]
    # Analyse this frame's spectrum (not the imported `fft` function).
    s = find_top_notes(spectrum,TOP_NOTES)
    fig = plot_fft(spectrum,xf,fs,s,RESOLUTION)
    # Honour the SCALE setting from the configuration cell.
    fig.write_image(f"./content/frame{frame_number}.png",scale=SCALE)
    py.iplot(fig)
    

In [None]:
# Display nine raw values (indices 10000-10008) from the IP's output buffer.
output_buffer[10000:10009]

Use [ffmpeg](https://ffmpeg.org/) to combine the input audio WAV and the individual frame images into an MP4 video.

In [None]:
!ffmpeg -y -r {FPS} -f image2 -s 1920x1080 -i /home/xilinx/jupyter_notebooks/fft_prjs/content/frame%d.png -i {AUDIO_FILE} -c:v libx264 -pix_fmt yuv420p movie.mp4