<a href="https://colab.research.google.com/github/x1001000/raspberrypi3-yamnet-sed/blob/main/colab_notebooks/%E5%8F%B0%E7%81%A3%E7%9A%84%E9%9F%B3%E6%A8%82%E5%9E%83%E5%9C%BE%E8%BB%8A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# At runtime reset

## install

In [None]:
!pip install soundfile
!pip install git+https://github.com/nficano/pytube
!pip install pydub

Collecting git+https://github.com/nficano/pytube
  Cloning https://github.com/nficano/pytube to /tmp/pip-req-build-qs7y8ipu
  Running command git clone -q https://github.com/nficano/pytube /tmp/pip-req-build-qs7y8ipu
Building wheels for collected packages: pytube
  Building wheel for pytube (setup.py) ... [?25l[?25hdone
  Created wheel for pytube: filename=pytube-10.5.3-cp37-none-any.whl size=42794 sha256=1e9b71c8e0ff9e065616446dfb9bb97b24f542b341c82f04927daed8207ec4f5
  Stored in directory: /tmp/pip-ephem-wheel-cache-fu9manin/wheels/44/da/40/3b5e03abe33a91895343814fb44b309512375408f4a909555b
Successfully built pytube
Installing collected packages: pytube
Successfully installed pytube-10.5.3
Collecting pydub
  Downloading https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl
Installing collected packages: pydub
Successfully installed pydub-0.25.1


## download YAMNet (15M bytes)

In [None]:
# Disabled alternative: fetch the weights and the model code from the upstream sources.
# !curl -O https://storage.googleapis.com/audioset/yamnet.h5
# !git clone https://github.com/tensorflow/models
# !cp models/research/audioset/yamnet/* .
# Clone the project repository and copy everything in its yamnet/ folder into the
# working directory. Later cells import `params` and `yamnet` and open 'yamnet.h5'
# and 'yamnet_class_map_zh-tw.csv' from here (presumably all provided by that folder).
!git clone https://github.com/x1001000/raspberrypi3-yamnet-sed
!cp raspberrypi3-yamnet-sed/yamnet/* .

Cloning into 'raspberrypi3-yamnet-sed'...
remote: Enumerating objects: 27, done.[K
remote: Counting objects: 100% (27/27), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 289 (delta 11), reused 8 (delta 3), pack-reused 262[K
Receiving objects: 100% (289/289), 38.04 MiB | 24.53 MiB/s, done.
Resolving deltas: 100% (163/163), done.


# At runtime restart

## import

In [None]:
import numpy as np
import resampy
import soundfile as sf
import tensorflow as tf

import params as yamnet_params
import yamnet as yamnet_model

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models

from pytube import Playlist, YouTube
from pydub import AudioSegment

from IPython.display import display, Audio
from time import sleep

## load YAMNet (3.7M params)

In [None]:
# Build the frame-level YAMNet model from its hyper-parameters and restore the weights.
params = yamnet_params.Params()
yamnet = yamnet_model.yamnet_frames_model(params)
yamnet.load_weights('yamnet.h5')

# Class names read from the class map, with one extra name appended at the end
# for the additional output that is trained later in this notebook.
yamnet_classes = np.concatenate(
    (
        yamnet_model.class_names('yamnet_class_map_zh-tw.csv'),
        np.array(['台灣垃圾車']),
    )
)

yamnet.summary()

Model: "yamnet_frames"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None,)]            0                                            
__________________________________________________________________________________________________
tf.compat.v1.shape (TFOpLambda) (1,)                 0           input_1[0][0]                    
__________________________________________________________________________________________________
tf.__operators__.getitem (Slici ()                   0           tf.compat.v1.shape[0][0]         
__________________________________________________________________________________________________
tf.math.maximum_1 (TFOpLambda)  ()                   0           tf.__operators__.getitem[0][0]   
______________________________________________________________________________________

## def data_from_YouTube(url)

In [None]:
def data_from_YouTube(url):
    """Download one YouTube video's audio and return it as a mono float32 waveform.

    The audio-only stream is downloaded into the working directory, exported to
    a '.wav' file next to it, read back as 16-bit samples, scaled to floats,
    averaged to mono and resampled to `params.sample_rate` when necessary.

    Args:
        url: URL of a single YouTube video.

    Returns:
        A `(title, waveform)` tuple, where `waveform` is a 1-D float32 array, or
        `None` when the video offers no audio-only stream (a message is printed).
    """
    # Build the YouTube object once: the title and the stream list come from the
    # same object, so the video page is not requested twice.
    video = YouTube(url)
    title = video.title
    stream = video.streams.get_audio_only()
    if not stream:
        print(f'No audio_only stream from {title}!')
        return None

    default_filename = stream.default_filename
    file_name = default_filename + '.wav'
    print(stream.download(), '✅已下載，匯出wav檔...', end='')
    AudioSegment.from_file(default_filename).export(file_name, format='wav')
    print('匯出waveform陣列...')

    # https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/inference.py#L40
    wav_data, sr = sf.read(file_name, dtype=np.int16)
    assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype
    # Divide by 32768 (not 32767) so that the most negative int16 sample maps to
    # exactly -1.0 and every value stays inside [-1.0, +1.0].
    waveform = (wav_data / 32768.0).astype('float32')

    # Convert to mono and the sample rate expected by YAMNet.
    if len(waveform.shape) > 1:
        waveform = np.mean(waveform, axis=1)
    if sr != params.sample_rate:
        waveform = resampy.resample(waveform, sr, params.sample_rate)

    return title, waveform

## def data_from_Playlist(url)

In [None]:
def data_from_Playlist(url, begin=1, end=None):
    """Download the audio of the videos in a YouTube playlist.

    Args:
        url: URL of the playlist.
        begin: 1-based position of the first video to fetch.
        end: 1-based position of the last video to fetch; None means "to the end".

    Returns:
        A `(titles, waveforms)` pair of equally long lists. Videos for which
        `data_from_YouTube` returns None (no audio-only stream) are skipped.
    """
    titles = []
    waveforms = []
    # A separate loop name keeps the playlist `url` parameter intact.
    for video_url in Playlist(url).video_urls[begin-1:end]:
        result = data_from_YouTube(video_url)
        sleep(10) # HTTP 429 hack
        if result is None:
            # Nothing was downloaded; unpacking None would raise TypeError.
            continue
        title, waveform = result
        waveforms.append(waveform)
        titles.append(title)
    return titles, waveforms

# Data prep for TRAINING and TESTING

## extract features X and encode labels Y

In [None]:
# TRAINING_data
# Each entry is (label for the extra class, [playlist URL]).
playlists = [(False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wRk1sv9XWBpC2wcBEqufR_o']), # TV programs
             #(False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wQIB3c6siLyc8qa25iIDiKq']), # street traffic, cause underfitting
             (False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wSQp7_JiKgEuQSwJiiwvq7M']), # ambulance
             (False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wQkx2hcJMyD-7fB6xqhRz_B']), # police car
             (False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wQ29QPFe6LfvWRMqId-N_75']), # fire engine
             (True, ['https://www.youtube.com/playlist?list=PL9deQcOOEJtV7_M52CH819jcN9Ty7ql0h'])]  # Taiwanese garbage truck

# Collect per-video pieces and join them once at the end, instead of
# re-concatenating the whole (growing) arrays after every video.
X_parts, Y_parts = [], []
n_frames = 0
for label, playlist in playlists:
    titles, waveforms = data_from_Playlist(*playlist)
    for title, waveform in zip(titles, waveforms):
        scores, embeddings, spectrogram = yamnet(waveform)
        # Targets: YAMNet's own scores thresholded at 0.5, plus one extra
        # column holding the playlist's label for the additional class.
        thresholded = np.asarray(scores) >= 0.5
        extra_column = np.full((len(thresholded), 1), label)
        X_parts.append(np.asarray(embeddings))
        Y_parts.append(np.concatenate([thresholded, extra_column], axis=1))
        n_frames += len(thresholded)
        # Running totals, i.e. the shapes X and Y have after this video.
        print((n_frames, X_parts[-1].shape[1]), (n_frames, Y_parts[-1].shape[1]), title)

# Stay None when nothing was downloaded, as before.
X = np.concatenate(X_parts) if X_parts else None
Y = np.concatenate(Y_parts) if Y_parts else None
TRAINING_data = X, Y

/content/反黑箱服貿319晚公民的民主教育 好猿街頭紀錄.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/Humans Need Not Apply.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/十分鐘讓你看懂「科技性失業」－人類不適任的未來.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/I SUED THE SCHOOL SYSTEM !!!.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/文茜 評能源政策.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20181223【文茜世界財經週報】108年課綱新增科技程式教育　台灣準備好了嗎？.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/柯文哲市長參訪以色列首站 Mobileye自駕車公司 阿北反思臺灣產業政策.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/228到底怎麼回事 國民政府登島大屠殺.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/台灣禁出口罩，大陸人看范瑋琪和館長吵架，誰比較有道理？.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/早安打工人，又是996的一天，来看看计划经济？.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/舖設海底防線 爭取地震黃金10秒鐘.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/什麼是資本主義？經濟危機的本質？比特幣將泡沫？機器害勞工失業？理解當代社會的必讀之作！EP16 《爸爸寄來的經濟學情書》 說書【經濟學人文社科】.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
(535, 1024) (535, 522) #反黑箱服貿/319晚/公民的民主教育# 好猿街頭紀錄
(2410, 1024) (2410, 522) Humans Need Not Apply
(4400, 1024) (4400, 522) 十分鐘讓你看懂「科技

In [None]:
# TESTING_data
# Each entry is (label for the extra class, [playlist URL]).
playlists = [(False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wSujknY_kq0mmFeiUOpLA48']), # movies
             (False, ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wQ1DlTmhUvgy8RZndx6r_li']), # music
             (True, ['https://www.youtube.com/playlist?list=PL9deQcOOEJtUKab8VBKnom0d8BrYBsvlg'])]  # Taiwanese garbage truck

# Collect per-video pieces and join them once at the end, instead of
# re-concatenating the whole (growing) arrays after every video.
X_parts, Y_parts = [], []
n_frames = 0
for label, playlist in playlists:
    titles, waveforms = data_from_Playlist(*playlist)
    for title, waveform in zip(titles, waveforms):
        scores, embeddings, spectrogram = yamnet(waveform)
        # Targets: YAMNet's own scores thresholded at 0.5, plus one extra
        # column holding the playlist's label for the additional class.
        thresholded = np.asarray(scores) >= 0.5
        extra_column = np.full((len(thresholded), 1), label)
        X_parts.append(np.asarray(embeddings))
        Y_parts.append(np.concatenate([thresholded, extra_column], axis=1))
        n_frames += len(thresholded)
        # Running totals, i.e. the shapes X and Y have after this video.
        print((n_frames, X_parts[-1].shape[1]), (n_frames, Y_parts[-1].shape[1]), title)

# Stay None when nothing was downloaded, as before.
X = np.concatenate(X_parts) if X_parts else None
Y = np.concatenate(Y_parts) if Y_parts else None
TESTING_data = X, Y

/content/The Social Network (2010) - Hacking scene.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/微電影【消失Disappear】全片30分鐘首映會播映版育成高中牛奶糖教師教育劇團.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/【爆神片】《肖申克的救赎》等待和希望是人生的最大意义.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/《一級玩家》彩蛋解析 以及背後的真正意義  電影解析  超粒方.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/侏羅紀公園vs侏羅紀世界 經典與平庸  超粒方.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/The COMPLETE MCU Recap  CRAM IT (Avengers Endgame Edition).mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/漫威十年，I AM IRONMAN.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/《复联4》前必看！一口气看完21部漫威电影，完整的时间线剧情讲解！.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/《复仇者联盟4》剧情解读 寡姐是鹰眼至爱？时空穿梭怎么个玩法？美队为何选择他作为继任者？.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/i shouldnt be alive unless it was for a reason tony stark.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/HBO Asia  《西方極樂園》第三季預告.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/【全面啟動】10周年紀念版 8月12日(週三) 再次體驗大銀幕震撼.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/世紀大雜燴! 匯集時代精華的大作 續談巴哈-郭德堡變奏曲.mp4 ✅已下載

## learn about YAMNet's output shapes

In [None]:
# Feed YAMNet a clip cut to a chosen length and check the shapes of its three outputs.
# NOTE: `waveform` is whatever the previous data-preparation loop left behind.
STFTs = 1 # No matter how short wav is, even 0s, 1 STFT will be done!
SR = int(params.sample_rate)
# round rather than int to avoid 0.30000000000000004.com
n_samples = round((0.015 + 0.48*STFTs + 0.001)*SR)
scores, embeddings, spectrogram = yamnet(waveform[:n_samples])
for output in (scores, embeddings, spectrogram):
    print(output.shape)

# Second dimension of each output.
assert scores.shape[1] == 521 # audio event classes
assert embeddings.shape[1] == 1024 # -dimension embedding
assert spectrogram.shape[1] == 64 # mel bins covering the range 125-7500 Hz

# First dimension: one score row and one embedding row per STFT step, and
# 48 spectrogram rows per step plus 48 more.
assert len(scores) == len(embeddings)
assert len(embeddings) == len(spectrogram) / 48 - 1
assert len(spectrogram) / 48 - 1 == STFTs

(1, 521)
(1, 1024)
(96, 64)


# Customize a model from YAMNet's output layer

## learn about YAMNet's output layers

In [None]:
# https://www.tensorflow.org/guide/keras/sequential_model
# A model with the same inputs as YAMNet that returns the output of every layer,
# so the last few layers can be compared with YAMNet's own return values.
every_layer_output = [layer.output for layer in yamnet.layers]
feature_extractor = keras.Model(inputs=yamnet.inputs, outputs=every_layer_output)

features = feature_extractor(waveform)
scores, embeddings, spectrogram = yamnet(waveform)

# Third-from-last layer output is exactly the embeddings.
assert bool(tf.reduce_all(features[-3] == embeddings))
# Last layer output is the sigmoid of the second-to-last layer output ...
assert bool(tf.reduce_all(features[-1] == layers.Activation('sigmoid')(features[-2])))
# ... and is exactly the scores.
assert bool(tf.reduce_all(features[-1] == scores))

## duplicate the neurons

In [None]:
# A one-layer model with the same size as YAMNet's second-to-last layer
# (1024 inputs, 521 sigmoid outputs).
model = keras.Sequential(
    [
        # `shape` is documented as a tuple; a bare int is rejected by newer Keras.
        keras.Input(shape=(1024,)),
        layers.Dense(521, activation="sigmoid"),
    ]
)
model.summary()
# Layer.weights returns Tensor
# Layer.get_weights() returns numpy.ndarray
source_weights = yamnet.layers[-2].get_weights()
weights = source_weights[0]
bias = source_weights[1]
model.set_weights([weights, bias])
# The copy must be exact.
assert (model.get_weights()[0] == yamnet.layers[-2].get_weights()[0]).all()
assert (model.get_weights()[1] == yamnet.layers[-2].get_weights()[1]).all()
# Somehow outputs are not identical but almost
# (presumably floating-point rounding differences between the two code paths).
# First number: total count of score values; second: how many match exactly.
print(scores.shape[0] * scores.shape[1])
print((scores == model.predict(embeddings)).numpy().sum())

## duplicate the neurons into a bigger layer

In [None]:
# Same layer as YAMNet's second-to-last layer, but with one extra output unit
# (522 instead of 521) for the additional class.
model = keras.Sequential(
    [
        # `shape` is documented as a tuple; a bare int is rejected by newer Keras.
        keras.Input(shape=(1024,)),
        layers.Dense(522, activation="sigmoid"),
    ]
)
model.summary()
# Layer.weights returns Tensor
# Layer.get_weights() returns numpy.ndarray
# Start from the new layer's own initial values (alternative: all zeros, e.g.
# np.zeros(model.get_weights()[0].shape) and np.zeros(model.get_weights()[1].shape)).
initial_values = model.get_weights()
weights = initial_values[0]
bias = initial_values[1]
# Overwrite the first 521 units with YAMNet's values; unit 522 keeps its initial values.
weights[:,:521] = yamnet.layers[-2].get_weights()[0]
bias[:521] = yamnet.layers[-2].get_weights()[1]
model.set_weights([weights, bias])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 522)               535050    
Total params: 535,050
Trainable params: 535,050
Non-trainable params: 0
_________________________________________________________________


# Model TRAINING: model.fit

In [None]:
# https://www.pyimagesearch.com/2018/05/07/multi-label-classification-with-keras/
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
X, Y = TRAINING_data
# `validation_split` holds out the LAST 20% of the rows, taken before any
# shuffling. The rows are ordered playlist by playlist with the garbage-truck
# playlist last, so an unshuffled split would validate mostly on that class and
# train on less of it. Shuffle once with a fixed seed so the split is mixed and
# repeatable. (Frames of one video can still land on both sides of the split.)
order = np.random.default_rng(0).permutation(len(X))
X, Y = np.asarray(X)[order], np.asarray(Y)[order]
model.fit(x=X,
          y=Y,
          validation_split=0.2,
          batch_size=32,        # the smaller, the slower, limited by GPU if any
          epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f715a918ad0>

# Model TESTING: model.evaluate

In [None]:
# Evaluate on the data the model was fitted on, for comparison with the
# testing data evaluated in the next cell.
X, Y = TRAINING_data
evaluation = model.evaluate(x=X, y=Y)
loss, accuracy = evaluation
accuracy



0.7829414010047913

In [None]:
# Evaluate on the separately downloaded testing playlists.
X, Y = TESTING_data
evaluation = model.evaluate(x=X, y=Y)
loss, accuracy = evaluation
accuracy



0.9176720380783081

In [None]:
# Write the trained output layer to an HDF5 file in the working directory.
model.save(filepath='20210316-3.h5')

# Run inference on a playlist

## load the model as the output layer

In [None]:
# Read the saved output layer back from the HDF5 file.
model = keras.models.load_model(filepath='20210316-3.h5')

## with / without the custom output layer

In [None]:
playlist = ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wQIB3c6siLyc8qa25iIDiKq'] # street traffic

MA = 5  # length in seconds of each window that gets one averaged prediction
SR = int(params.sample_rate)

titles, waveforms = data_from_Playlist(*playlist)
print('\nRunning inference...')
for title, waveform in zip(titles, waveforms):
    print('\n'+title)
    #display(Audio(waveform, rate=SR)) # crashes if waveform is big
    # `i` is the end of the current window in seconds. The "+ 1" keeps the last
    # complete window when the clip length is an exact multiple of MA seconds.
    for i in range(MA, len(waveform)//SR + 1, MA):
        scores, embeddings, spectrogram = yamnet(waveform[(i-MA)*SR:i*SR])
        # Scores is a matrix of (time_frames, num_classes) classifier scores.
        # Average them along time to get an overall classifier output for the clip.
        # Without the custom layer:
        # prediction = np.mean(scores[:-1], axis=0) # last one scores comes from insufficient samples
        # With the custom layer:
        prediction = np.mean(model.predict(embeddings[:-1]), axis=0)
        # Report the highest-scoring classes and their scores.
        top5 = np.argsort(prediction)[::-1][:5]
        print(f'{i//60}:{i%60:2d}',
            ''.join(f" {prediction[k]:.2f} 👉{yamnet_classes[k][:12].ljust(12, '　')}" for k in top5))

/content/纽约市街道声音 纽约街头噪音，汽车和人群噪音.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/繁忙的街道和道路环境声 - 30分钟.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/City Street Ambience Sound Effect [FREE DOWNLOAD  ROYALTY FREE].mp4 ✅已下載，匯出wav檔...匯出waveform陣列...

Running inference...

纽约市街道声音 纽约街头噪音，汽车和人群噪音
0: 5  0.64 👉車輛　　　　　　　　　　 0.26 👉交通噪聲，道路噪聲　　　 0.00 👉風　　　　　　　　　　　 0.00 👉台灣垃圾車　　　　　　　 0.00 👉機動車輛（道路）　　　　
0:10  0.85 👉車輛　　　　　　　　　　 0.24 👉交通噪聲，道路噪聲　　　 0.00 👉風　　　　　　　　　　　 0.00 👉現場記錄　　　　　　　　 0.00 👉白噪音　　　　　　　　　
0:15  0.96 👉車輛　　　　　　　　　　 0.22 👉交通噪聲，道路噪聲　　　 0.00 👉機動車輛（道路）　　　　 0.00 👉台灣垃圾車　　　　　　　 0.00 👉現場記錄　　　　　　　　
0:20  0.93 👉車輛　　　　　　　　　　 0.30 👉交通噪聲，道路噪聲　　　 0.00 👉現場記錄　　　　　　　　 0.00 👉風　　　　　　　　　　　 0.00 👉汽車喇叭，汽車喇叭聲，喇
0:25  1.00 👉車輛　　　　　　　　　　 0.73 👉交通噪聲，道路噪聲　　　 0.00 👉現場記錄　　　　　　　　 0.00 👉安靜　　　　　　　　　　 0.00 👉風　　　　　　　　　　　
0:30  1.00 👉車輛　　　　　　　　　　 0.49 👉交通噪聲，道路噪聲　　　 0.00 👉機動車輛（道路）　　　　 0.00 👉風　　　　　　　　　　　 0.00 👉現場記錄　　　　　　　　
0:35  0.99 👉車輛　　　　　　　　　　 0.24 👉交通噪聲，道路噪聲　　　 0.00 👉現場記錄　　　　　　　　 0.00 👉安靜　　　　　　　　　　 0.00 👉汽車喇叭，汽車喇叭聲，喇
0:40  0.99 👉

In [None]:
playlist = ['https://www.youtube.com/playlist?list=PL9deQcOOEJtUKab8VBKnom0d8BrYBsvlg'] # Taiwanese garbage truck

MA = 5  # length in seconds of each window that gets one averaged prediction
SR = int(params.sample_rate)

titles, waveforms = data_from_Playlist(*playlist)
print('\nRunning inference...')
for title, waveform in zip(titles, waveforms):
    print('\n'+title)
    #display(Audio(waveform, rate=SR)) # crashes if waveform is big
    # `i` is the end of the current window in seconds. The "+ 1" keeps the last
    # complete window when the clip length is an exact multiple of MA seconds.
    for i in range(MA, len(waveform)//SR + 1, MA):
        scores, embeddings, spectrogram = yamnet(waveform[(i-MA)*SR:i*SR])
        # Scores is a matrix of (time_frames, num_classes) classifier scores.
        # Average them along time to get an overall classifier output for the clip.
        # Without the custom layer:
        # prediction = np.mean(scores[:-1], axis=0) # last one scores comes from insufficient samples
        # With the custom layer:
        prediction = np.mean(model.predict(embeddings[:-1]), axis=0)
        # Report the highest-scoring classes and their scores.
        top5 = np.argsort(prediction)[::-1][:5]
        print(f'{i//60}:{i%60:2d}',
            ''.join(f" {prediction[k]:.2f} 👉{yamnet_classes[k][:12].ljust(12, '　')}" for k in top5))

/content/2020090910福興鄉垃圾車KED-5321(代763-UX)，回收車KEG-9913.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200805鹿港鎮，福興鄉垃圾車956-UR，848-UD，回收車KED-5002.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200721彰化縣福興鄉垃圾車848-UD，回收車KED-5002.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200722彰化縣福興鄉垃圾車723-UX，回收車755-UD.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200722彰化縣福興鄉垃圾車700-VQ，回收車KEB-2693.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200720 0805福興鄉垃圾車442-VQ(代721-UX)，回收車922-UD.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200717彰化縣福興鄉垃圾車722-UX，回收車365-UQ.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/20200714 21彰化縣福興鄉垃圾車763-UX，回收車KEG-9913.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...

Running inference...

20200909,10福興鄉垃圾車KED-5321(代763-UX)，回收車KEG-9913
0: 5  0.85 👉台灣垃圾車　　　　　　　 0.19 👉印表機　　　　　　　　　 0.19 👉機械　　　　　　　　　　 0.00 👉鬧鐘　　　　　　　　　　 0.00 👉吸塵器　　　　　　　　　
0:10  0.48 👉車輛　　　　　　　　　　 0.26 👉台灣垃圾車　　　　　　　 0.08 👉倒車蜂鳴聲　　　　　　　 0.01 👉應急車輛　　　　　　　　 0.01 👉機動車輛（道路）　　　　
0:15  0.57 👉台灣垃圾車　　　　　　　 0.00 👉機械　　　　　　　　　　 0.00 👉印表機　　　　　　　　　 0.00 👉培

In [None]:
playlist = ['https://www.youtube.com/playlist?list=PL0Q2eQA7p-wSQp7_JiKgEuQSwJiiwvq7M'] # ambulance

MA = 5  # length in seconds of each window that gets one averaged prediction
SR = int(params.sample_rate)

titles, waveforms = data_from_Playlist(*playlist)
print('\nRunning inference...')
for title, waveform in zip(titles, waveforms):
    print('\n'+title)
    #display(Audio(waveform, rate=SR)) # crashes if waveform is big
    # `i` is the end of the current window in seconds. The "+ 1" keeps the last
    # complete window when the clip length is an exact multiple of MA seconds.
    for i in range(MA, len(waveform)//SR + 1, MA):
        scores, embeddings, spectrogram = yamnet(waveform[(i-MA)*SR:i*SR])
        # Scores is a matrix of (time_frames, num_classes) classifier scores.
        # Average them along time to get an overall classifier output for the clip.
        # Without the custom layer:
        # prediction = np.mean(scores[:-1], axis=0) # last one scores comes from insufficient samples
        # With the custom layer:
        prediction = np.mean(model.predict(embeddings[:-1]), axis=0)
        # Report the highest-scoring classes and their scores.
        top5 = np.argsort(prediction)[::-1][:5]
        print(f'{i//60}:{i%60:2d}',
            ''.join(f" {prediction[k]:.2f} 👉{yamnet_classes[k][:12].ljust(12, '　')}" for k in top5))

/content/台灣救護車音效.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/台北市救護車緊急出動 Taipei Ambulance Responding.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/Mercedes-Benz AMBULANCE 賓士救護車出勤.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...
/content/大台北民間救護車緊急出動 Private Ambulances Responding in Metro Taipei.mp4 ✅已下載，匯出wav檔...匯出waveform陣列...

Running inference...

台灣救護車音效
0: 5  0.46 👉應急車輛　　　　　　　　 0.45 👉警笛　　　　　　　　　　 0.44 👉車輛　　　　　　　　　　 0.43 👉機動車輛（道路）　　　　 0.41 👉救護車（警報）　　　　　
0:10  0.43 👉應急車輛　　　　　　　　 0.37 👉救護車（警報）　　　　　 0.33 👉警笛　　　　　　　　　　 0.33 👉車輛　　　　　　　　　　 0.26 👉機動車輛（道路）　　　　
0:15  0.54 👉警笛　　　　　　　　　　 0.46 👉應急車輛　　　　　　　　 0.39 👉救護車（警報）　　　　　 0.35 👉機動車輛（道路）　　　　 0.35 👉車輛　　　　　　　　　　
0:20  0.67 👉應急車輛　　　　　　　　 0.54 👉救護車（警報）　　　　　 0.52 👉警笛　　　　　　　　　　 0.35 👉報警　　　　　　　　　　 0.26 👉機動車輛（道路）　　　　
0:25  0.78 👉應急車輛　　　　　　　　 0.77 👉警笛　　　　　　　　　　 0.68 👉報警　　　　　　　　　　 0.67 👉救護車（警報）　　　　　 0.60 👉機動車輛（道路）　　　　
0:30  0.85 👉應急車輛　　　　　　　　 0.70 👉警笛　　　　　　　　　　 0.58 👉救護車（警報）　　　　　 0.43 👉報警　　　　　　　　　　 0.37 👉機動車輛（道路）　　　　
0:35  0.96 👉應急車輛　　　　　　　　 0.79 👉警笛　　　　　　　　　　 0.67 👉機動車