## Data Collection: YouTube
The following audio samples may be used as test data. <br/>They were collected from the following sources: <br/>
### Shuffles:
**1** (Source: https://www.youtube.com/watch?v=aJa3DYnAjqU )<br/>
**2** (Source: https://en.wikipedia.org/wiki/Tap_dance_technique )<br/>
**3** (Source: https://www.youtube.com/watch?v=dYCtgIm43Rk )<br/>
**4** (Source: https://www.youtube.com/watch?v=Xbamib_k874 )<br/>
### Ball changes:
**1** (Source: https://www.youtube.com/watch?v=hi0wkLqPGR0 )<br/>
**2** (Source: https://en.wikipedia.org/wiki/Tap_dance_technique )<br/>
**3** (Source: https://www.youtube.com/watch?v=dYCtgIm43Rk )<br/>
**4** (Source: https://www.youtube.com/watch?v=Xbamib_k874 )<br/>

In [1]:
from scipy.io import wavfile as wav
from scipy.io.wavfile import read, write
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pydub import AudioSegment, effects
from pydub.silence import split_on_silence, detect_nonsilent, detect_silence
from scipy.signal import argrelextrema, find_peaks, peak_prominences, find_peaks_cwt, normalize
from sklearn.metrics import accuracy_score

In [2]:
def get_wav_mp4(mp4_file, wav_name):
    """Decode an mp4 recording and write it back out as a wav file.

    Args:
        mp4_file: Path of the mp4 file to read.
        wav_name: Path the wav data is written to.

    Returns:
        The value returned by ``AudioSegment.export`` for the written file.
    """
    return AudioSegment.from_file(mp4_file, "mp4").export(wav_name, format="wav")

#def set_target_amplitude(sound, target_dBFS = -30):
#    change_in_dBFS = target_dBFS - sound.dBFS
#    return sound.apply_gain(change_in_dBFS)
#
def build_list(step, folder, length):
    """Build the paths of the numbered wav clips below ``step/folder``.

    Args:
        step: Leading path component (a string).
        folder: Second path component; converted with ``str``.
        length: Number of clips (an integer); files are numbered from 1
            through ``length``.

    Returns:
        A list of ``"<step>/<folder>/<n>.wav"`` strings for n = 1..length,
        which is empty when ``length`` is less than 1.
    """
    return [
        "/".join((step, str(folder), str(number) + ".wav"))
        for number in range(1, length + 1)
    ]

## Steps:
- Export all recordings as wav files <br/>
- Manually extract the relevant clip. Set channel to 1 (mono). <br/>
- Split up the audio into its components.<br/>
- Check dBFS for all files
- Save and export as wav files.

 **1** 

In [3]:
# Convert the mp4 recording to wav. The trailing ';' stops the notebook from
# echoing the value returned by get_wav_mp4.
get_wav_mp4('../../../Source/Uncut/Shuffle_1.mp4', '../../../Source/Uncut/Shuffle_1.wav');

In [4]:
# Load the converted recording and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Shuffle_1.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 30 s to 34 s of the recording.
start = 30 * 1000
end = 34 * 1000
three_shuffles = song[start:end]

In [5]:
# Split the excerpt into three equal-length parts.
# (duration_seconds / 3) * 1000 is one third of the excerpt in milliseconds,
# and (duration_seconds / 3) * 2000 is two thirds.
# NOTE(review): this assumes the three shuffles are evenly spaced - confirm by ear.
shuffle_1 = three_shuffles[:(three_shuffles.duration_seconds/3)*1000]
shuffle_2 = three_shuffles[(three_shuffles.duration_seconds/3)*1000:(three_shuffles.duration_seconds/3)*2000]
shuffle_3 = three_shuffles[(three_shuffles.duration_seconds/3)*2000:]

In [6]:
# Print the loudness (dBFS) of each of the three clips.
path_list = [shuffle_1, shuffle_2, shuffle_3]
for audio in path_list:
    print(audio.dBFS)

-44.733926680137785
-42.455059634001046
-43.3040383325111


In [7]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly; list.index() returns the first element that compares
# equal, so two equal clips would have been given the same file name.
path_list = [shuffle_1, shuffle_2, shuffle_3]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Shuffle/1/' + str(clip_number) + '.wav', format='wav')

 **2**

In [8]:
# Decode the webm recording and write it out as wav. The trailing ';' stops
# the notebook from echoing the value returned by export().
full_clip = AudioSegment.from_file('../../../Source/Uncut/Tap_Dance_Technique_2.webm', "webm")
full_clip.export('../../../Source/Uncut/Tap_Dance_Technique_2.wav', format="wav");

In [9]:
# Load the converted recording and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Tap_Dance_Technique_2.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# During feature extraction I noticed that this clip has a frame_width of 4
# instead of 2, so the sample width is forced to 2 bytes here.
song = song.set_sample_width(2)
# Slice positions are in milliseconds: keep 7.5 s to 9 s of the recording.
start = 7.5 * 1000
end = 9 * 1000
shuffle = song[start:end]

In [10]:
# Print the loudness (dBFS) of the clip.
path_list = [shuffle]
for audio in path_list:
    print(audio.dBFS)

-33.76161323811785


In [11]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly instead of searching the list with list.index(), which
# matches on equality and would give equal clips the same file name.
path_list = [shuffle]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Shuffle/2/' + str(clip_number) + '.wav', format='wav')

 **3** 

In [12]:
# Convert the mp4 recording to wav (note that the wav gets a differently
# spelled name from the mp4). The trailing ';' stops the notebook from
# echoing the value returned by get_wav_mp4.
get_wav_mp4('../../../Source/Uncut/Shuffleballchange_3.mp4', '../../../Source/Uncut/Shuffle_Ball_Change_3.wav');

In [13]:
# Load the converted recording and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Shuffle_Ball_Change_3.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 15 s to 16 s of the recording.
start = 15 * 1000
end = 16 * 1000
shuffle = song[start:end]

In [14]:
# Print the loudness (dBFS) of the clip.
path_list = [shuffle]
for audio in path_list:
    print(audio.dBFS)

-21.972861262735467


In [15]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly instead of searching the list with list.index(), which
# matches on equality and would give equal clips the same file name.
path_list = [shuffle]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Shuffle/3/' + str(clip_number) + '.wav', format='wav')

**4**

In [16]:
# Convert the mp4 recording to wav. The trailing ';' stops the notebook from
# echoing the value returned by get_wav_mp4.
get_wav_mp4('../../../Source/Uncut/Shuffle_Ball_Change_4.mp4', '../../../Source/Uncut/Shuffle_Ball_Change_4.wav');

In [17]:
# Load the converted recording and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Shuffle_Ball_Change_4.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 29.5 s to 33 s of the recording.
start = 29.5 * 1000
end = 33 * 1000
shuffle_bc_2x = song[start:end]

In [18]:
# Cut two clips out of the excerpt using fixed millisecond offsets
# (0-975 ms and 2000-2800 ms, relative to the start of the excerpt).
shuffle_1 = shuffle_bc_2x[0:975]
shuffle_2 = shuffle_bc_2x[2000:2800]

In [19]:
# Print the loudness (dBFS) of each of the two clips.
path_list = [shuffle_1, shuffle_2]
for audio in path_list:
    print(audio.dBFS)

-30.892766481744005
-32.590471903165735


In [20]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly; list.index() returns the first element that compares
# equal, so two equal clips would have been given the same file name.
path_list = [shuffle_1, shuffle_2]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Shuffle/4/' + str(clip_number) + '.wav', format='wav')

## Ball change

**1** 

In [21]:
# Convert the mp4 recording to wav. The trailing ';' stops the notebook from
# echoing the value returned by get_wav_mp4.
get_wav_mp4('../../../Source/Uncut/Ball_change_1.mp4', '../../../Source/Uncut/Ball_change_1.wav');

In [22]:
# Load the converted recording and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Ball_change_1.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 48.5 s to 52 s of the recording.
start = 48.5 * 1000
end = 52 * 1000
three_ball_changes = song[start:end]

In [23]:
# Split the excerpt into three equal-length parts.
# (duration_seconds / 3) * 1000 is one third of the excerpt in milliseconds,
# and (duration_seconds / 3) * 2000 is two thirds.
# NOTE(review): this assumes the three ball changes are evenly spaced - confirm by ear.
ball_change_1 = three_ball_changes[:(three_ball_changes.duration_seconds/3)*1000]
ball_change_2 = three_ball_changes[(three_ball_changes.duration_seconds/3)*1000:(three_ball_changes.duration_seconds/3)*2000]
ball_change_3 = three_ball_changes[(three_ball_changes.duration_seconds/3)*2000:]

In [24]:
# Print the loudness (dBFS) of each of the three clips.
path_list = [ball_change_1, ball_change_2, ball_change_3]
for audio in path_list:
    print(audio.dBFS)

-38.117110514689955
-39.75640068176758
-39.402856369877874


In [25]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly; list.index() returns the first element that compares
# equal, so two equal clips would have been given the same file name.
path_list = [ball_change_1, ball_change_2, ball_change_3]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Ball_change/1/' + str(clip_number) + '.wav', format='wav')

 **2** 

In [26]:
# Reload the Tap_Dance_Technique_2 wav and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Tap_Dance_Technique_2.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# During feature extraction I noticed that this clip has a frame_width of 4
# instead of 2, so the sample width is forced to 2 bytes here.
song = song.set_sample_width(2)
# Slice positions are in milliseconds: keep 28 s to 29.5 s of the recording.
start = 28 * 1000
end = 29.5 * 1000
ball_change = song[start:end]

In [27]:
# Print the loudness (dBFS) of the clip.
path_list = [ball_change]
for audio in path_list:
    print(audio.dBFS)

-31.023806095257378


In [28]:
# Export each clip as <position>.wav. The export call belongs inside the
# loop: when it sat after the loop only the last clip was written, and the
# returned file handle was echoed as the cell's output.
# enumerate() supplies the 1-based position directly instead of searching
# the list with list.index(), which matches on equality.
path_list = [ball_change]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Ball_change/2/' + str(clip_number) + '.wav', format='wav')

<_io.BufferedRandom name='../../../Source/Ball_change/2/1.wav'>

 **3** 

In [29]:
# Reload the Shuffle_Ball_Change_3 wav and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Shuffle_Ball_Change_3.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 3.5 s to 3.9 s of the recording.
start = 3.5 * 1000
end = 3.9 * 1000
ball_change = song[start:end]

In [30]:
# Print the loudness (dBFS) of the clip.
path_list = [ball_change]
for audio in path_list:
    print(audio.dBFS)

-22.86180704870788


In [31]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly instead of searching the list with list.index(), which
# matches on equality and would give equal clips the same file name.
path_list = [ball_change]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Ball_change/3/' + str(clip_number) + '.wav', format='wav')

**4** 

In [32]:
# Reload the Shuffle_Ball_Change_4 wav and reduce it to a single (mono) channel.
song = AudioSegment.from_wav('../../../Source/Uncut/Shuffle_Ball_Change_4.wav')
song = song.set_channels(1)
# Gain adjustment is left disabled:
#song = set_target_amplitude(song)
#song = song.normalize()
# Slice positions are in milliseconds: keep 29.5 s to 33 s of the recording,
# the same excerpt the shuffles for this source were cut from.
start = 29.5 * 1000
end = 33 * 1000
shuffle_bc_2x = song[start:end]

In [33]:
# Cut two clips out of the excerpt using fixed millisecond offsets
# (975-2000 ms and 2800 ms to the end, relative to the start of the excerpt).
ball_change_1 = shuffle_bc_2x[975:2000]
ball_change_2 = shuffle_bc_2x[2800:]

In [34]:
# Print the loudness (dBFS) of each of the two clips.
path_list = [ball_change_1, ball_change_2]
for audio in path_list:
    print(audio.dBFS)

-28.747115691066142
-28.212430626121247


In [35]:
# Export each clip as <position>.wav. enumerate() supplies the 1-based
# position directly; list.index() returns the first element that compares
# equal, so two equal clips would have been given the same file name.
path_list = [ball_change_1, ball_change_2]
for clip_number, clip in enumerate(path_list, start=1):
    clip.export('../../../Source/Ball_change/4/' + str(clip_number) + '.wav', format='wav')