# Project Introduction

TikTok is a popular short-form video platform that offers a wide variety of background music for users to choose from when editing and creating short-form videos. In this project, we create a dataset of top TikTok songs. We first annotated the beat and downbeat timestamps of these songs, then used the dataset to evaluate two well-known beat-tracking libraries: Madmom and Librosa. The results show that the average F-measure on this dataset is 0.92 for Madmom and 0.62 for Librosa. We hope that this dataset will be helpful for the research work of the MIR community. The Colab notebook can be accessed [here](https://colab.research.google.com/drive/1gZNCXWFaJWpxMn_TeDP5FNk69dHrWQ5S?usp=sharing).

# Dataset Introduction

This dataset contains 375 tracks, each 30 seconds long. The data structure of each track is as follows:

```
{
track_id: '0000',
audio_path: '/content/drive/MyDrive/MIR_final/audio/0000.wav',
rank: '1',
song_name: 'Love You So',
segment: '1'
times...
labels...
downbeats...
}
```

We provide a [link](https://drive.google.com/uc?export=download&id=1U7J-dDlCob_s_KF5fu5nHbEXKIpCYVVg) to the annotation file, which you can download directly to your Google Drive via the script in the `.ipynb` file with `dataset.download_annotation()`. For copyright reasons, we do not provide the original audio files directly. However, users can download and pre-process the audio themselves with `dataset.download_audio()`. Because the downloaded audio is in MP3 format and most Python packages cannot read MP3 files directly, users need to install [ffmpeg](https://ffmpeg.org/) on their computers. (ffmpeg is installed by default in Colab.)

## Encapsulation of the dataset



In [None]:
!pip install pydub --quiet --exists-action i
!pip install madmom --quiet --exists-action i
import os
import madmom
from pydub import AudioSegment
import numpy as np
import requests
import string
import pandas as pd
import urllib
import sys
import glob
import soundfile as sf
import gdown
import zipfile

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting madmom
  Downloading madmom-0.16.1.tar.gz (20.0 MB)
[K     |████████████████████████████████| 20.0 MB 1.5 MB/s 
Collecting mido>=1.2.8
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 7.4 MB/s 
[?25hBuilding wheels for collected packages: madmom
  Building wheel for madmom (setup.py) ... [?25l[?25hdone
  Created wheel for madmom: filename=madmom-0.16.1-cp38-cp38-linux_x86_64.whl size=21153369 sha256=1ff660d873f5888e565ee75af7e2c8ae00d09e3d8ee423b2fd5fcd238c6477e3
  Stored in directory: /root/.cache/pip/wheels/a2/fc/0f/2d5569ea771ee70a9bf321a72744b4507eb85ff540d82b54eb
Successfull

In [None]:
class Track:
    """A single dataset track: its metadata, file paths and beat annotations.

    The annotation file is read eagerly in ``__init__`` with ``np.loadtxt``;
    column 0 holds the beat times and column 1 the beat labels. Beats whose
    label equals 1 are collected as downbeats.

    Attributes:
        track_id: identifier passed as ``id``.
        audio_path: ``path`` joined with the audio location.
        annotation_path: ``path`` joined with the annotation location.
        rank, song_name, segment: metadata passed through unchanged.
        songname: same value as ``song_name`` (kept for backward compatibility).
        times: 1-D array, first column of the annotation file.
        labels: 1-D array, second column of the annotation file.
        downbeats: list of the entries of ``times`` whose label equals 1.
    """

    def __init__(self, id, path = '.', audio_path = None, annotation_path = None,  rank = '', song_name = '', segment = ''):
        """Build the track and load its annotation file.

        Args:
            id: track identifier; also used to derive the default file names.
            path: dataset root that every relative path is appended to.
            audio_path: location of the audio file relative to ``path``
                (with its leading separator). Defaults to ``/audio/<id>.wav``.
            annotation_path: location of the annotation file relative to
                ``path``. Defaults to ``/annotation/<id>.txt``.
            rank: rank metadata, stored as given.
            song_name: song title, stored as given.
            segment: segment metadata, stored as given.

        Raises:
            OSError: if the annotation file cannot be opened.
        """
        self.track_id = id
        self.songname = song_name
        if audio_path is None:
            self.audio_path = path + '/audio/' + id + '.wav'
        else:
            self.audio_path = path + audio_path
        if annotation_path is None:
            self.annotation_path = path + '/annotation/' + id + '.txt'
        else:
            self.annotation_path = path + annotation_path
        self.segment = segment
        self.rank = rank
        self.song_name = song_name
        # ndmin=2 keeps the result two-dimensional even when the file holds a
        # single row; without it the column indexing below raises IndexError.
        annotation = np.loadtxt(self.annotation_path, ndmin=2)
        self.times = annotation[:, 0]
        self.labels = annotation[:, 1]
        self.downbeats = [
            time for time, label in zip(self.times, self.labels) if label == 1
        ]

    def __str__(self):
        """Return a short multi-line summary of the track metadata."""
        return f"track_id: {self.track_id},\naudio_path: {self.audio_path},\nrank: {self.rank},\nsong_name: {self.song_name},\nsegment: {self.segment}\ntimes...\nlabels...\ndownbeats..."

    def audio(self):
        '''
        Load the audio from the track as mono.

        Returns:
            (y, fs): the samples read from ``self.audio_path`` and the sample
            rate reported by ``soundfile``. Multi-channel audio is averaged
            across channels.
        '''
        y, fs = sf.read(self.audio_path)
        if y.ndim > 1:
            y = np.mean(y, axis=1)
        return y, fs



class Dataset:
    """Manage the files of the dataset stored under a ``home`` directory.

    Layout used by the methods below (all relative to ``home``):
        /annotation/annotation.csv  metadata table (default ``metadata_path``)
        /audio/<track_id>.wav       cut clips written by ``cut_audiofiles``
        /wav/                       temporary full-length downloads

    Typical use: ``download()``, then ``validate()``, then ``initialize()``
    and read the loaded tracks from ``dataset.tracks``.
    """

    def __init__(self, home = '.', metadata_path = '/annotation/annotation.csv'):
        """Remember the dataset root and create it if it does not exist.

        Args:
            home: dataset root directory; missing parent directories are
                created as well.
            metadata_path: location of the metadata CSV relative to ``home``
                (with its leading separator).
        """
        self.home = home
        self.metadata_path = home + metadata_path
        self.tracks = None
        if not os.path.exists(home):
            # makedirs (not mkdir) so a nested home such as 'data/tiktok' works.
            os.makedirs(home)

    def __str__(self):
        """Return a one-line summary of the dataset state."""
        if self.tracks is None:
            return f"home: {self.home}, tracks not initialized. Call dataset.initialize() before loading tracks"
        return f"home : {self.home}, {len(self.tracks)} tracks in total. Load them with dataset.tracks"

    @staticmethod
    def _remove_dir(path):
        """Delete directory ``path`` with everything inside it, if it exists."""
        import shutil
        if os.path.isdir(path):
            # rmtree also removes hidden files and sub-directories, which a
            # glob('*') + os.remove + os.rmdir sequence cannot handle.
            shutil.rmtree(path)

    @staticmethod
    def _reset_dir(path):
        """Make ``path`` an empty directory, discarding any previous content."""
        Dataset._remove_dir(path)
        os.mkdir(path)

    @staticmethod
    def _fade_window(length, fade_len):
        """Return a ``length``-sample window of ones with linear fade in/out.

        The first ``fade_len`` samples ramp from 0 to 1 and the last
        ``fade_len`` samples ramp from 1 to 0. ``fade_len`` is capped at half
        the window so the two ramps never overlap; a cap of 0 yields all ones.
        """
        window = np.ones(length, dtype=float)
        fade_len = min(fade_len, length // 2)
        if fade_len > 0:
            window[:fade_len] = np.linspace(0, 1, fade_len, True)
            window[-fade_len:] = np.linspace(1, 0, fade_len, True)
        return window

    def validate(self):
        '''
        Print which files listed in the metadata CSV are missing.

        Checks the ``audio_path`` and ``annotation_path`` columns of every
        row (both relative to ``home``). Only prints; always returns None.
        '''
        if not os.path.exists(self.metadata_path):
            print(self.metadata_path, ' is missing!')
            return None
        df = pd.read_csv(self.metadata_path, dtype = 'str')
        missing_files = []
        for audio_rel, annotation_rel in zip(df['audio_path'], df['annotation_path']):
            audio_path = self.home + audio_rel
            annotation_path = self.home + annotation_rel
            if not os.path.exists(audio_path):
                missing_files.append(audio_path)
            if not os.path.exists(annotation_path):
                missing_files.append(annotation_path)
        num_missing = len(missing_files)
        if num_missing > 0:
            print(num_missing,' files missing: ')
            print(*missing_files, sep = '\n')
            return
        print('No files missing, ready to initialize!')

    def initialize(self):
        '''
        Load every track listed in the metadata CSV into ``self.tracks``.

        ``self.tracks`` becomes a dict mapping ``track_id`` to ``Track``.
        It is assigned once, after all rows loaded, so an empty CSV yields an
        empty dict and a failing row leaves the previous value untouched.
        '''
        rows = pd.read_csv(self.metadata_path, dtype = 'str').to_dict('index')
        self.tracks = {
            row['track_id']: Track(
                row['track_id'], self.home, row['audio_path'],
                row['annotation_path'], row['rank'], row['name'],
                row['segment'],
            )
            for row in rows.values()
        }

    def download(self):
        '''
        Download all annotation and audio files.

        Annotations come first because cutting the audio reads the metadata
        CSV that the annotation archive provides.
        '''
        self.download_annotation()
        self.download_audio()

    def download_audio(self):
        '''
        Download all audio files, convert them to wav and cut the clips.

        The mp3 files are fetched into ``home/wav`` (emptied first), named
        ``<rank>_<title>.mp3``, converted to wav with pydub, and finally
        handed to ``cut_audiofiles``, which requires the metadata CSV.

        Raises:
            requests.HTTPError: if the listing page answers with an error
                status.
        '''
        # `import urllib` alone does not load the `request` submodule.
        import urllib.request

        wav_dir = self.home + '/wav'
        self._reset_dir(wav_dir)

        URL = "https://hot.trendpop.social/sounds/year/2022-01-01"
        response = requests.get(URL)
        response.raise_for_status()
        # Every chunk after the first starts right after a `"title":"` key.
        entries = response.text.split("\"title\":\"")
        sys.stdout.write("Downloading mp3 and Saving to DIR: " + self.home+'\n0%')
        count = 0
        print_process = len(entries) / 100

        for i, entry in enumerate(entries[1:], start=1):
            songname = entry.split("\"")[0]
            # Undo the JSON unicode escapes that occur in titles.
            for escaped, char in (('\\u0026', '&'), ('\\u003c', '<'), ('\\u003e', '>')):
                songname = songname.replace(escaped, char)
            url_parts = entry.split("sound_url\":\"")
            if len(url_parts) > 1:
                # The rank is the value following the second `rank":` key.
                rank = entry.split("rank\":")[2].split(",")[0]
                sound_url = url_parts[1].split("\"")[0]
                filename = wav_dir + '/' + rank + "_" + songname + '.mp3'
                urllib.request.urlretrieve(sound_url, filename)
                count += 1
            sys.stdout.write('\r')
            sys.stdout.write(str(int(i / print_process)) + '%')

        sys.stdout.write("\rDownload Finished! " + str(count) + " Files in total!")
        for mp3_path in glob.glob(wav_dir + '/*.mp3'):
            sound = AudioSegment.from_file(mp3_path)
            sound.export(mp3_path[:-4] + '.wav', format="wav")
            os.remove(mp3_path)
        print('Convert mp3 to wav finished!')
        self.cut_audiofiles()

    def download_annotation(self):
        '''
        Download the annotation archive and extract it into ``home``.

        The zip file is removed after extraction.
        '''
        url = 'https://drive.google.com/uc?export=download&id=1U7J-dDlCob_s_KF5fu5nHbEXKIpCYVVg'
        output = self.home + '/annotation.zip'
        gdown.download(url, output, quiet=False)
        with zipfile.ZipFile(output, 'r') as zip_ref:
            zip_ref.extractall(self.home)
        os.remove(output)

    def cut_audiofiles(self):
        '''
        Cut the clips listed in the metadata CSV out of the downloaded audio.

        For every row, ``home/wav/<rank>_<name>.wav`` is read and the
        requested segment is written to ``home/audio/<track_id>.wav``.
        ``home/audio`` is emptied first and ``home/wav`` is deleted at the end.
        '''
        rows = pd.read_csv(self.metadata_path, dtype = str).to_dict('index')
        audio_dir = self.home + '/audio'
        self._reset_dir(audio_dir)

        wav_dir = self.home + '/wav'
        for row in rows.values():
            source_path = wav_dir + '/' + row['rank'] + "_" + row['name'] + '.wav'
            segment = int(row['segment'])
            self.__get_audio(source_path, audio_dir + '/' + row['track_id'] + '.wav', segment)
        self._remove_dir(wav_dir)

    def __get_audio( self,audio_path, save_path, segment = 0,sr = 44100, dur = 30, t_fade = 0.01):
        '''
        Private helper: cut one segment out of an audio file and save it.

        Args:
            audio_path: wav file to read.
            save_path: wav file to write.
            segment: zero-based index of the ``dur``-second segment to keep.
            sr: sample rate used to convert ``dur`` and ``t_fade`` to samples.
                NOTE(review): the rate reported by the file is only used when
                writing; confirm all sources really are 44.1 kHz.
            dur: segment duration in seconds.
            t_fade: duration in seconds of the linear fade in and fade out.

        Raises:
            ValueError: if the requested segment lies beyond the end of the
                audio.
        '''
        # read audio and convert to mono
        y, fs = sf.read(audio_path)
        if y.ndim > 1:
            y = np.mean(y, axis=1)
        seg_len = int(sr * dur)
        clip = y[segment * seg_len:(segment + 1) * seg_len]
        if len(clip) == 0:
            raise ValueError(
                f"segment {segment} is beyond the end of {audio_path}"
            )
        # The window is sized from the clip itself so a final segment that is
        # shorter than `dur` seconds is still faded and saved.
        window = self._fade_window(len(clip), int(t_fade * sr))
        sf.write(save_path, clip * window, fs)
        