In [67]:
import pandas as pd
from youtube_search import YoutubeSearch
from pytubefix import YouTube
import re

### Load Scraping Result

In [68]:
# Read the semicolon-delimited song list from disk.
data = pd.read_csv('data/data.csv', delimiter=';')

# Display the first rows as a quick sanity check.
data.head()

Unnamed: 0,nama,daerah,keyword,lirik
0,Kicir-Kicir,Jakarta,jakarta,
1,Ondel Ondel,jakarta,Jakarta,
2,Manuk Dadali,jawa barat,jawa barat,


### Search and Download Music Function

In [69]:
def search_yt(query, max_results=3):
    """Search YouTube and return the hits with a full watch URL attached.

    Args:
        query: Free-text search string passed to ``YoutubeSearch``.
        max_results: Maximum number of hits to request. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        The result dicts from ``YoutubeSearch(...).to_dict()``; each dict
        gains a ``'url'`` key built from its ``'url_suffix'``.
    """
    results = YoutubeSearch(query, max_results=max_results).to_dict()

    # 'url_suffix' lacks the host, so prefix it to get a usable link.
    for result in results:
        result['url'] = 'https://www.youtube.com' + result['url_suffix']

    return results

def normalized_yt_title(title):
    """Turn a video title into a lowercase, underscore-separated file stem.

    Only ``a-z``, ``0-9`` and ``_`` survive; every other character is
    dropped. Words are joined by single underscores and the result never
    starts or ends with an underscore.

    Args:
        title: The raw video title.

    Returns:
        The normalized stem. It may be an empty string when the title
        contains no ASCII letters or digits at all.
    """
    text = title.lower()
    # Any whitespace run (spaces, tabs, newlines) separates words; replacing
    # only ' ' would let the later filter fuse words split by other whitespace.
    text = re.sub(r'\s+', '_', text)
    text = re.sub(r'[^a-z0-9_]', '', text)
    # Removing punctuation between separators leaves doubled underscores.
    text = re.sub(r'_{2,}', '_', text)

    # Punctuation at either end leaves a stray underscore behind; trim it.
    return text.strip('_')

def dl_video(query):
    """Download the audio track of one YouTube video into the ``songs`` folder.

    Args:
        query: URL of the video, passed straight to ``YouTube``.

    Returns:
        The string ``'/songs/<name>.mp3'`` on success, where ``<name>`` is the
        file stem handed to the downloader. Note the leading slash, whereas
        the download itself is given the relative folder ``'songs'``.
        Returns ``None`` when no audio-only stream is found or when any
        step raises.
    """
    try:
        yt = YouTube(query, 'MWEB')
        print(f'Downloading {yt.title}...')

        # A title made only of symbols/emoji normalizes to nothing usable;
        # fall back to the video id so such videos do not share one file name.
        normalized_title = normalized_yt_title(yt.title).strip('_') or yt.video_id

        ys = yt.streams.get_audio_only()
        if ys is None:
            # Without this guard the failure surfaces as an opaque
            # "'NoneType' object has no attribute 'download'" message.
            print(f'No audio-only stream found for {query}')
            return None

        # NOTE(review): mp3=True presumably only affects the saved file name
        # and does not transcode the audio - confirm before parsing the
        # result with an MP3-only reader.
        ys.download(mp3=True, output_path='songs', filename=normalized_title)

        return f'/songs/{normalized_title}.mp3'
    except Exception as e:
        # Best effort: report the problem and let the caller store a None path.
        print(e)
        return None
    

In [70]:
results = []

# For every song in the table: search YouTube, try to download each hit,
# and keep the hits (with their download path, or None) next to the song.
for _, song_row in data.iterrows():
    hits = search_yt(f"{song_row['nama']} asal {song_row['daerah']}")

    downloaded = [
        {
            'title': hit['title'],
            'url': hit['url'],
            'path': dl_video(hit['url']),
        }
        for hit in hits
    ]

    results.append({
        'nama': song_row['nama'],
        'daerah': song_row['daerah'],
        'keyword': song_row['keyword'],
        'songs': downloaded,
    })


Downloading KICIR KICIR | Lagu Daerah DKI Jakarta - Betawi | Budaya Indonesia | Dongeng Kita...
Downloading KARAOKE KICIR KICIR    Lagu Daerah Jakarta...
Downloading Kicir Kicir (Lagu Daerah Betawi)...
Downloading ONDEL ONDEL | Lagu Daerah DKI Jakarta - Betawi | Budaya Indonesia | Dongeng Kita...
Downloading ONDEL ONDEL 💞 LAGU DAERAH DKI JAKARTA | ONDEL ONDEL BETAWI REMIX...
Downloading Sejarah Asal Muasal Nama ONDEL - ONDEL...
Downloading Lirik Lagu manuk Dadali...
Downloading Tari Manuk Dadali-Jawa Barat || "Keragaman Budaya Indonesia" XI IPS 4...


### Data Selection

In [65]:
# One row per searched song; its 'songs' column holds the list of candidates.
df = pd.DataFrame(results)

# Flatten all candidate lists into a single table, one row per candidate video.
candidate_rows = [
    candidate
    for entry in results
    for candidate in entry['songs']
]
df_songs = pd.DataFrame(candidate_rows)

In [66]:
# Preview the flattened per-video table (title, url, path).
df_songs.head()

Unnamed: 0,title,url,path
0,KICIR KICIR | Lagu Daerah DKI Jakarta - Betawi...,https://www.youtube.com/watch?v=lhZf7qGZzsE&pp...,/songs/kicir_kicir_lagu_daerah_dki_jakarta_bet...
1,ONDEL ONDEL | Lagu Daerah DKI Jakarta - Betawi...,https://www.youtube.com/watch?v=wardyOl-EHo&pp...,/songs/ondel_ondel_lagu_daerah_dki_jakarta_bet...
2,Lirik Lagu manuk Dadali,https://www.youtube.com/watch?v=2xMLFGA12F0&pp...,/songs/lirik_lagu_manuk_dadali.mp3


In [37]:
import os

def convert_path(path):
    """Return ``path`` as an absolute path string; ``None`` passes through.

    Args:
        path: A path string, or ``None`` for a missing file.

    Returns:
        ``str(os.path.abspath(path))``, or ``None`` when ``path`` is ``None``.
    """
    return None if path is None else str(os.path.abspath(path))

# Rewrite the 'path' column element by element through convert_path.
df_songs['path'] = df_songs['path'].map(convert_path)

In [38]:
# Preview the table after the 'path' column was passed through convert_path.
df_songs.head()

Unnamed: 0,title,url,path
0,KICIR KICIR | Lagu Daerah DKI Jakarta - Betawi...,https://www.youtube.com/watch?v=lhZf7qGZzsE&pp...,c:\songs\kicir_kicir__lagu_daerah_dki_jakarta_...
1,KARAOKE KICIR KICIR Lagu Daerah Jakarta,https://www.youtube.com/watch?v=nbO8hjzAN20&pp...,c:\songs\karaoke_kicir_kicir____lagu_daerah_ja...
2,Kicir Kicir (Lagu Daerah Betawi),https://www.youtube.com/watch?v=CcFbCjM2jsM&pp...,c:\songs\kicir_kicir_lagu_daerah_betawi.mp3
3,senam kicir kicir lomba 2024,https://www.youtube.com/watch?v=NzWTp-eX02g&pp...,c:\songs\senam_kicir_kicir_lomba_2024.mp3
4,Tari Daerah ~ Kicir-kicir DKI Jakarta |Rumah B...,https://www.youtube.com/watch?v=0EJP74A3Ws4&pp...,c:\songs\tari_daerah_~_kicir-kicir_dki_jakarta...


In [39]:
# Strip the leading drive and root separator from a path.
def remove_first_three(path):
    """Drop the drive prefix and leading separators from ``path``.

    The original implementation sliced off the first three characters, which
    is only correct for a one-letter Windows drive prefix. This version asks
    ``os.path.splitdrive`` for the drive part and then removes the leading
    separators, so it gives the same answer for that case and also handles
    POSIX roots, UNC shares and paths that are already relative.

    Args:
        path: A path string, or ``None`` for a missing file.

    Returns:
        The path without drive and leading separators, or ``None`` when
        ``path`` is ``None``.
    """
    if path is None:
        return None
    # The drive part is empty on POSIX and for relative paths.
    _, tail = os.path.splitdrive(path)
    # Remove the root separator(s), whichever slash style they use.
    return tail.lstrip('\\/')

# Rewrite the 'path' column element by element through remove_first_three.
df_songs['path'] = df_songs['path'].map(remove_first_three)

In [40]:
# Preview the table after the 'path' column was passed through remove_first_three.
df_songs.head()

Unnamed: 0,title,url,path
0,KICIR KICIR | Lagu Daerah DKI Jakarta - Betawi...,https://www.youtube.com/watch?v=lhZf7qGZzsE&pp...,songs\kicir_kicir__lagu_daerah_dki_jakarta_-_b...
1,KARAOKE KICIR KICIR Lagu Daerah Jakarta,https://www.youtube.com/watch?v=nbO8hjzAN20&pp...,songs\karaoke_kicir_kicir____lagu_daerah_jakar...
2,Kicir Kicir (Lagu Daerah Betawi),https://www.youtube.com/watch?v=CcFbCjM2jsM&pp...,songs\kicir_kicir_lagu_daerah_betawi.mp3
3,senam kicir kicir lomba 2024,https://www.youtube.com/watch?v=NzWTp-eX02g&pp...,songs\senam_kicir_kicir_lomba_2024.mp3
4,Tari Daerah ~ Kicir-kicir DKI Jakarta |Rumah B...,https://www.youtube.com/watch?v=0EJP74A3Ws4&pp...,songs\tari_daerah_~_kicir-kicir_dki_jakarta_ru...


#### Durasi Lagu
Still failing: the files cannot be found. The path has to look like this (original title, with spaces):
`songs\senam kicir kicir lomba 2024.mp3`

In [41]:
from mutagen.mp3 import MP3
list_songs = []

# Open every file referenced in df_songs with mutagen and record its length.
for _, song in df_songs.iterrows():
    path = song['path']
    if path is not None:
        try:
            audio = MP3(path)
            print(f'Processing {path}...')
            list_songs.append(
                dict(title=song['title'], path=path, duration=audio.info.length)
            )
        except Exception as e:
            # Report the failure and carry on with the next file.
            print(e)

[Errno 2] No such file or directory: 'songs\\kicir_kicir__lagu_daerah_dki_jakarta_-_betawi__budaya_indonesia__dongeng_kita.mp3'
[Errno 2] No such file or directory: 'songs\\karaoke_kicir_kicir____lagu_daerah_jakarta.mp3'
[Errno 2] No such file or directory: 'songs\\kicir_kicir_lagu_daerah_betawi.mp3'
[Errno 2] No such file or directory: 'songs\\senam_kicir_kicir_lomba_2024.mp3'
[Errno 2] No such file or directory: 'songs\\tari_daerah_~_kicir-kicir_dki_jakarta_rumah_belajar_mawinsya.mp3'
[Errno 2] No such file or directory: 'songs\\ondel_ondel__lagu_daerah_dki_jakarta_-_betawi__budaya_indonesia__dongeng_kita.mp3'
[Errno 2] No such file or directory: 'songs\\ondel_ondel_💞_lagu_daerah_dki_jakarta__ondel_ondel_betawi_remix.mp3'
[Errno 2] No such file or directory: 'songs\\sejarah_asal_muasal_nama_ondel_-_ondel.mp3'
[Errno 2] No such file or directory: 'songs\\asal-usul_ondel-ondel__dongeng_bahasa_indonesia__akm_literasi_sd.mp3'
[Errno 2] No such file or directory: 'songs\\ondel_ondel_✅️_la

In [42]:
# Collect the per-file records (title, path, duration) into a DataFrame.
df_list_songs = pd.DataFrame(list_songs)