In [3]:
import os
import numpy as np
import librosa
import pandas as pd

In [4]:
# Feature-extraction settings used by the genre-processing cells.
n, hop = 20, 512  # n: value passed as n_mfcc; hop: value passed as hop_length (samples)

# Accumulators: one mean-MFCC vector and one integer genre label are appended per song.
features, labels = [], []

In [5]:
# Rap genre (label 0): extract one mean-MFCC vector per MP3 file in ./rap.
folder_path = "./rap"
# os.listdir returns entries in arbitrary order; sort the names so the order in
# which songs are appended to `features` / `labels` is reproducible across runs.
mp3_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".mp3"))

for mp3_file in mp3_files:
    mp3_file_path = os.path.join(folder_path, mp3_file)

    # sr=None keeps the file's native sampling rate (no resampling).
    # NOTE(review): if the files do not all share one sampling rate, their MFCCs
    # may not be directly comparable — confirm, or pass a fixed sr.
    y, sr = librosa.load(mp3_file_path, sr=None, mono=True, dtype=np.float32)
    # Trim silence, then normalize the signal
    y = librosa.effects.trim(y)[0]
    y = librosa.util.normalize(y)
    # Compute MFCCs, shape (n_mfcc, time_frames)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n, hop_length=hop)
    # Average the MFCCs over time frames (features reduced to shape (n_mfcc,))
    mfcc_mean = np.mean(mfcc, axis=1)
    # Store the feature vector and its label
    features.append(mfcc_mean)
    labels.append(0)

# Reports the cumulative number of entries in `features`, not only this folder's files.
print(f"成功處理 {len(features)} 首歌曲！")

成功處理 343 首歌曲！


In [6]:
# Country genre (label 1): extract one mean-MFCC vector per MP3 file in ./country.
folder_path = "./country"
# os.listdir returns entries in arbitrary order; sort the names so the order in
# which songs are appended to `features` / `labels` is reproducible across runs.
mp3_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".mp3"))

for mp3_file in mp3_files:
    mp3_file_path = os.path.join(folder_path, mp3_file)

    # sr=None keeps the file's native sampling rate (no resampling).
    # NOTE(review): if the files do not all share one sampling rate, their MFCCs
    # may not be directly comparable — confirm, or pass a fixed sr.
    y, sr = librosa.load(mp3_file_path, sr=None, mono=True, dtype=np.float32)

    # Trim silence, then normalize the signal
    y = librosa.effects.trim(y)[0]
    y = librosa.util.normalize(y)

    # Compute MFCCs: one row per coefficient, one column per time frame
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n, hop_length=hop)

    # Average over the time axis, leaving one value per coefficient
    mfcc_mean = np.mean(mfcc, axis=1)

    # Store the feature vector and its label
    features.append(mfcc_mean)
    labels.append(1)

# Reports the cumulative number of entries in `features`, not only this folder's files.
print(f"成功處理 {len(features)} 首歌曲！")

成功處理 693 首歌曲！


In [7]:
# Rock genre (label 2): extract one mean-MFCC vector per MP3 file in ./rock.
folder_path = "./rock"
# os.listdir returns entries in arbitrary order; sort the names so the order in
# which songs are appended to `features` / `labels` is reproducible across runs.
mp3_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".mp3"))

for mp3_file in mp3_files:
    mp3_file_path = os.path.join(folder_path, mp3_file)

    # sr=None keeps the file's native sampling rate (no resampling).
    # NOTE(review): if the files do not all share one sampling rate, their MFCCs
    # may not be directly comparable — confirm, or pass a fixed sr.
    y, sr = librosa.load(mp3_file_path, sr=None, mono=True, dtype=np.float32)

    # Trim silence, then normalize the signal
    y = librosa.effects.trim(y)[0]
    y = librosa.util.normalize(y)

    # Compute MFCCs: one row per coefficient, one column per time frame
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n, hop_length=hop)

    # Average over the time axis, leaving one value per coefficient
    mfcc_mean = np.mean(mfcc, axis=1)

    # Store the feature vector and its label
    features.append(mfcc_mean)
    labels.append(2)

# Reports the cumulative number of entries in `features`, not only this folder's files.
print(f"成功處理 {len(features)} 首歌曲！")

成功處理 992 首歌曲！


In [8]:
# Jazz genre (label 3): extract one mean-MFCC vector per MP3 file in ./jazz.
folder_path = "./jazz"
# os.listdir returns entries in arbitrary order; sort the names so the order in
# which songs are appended to `features` / `labels` is reproducible across runs.
mp3_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".mp3"))

for mp3_file in mp3_files:
    mp3_file_path = os.path.join(folder_path, mp3_file)

    # sr=None keeps the file's native sampling rate (no resampling).
    # NOTE(review): if the files do not all share one sampling rate, their MFCCs
    # may not be directly comparable — confirm, or pass a fixed sr.
    y, sr = librosa.load(mp3_file_path, sr=None, mono=True, dtype=np.float32)

    # Trim silence, then normalize the signal
    y = librosa.effects.trim(y)[0]
    y = librosa.util.normalize(y)

    # Compute MFCCs: one row per coefficient, one column per time frame
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n, hop_length=hop)

    # Average over the time axis, leaving one value per coefficient
    mfcc_mean = np.mean(mfcc, axis=1)

    # Store the feature vector and its label
    features.append(mfcc_mean)
    labels.append(3)

# Reports the cumulative number of entries in `features`, not only this folder's files.
print(f"成功處理 {len(features)} 首歌曲！")

成功處理 1250 首歌曲！


In [9]:
# Turn the accumulated per-song vectors and labels into NumPy arrays.
# `features` is indexed as 2-D below: one row per song, one column per coefficient.
features = np.array(features)
labels = np.array(labels)

# 1-based running ids, one per row of the feature matrix.
song_ids = np.arange(features.shape[0]) + 1

# Exported column layout: song_id, feature_1 .. feature_k, label.
df = pd.DataFrame(
    data=features,
    columns=["feature_%d" % k for k in range(1, features.shape[1] + 1)],
)
df.insert(loc=0, column="song_id", value=song_ids)
df["label"] = labels

# index=False keeps the DataFrame's row index out of the file.
df.to_csv("dataset.csv", encoding="utf-8", index=False)

In [10]:
# Read the exported CSV back from disk; the bare name on the last line makes the
# notebook render the resulting DataFrame as this cell's output.
dataset = pd.read_csv(filepath_or_buffer="dataset.csv")
dataset

Unnamed: 0,song_id,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,label
0,1,-129.575760,122.92955,-22.193884,49.484856,-24.858639,33.370686,-13.389397,21.018896,-6.045021,...,1.320587,2.054224,-8.536629,8.460932,-5.219547,4.207222,-5.636729,5.987942,-1.939030,0
1,2,-131.593430,129.64317,-55.656467,49.287190,-16.282686,36.139780,0.741059,9.033503,0.837788,...,4.174528,14.427233,-0.645022,12.443151,-0.046594,3.284289,1.086975,4.504528,-0.723212,0
2,3,-183.298420,117.45908,-22.033175,24.566383,-31.206730,14.176015,-13.778350,1.269202,-3.213777,...,-7.469764,-5.896905,-8.906673,0.002047,-5.200934,-1.955609,-5.249220,2.472644,-1.365942,0
3,4,-95.231094,153.05118,-31.876870,46.072495,-15.342759,24.378073,-9.473240,8.595106,-1.803025,...,-0.164136,7.018112,-3.634860,5.738299,0.313524,6.666144,-0.732744,6.308126,-0.608489,0
4,5,-155.081760,109.98620,-17.222094,56.357445,-18.746206,35.473766,-13.958752,8.242919,-10.633527,...,-0.776032,5.664334,-6.793395,7.913438,-2.088957,4.183787,-3.599888,7.157237,-1.581926,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1245,1246,-384.545380,135.05170,61.236973,15.327516,2.267388,1.741465,0.613402,-3.293388,-6.442965,...,-1.981322,-0.430206,-0.302739,-0.766945,-1.365921,-2.578774,-3.835223,-3.981327,-3.143352,3
1246,1247,-183.545350,165.93260,-44.275177,64.189640,-20.742079,10.551808,15.094042,-17.288290,17.154568,...,0.874083,-9.400365,12.119840,-4.602041,4.376448,0.561713,-0.052728,1.737591,-8.408474,3
1247,1248,-210.830700,144.44810,-12.342857,51.418190,-7.980223,21.474247,-2.993782,6.727013,5.949357,...,-0.721226,4.139108,2.031794,1.941961,-3.558708,1.747616,-4.051151,-1.391530,-1.278148,3
1248,1249,-196.754380,163.67519,-26.161694,42.202408,-20.057478,9.205800,8.582489,-10.819203,5.415753,...,-3.150856,-1.696628,3.247919,-3.980751,3.575623,-2.898201,0.289493,-1.236437,-1.410076,3
