In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import warnings
from collections import Counter

# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# 1. Load the 3-second feature table (noted by the author as much more accurate).
df_base = pd.read_csv(
    "lib/gtzan-dataset-music-genre-classification/Data/features_3_sec.csv"
)

# Separate the genre labels from the feature columns; 'filename' and
# 'length' are dropped as unneeded.
labels = df_base['label']
data_numeric = df_base.drop(columns=['filename', 'length', 'label'])

# 2. Reference statistics: the per-genre mean of every feature and the
# standard deviation of every feature over the whole table.
genre_means = data_numeric.groupby(labels).mean()
global_std = data_numeric.std()

def extract_segment_features(y, sr):
    """Extract the feature vector of a short audio fragment.

    Every frame-level librosa feature is computed once and then summarised
    by its mean and variance, so no feature is evaluated twice.

    Parameters
    ----------
    y : np.ndarray
        Audio samples of the fragment (handed to librosa unchanged).
    sr : int
        Sampling rate of ``y``.

    Returns
    -------
    pd.Series
        Values keyed by feature name: ``<name>_mean`` / ``<name>_var`` for
        ``chroma_stft``, ``rms``, ``spectral_centroid``,
        ``spectral_bandwidth``, ``rolloff``, ``zero_crossing_rate``,
        ``harmony``, ``perceptr`` and ``mfcc1`` .. ``mfcc20``, plus a single
        ``tempo`` entry.
    """
    f = {}

    # Frame-level descriptors. The dict order fixes the order of the output.
    frame_features = {
        'chroma_stft': librosa.feature.chroma_stft(y=y, sr=sr),
        'rms': librosa.feature.rms(y=y),
        'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr),
        'spectral_bandwidth': librosa.feature.spectral_bandwidth(y=y, sr=sr),
        'rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr),
        'zero_crossing_rate': librosa.feature.zero_crossing_rate(y),
    }
    for name, values in frame_features.items():
        f[f'{name}_mean'] = np.mean(values)
        f[f'{name}_var'] = np.var(values)

    # Harmonic / percussive separation; statistics are taken over the
    # separated signals themselves.
    y_harm, y_perc = librosa.effects.hpss(y)
    f['harmony_mean'], f['harmony_var'] = np.mean(y_harm), np.var(y_harm)
    f['perceptr_mean'], f['perceptr_var'] = np.mean(y_perc), np.var(y_perc)

    # beat_track may hand the tempo back as a plain scalar or as an array
    # (including a 0-d array, on which `tempo[0]` would raise). Flattening
    # first covers every case and always yields one float.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    f['tempo'] = float(np.ravel(tempo)[0])

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    for i, coefficient in enumerate(mfccs, start=1):
        f[f'mfcc{i}_mean'] = np.mean(coefficient)
        f[f'mfcc{i}_var'] = np.var(coefficient)
    return pd.Series(f)

def classify_optimized(file_path, num_segments=5):
    """Classify a track's genre by majority vote over 3-second segments.

    The beginning of the file is cut into up to ``num_segments`` consecutive
    3-second segments. Each segment is assigned the genre whose mean feature
    vector in ``genre_means`` is closest, and the most frequent genre wins.

    The distance is the sum of absolute feature differences, each divided by
    that feature's ``global_std`` (an L1 distance on standardised features),
    so every feature carries a comparable weight.

    Parameters
    ----------
    file_path : str
        Path of the audio file to classify.
    num_segments : int, default 5
        Maximum number of 3-second segments that take part in the vote.

    Returns
    -------
    The label from ``genre_means.index`` that received the most votes; on a
    tie, the genre that was voted for first wins.

    Raises
    ------
    ValueError
        If ``num_segments`` is smaller than 1, or if the audio is too short
        to contain a single complete 3-second segment.
    """
    if num_segments < 1:
        raise ValueError(f"num_segments must be at least 1, got {num_segments!r}")

    segment_seconds = 3
    # Load only as much audio as the vote can use (speed!). With the default
    # of 5 segments this is the first 15 seconds.
    y_full, sr = librosa.load(file_path, duration=num_segments * segment_seconds)
    seg_len = segment_seconds * sr

    votes = []
    for i in range(num_segments):
        start = i * seg_len
        # Only complete segments are analysed; stop at the first partial one.
        if start + seg_len > len(y_full):
            break

        f_seg = extract_segment_features(y_full[start:start + seg_len], sr)

        # Standardised L1 distance to every genre centroid at once. Labels
        # are aligned by name, and features missing on either side are
        # skipped by the sum.
        distances = (
            genre_means.sub(f_seg, axis=1)
            .abs()
            .div(global_std, axis=1)
            .sum(axis=1)
        )
        votes.append(distances.idxmin())

    if not votes:
        raise ValueError(
            f"{file_path!r} is shorter than one {segment_seconds}-second segment; "
            "nothing to classify"
        )

    # Majority vote across the analysed segments.
    return Counter(votes).most_common(1)[0][0]

In [3]:
import time

def run_test(base_dir, limit=5):
    """Evaluate ``classify_optimized`` on a directory tree of genre folders.

    Every sub-directory of ``base_dir`` is treated as one genre; its name is
    the true label for the ``.wav`` files it contains. Directory listings are
    sorted so that the ``limit`` files taken from each genre are the same on
    every run and on every platform.

    Files that fail to classify are reported and skipped rather than silently
    ignored. Only ``Exception`` subclasses are caught, so interrupting the
    kernel still stops the run.

    Parameters
    ----------
    base_dir : str
        Directory that holds one sub-directory per genre.
    limit : int, default 5
        Maximum number of ``.wav`` files analysed per genre.

    Returns
    -------
    pd.DataFrame
        One row per successfully classified file with the columns ``Real``
        (folder name) and ``Pred`` (predicted genre). The frame has these
        columns even when no file could be classified.
    """
    results = []
    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments.
    start_time = time.perf_counter()

    genres = sorted(
        d for d in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, d))
    )

    for genre in genres:
        folder_path = os.path.join(base_dir, genre)
        files = sorted(f for f in os.listdir(folder_path) if f.endswith('.wav'))[:limit]

        print(f"Analizuję {genre}...")
        for file in files:
            path = os.path.join(folder_path, file)
            try:
                pred = classify_optimized(path)
            except Exception as exc:
                # Best effort: one unreadable file must not abort the run,
                # but the failure has to be visible.
                print(f"  Pominięto {path}: {exc!r}")
                continue
            results.append({'Real': genre, 'Pred': pred})

    total_time = time.perf_counter() - start_time
    # Explicit columns keep the frame well-formed when `results` is empty.
    report = pd.DataFrame(results, columns=['Real', 'Pred'])

    print(f"\nGotowe! Czas: {total_time:.1f}s")
    if report.empty:
        print("Brak poprawnie przeanalizowanych plików - nie można obliczyć skuteczności.")
    else:
        accuracy = (report['Real'] == report['Pred']).mean() * 100
        print(f"Skuteczność: {accuracy:.2f}%")
    return report


# Run the evaluation on the original GTZAN recordings, taking at most
# 10 .wav files from each genre folder.
report = run_test(
    base_dir='lib/gtzan-dataset-music-genre-classification/Data/genres_original',
    limit=10,
)

Analizuję blues...
Analizuję classical...
Analizuję country...
Analizuję disco...
Analizuję hiphop...
Analizuję jazz...
Analizuję metal...
Analizuję pop...
Analizuję reggae...
Analizuję rock...

Gotowe! Czas: 120.4s
Skuteczność: 55.00%
