In [None]:
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt
import zipfile, shutil, os, sys, warnings
from pathlib import Path

# Detect whether the Colab upload helper is importable. `files` is only
# defined when this import succeeds; IN_COLAB records the outcome so later
# code can check it before touching `files`.
try:
    from google.colab import files
    IN_COLAB = True
except Exception:
    IN_COLAB = False


# --- User settings -----------------------------------------------------
# The trailing `#@param` comments are Colab form annotations; keep each one
# on the same line as its value.
#   MODE            "upload_zip" asks for two ZIP uploads; any other value
#                   makes the script read FOLDER_A_PATH / FOLDER_B_PATH.
#   LABEL_A/LABEL_B labels stored with each result and shown in the legend.
#   MIN_FREQ_HZ     lower frequency bound handed to two_dominant_freqs.
#   MIN_SEP_HZ      peak separation handed to two_dominant_freqs.
#   MAX_DURATION_S  duration limit handed to load_audio.
#   TRIM_SILENCE    when true, two_dominant_freqs trims the signal with
#                   librosa.effects.trim before analysing it.
MODE = "upload_zip"  #@param ["upload_zip", "paths"] {type:"string"}
LABEL_A = "\"ih\" Hang "  #@param {type:"string"}
LABEL_B = "\"oa\" Hang"  #@param {type:"string"}
MIN_FREQ_HZ = 20.0        #@param {type:"number"}
MIN_SEP_HZ  = 40.0        #@param {type:"number"}
MAX_DURATION_S = 30.0     #@param {type:"number"}
TRIM_SILENCE = True       #@param {type:"boolean"}


# Input folders used when MODE is not "upload_zip".
FOLDER_A_PATH = "/content/folder_A"  #@param {type:"string"}
FOLDER_B_PATH = "/content/folder_B"  #@param {type:"string"}

# Lower-case file suffixes that collect_audio_paths accepts as audio files.
AUDIO_EXTS = {'.wav', '.mp3', '.flac', '.ogg', '.m4a', '.aac', '.wma', '.aiff', '.aif', '.opus'}

def _extend_with_separated_bins(power, seeds, count, min_distance):
    """Extend `seeds` to at most `count` bin indices of `power`.

    Each added index is the strongest bin that lies at least `min_distance`
    bins away from every index chosen so far. Fewer than `count` indices are
    returned when no such bin is left.
    """
    chosen = [int(i) for i in seeds]
    allowed = np.ones(power.size, dtype=bool)

    def _block_around(idx):
        # Bins closer than `min_distance` to a chosen bin are off limits
        # (same rule find_peaks applies through its `distance` argument).
        allowed[max(0, idx - min_distance + 1):idx + min_distance] = False

    for idx in chosen:
        _block_around(idx)
    while len(chosen) < count and allowed.any():
        candidates = np.flatnonzero(allowed)
        best = int(candidates[np.argmax(power[candidates])])
        chosen.append(best)
        _block_around(best)
    return np.array(chosen, dtype=int)


def two_dominant_freqs(y, sr, min_freq_hz=20.0, min_sep_hz=20.0, *, trim_silence=None):
    """
    Return the two most dominant frequencies of a signal in Hz (Welch PSD based).

    Parameters
    ----------
    y : numpy.ndarray
        One-dimensional array of audio samples.
    sr : number
        Sampling rate of `y` in Hz.
    min_freq_hz : float
        PSD bins below this frequency are ignored.
    min_sep_hz : float
        Minimum spacing between the two reported frequencies. It is converted
        to a whole number of PSD bins (rounded down, at least one bin).
    trim_silence : bool or None, keyword-only
        Whether to trim the signal with ``librosa.effects.trim`` first.
        ``None`` (the default) uses the module-level ``TRIM_SILENCE`` setting.

    Returns
    -------
    numpy.ndarray
        Frequencies in Hz, strongest first. Normally two values. Empty when
        the signal has fewer than 32 samples, has no usable amplitude after
        mean removal (e.g. digital silence), or fewer than three PSD bins lie
        at or above `min_freq_hz`. A single value when no second bin exists
        that is far enough from the first.
    """
    if y.size < 32:
        return np.array([])

    if trim_silence is None:
        trim_silence = TRIM_SILENCE
    if trim_silence:
        import librosa
        yt, _ = librosa.effects.trim(y, top_db=40)
        # Keep the untrimmed signal when trimming leaves too little to analyse.
        if yt.size > 32:
            y = yt

    # Remove the DC offset, then normalise to unit peak amplitude.
    y = y - np.mean(y)
    peak_amp = np.max(np.abs(y))
    if not peak_amp > 0:
        # Silent/constant (or NaN) input has no meaningful spectrum; without
        # this guard two arbitrary bins would be reported as "dominant".
        return np.array([])
    y = y / peak_amp

    # Segment length: 65536 for long signals, the largest power of two that
    # fits for medium ones, and the whole signal for short ones. The segment
    # must never exceed the signal, otherwise the fixed overlap derived from
    # it is >= the (clamped) segment length and welch raises ValueError.
    n = len(y)
    nperseg = 65536
    if n < 2048:
        nperseg = n
    elif n < nperseg:
        nperseg = 2**int(np.floor(np.log2(n)))
    freqs, Pxx = signal.welch(y, fs=sr, nperseg=nperseg, noverlap=nperseg // 2)

    mask = freqs >= min_freq_hz
    freqs, Pxx = freqs[mask], Pxx[mask]
    if freqs.size < 3:
        return np.array([])

    # Peak picking: at least 1 % of the maximum in prominence and at least
    # `min_sep_hz` apart (expressed in bins).
    df = freqs[1] - freqs[0]
    distance_bins = max(1, int(min_sep_hz / df))
    prominence = 0.01 * np.max(Pxx)
    peaks, _ = signal.find_peaks(Pxx, prominence=prominence, distance=distance_bins)

    if peaks.size >= 2:
        top_idx = peaks[np.argsort(Pxx[peaks])[-2:]]
    else:
        # Fewer than two real peaks: fall back to the strongest bins, but
        # still honour the minimum separation so that two neighbouring bins
        # of the same spectral peak are never reported as distinct tones.
        top_idx = _extend_with_separated_bins(Pxx, peaks, 2, distance_bins)

    # Strongest frequency first.
    order = np.argsort(Pxx[top_idx])[::-1]
    dom = freqs[top_idx][order]
    return dom

def collect_audio_paths(root: Path):
    """Recursively list the files under `root` whose suffix is in AUDIO_EXTS.

    The suffix comparison is case-insensitive. Entries that are not regular
    files are skipped. The result is a list in the order `rglob` yields it.
    """
    found = []
    for entry in root.rglob("*"):
        if not entry.is_file():
            continue
        if entry.suffix.lower() in AUDIO_EXTS:
            found.append(entry)
    return found

def load_audio(path: Path, max_duration_s=None):
    """Load an audio file with librosa.

    The file is read via ``librosa.load`` with ``sr=None`` and ``mono=True``;
    `max_duration_s` is forwarded as the ``duration`` argument.

    Returns a ``(samples, sample_rate)`` pair. If loading raises, a warning
    naming the file is emitted and ``(None, None)`` is returned instead.
    """
    import librosa

    try:
        samples, rate = librosa.load(
            str(path),
            sr=None,
            mono=True,
            duration=max_duration_s,
        )
    except Exception as err:
        # Best effort: one unreadable file must not abort the whole batch.
        warnings.warn(f"Hiba a betöltésnél: {path.name} -> {err}")
        return None, None
    return samples, rate

def process_folder(folder: Path, label: str):
    """Analyse every audio file under `folder` and collect its two dominant frequencies.

    Files are handled in sorted path order. Files that fail to load are
    skipped silently here (load_audio already warned); files for which fewer
    than two frequencies are found are skipped with a printed note.

    Returns a list of dicts with the keys ``file`` (file name), ``f1_hz``,
    ``f2_hz`` (the two frequencies as floats) and ``label``.
    """
    results = []
    for audio_path in sorted(collect_audio_paths(folder)):
        samples, rate = load_audio(audio_path, MAX_DURATION_S)
        if samples is None:
            continue

        dominant = two_dominant_freqs(samples, rate, MIN_FREQ_HZ, MIN_SEP_HZ)
        if dominant.size < 2:
            print(f"  Kihagyva (nincs 2 megbízható csúcs): {audio_path.name}")
            continue

        results.append(
            {
                "file": audio_path.name,
                "f1_hz": float(dominant[0]),
                "f2_hz": float(dominant[1]),
                "label": label,
            }
        )
    return results

def extract_zip_to(target_dir: Path, zip_filename: str):
    """Unpack the ZIP archive `zip_filename` into `target_dir`.

    `target_dir` (including missing parents) is created first if needed.
    A confirmation line naming the target directory is printed afterwards.
    """
    target_dir.mkdir(exist_ok=True, parents=True)
    with zipfile.ZipFile(zip_filename, mode='r') as archive:
        archive.extractall(path=target_dir)
    print(f"Kicsomagolva ide: {target_dir}")

# Default extraction targets used by the "upload_zip" mode.
base = Path("/content")
folder_a = Path("/content/folder_A")
folder_b = Path("/content/folder_B")

if MODE == "upload_zip":
    if not IN_COLAB:
        # `files` only exists when google.colab could be imported. Stop here
        # with the explanatory message instead of failing below with a
        # NameError on `files`.
        raise RuntimeError("Ez a mód a Colab feltöltőjét használja. Állítsd MODE='paths'-ra, ha nem Colabban futtatod.")

    print("  Töltsd fel az A mappához tartozó ZIP-et…")
    upA = files.upload()
    if not upA:
        # An empty result (e.g. a cancelled upload dialog) would otherwise
        # surface as a bare StopIteration from next().
        raise RuntimeError("Nem töltöttél fel ZIP-et az A mappához.")
    zipA = next(iter(upA.keys()))
    extract_zip_to(folder_a, zipA)

    print("  Töltsd fel a B mappához tartozó ZIP-et…")
    upB = files.upload()
    if not upB:
        raise RuntimeError("Nem töltöttél fel ZIP-et a B mappához.")
    zipB = next(iter(upB.keys()))
    extract_zip_to(folder_b, zipB)
else:
    folder_a = Path(FOLDER_A_PATH)
    folder_b = Path(FOLDER_B_PATH)
    # Explicit exceptions instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently skip this validation.
    if not folder_a.exists():
        raise FileNotFoundError(f"Nem találom az A mappát: {folder_a}")
    if not folder_b.exists():
        raise FileNotFoundError(f"Nem találom a B mappát: {folder_b}")

# Analyse both folders; the headings are printed first so that any per-file
# "skipped" notes from process_folder appear under the right folder.
print(f"\nA mappa: {folder_a}")
res_a = process_folder(folder_a, LABEL_A)
print(f"\nB mappa: {folder_b}")
res_b = process_folder(folder_b, LABEL_B)

# One row per file, columns (f1, f2). The reshape keeps the (0, 2) shape
# when a folder produced no results, so column indexing stays valid.
all_a = np.array([(row["f1_hz"], row["f2_hz"]) for row in res_a], dtype=float).reshape(-1, 2)
all_b = np.array([(row["f1_hz"], row["f2_hz"]) for row in res_b], dtype=float).reshape(-1, 2)

# Scatter plot of the two folders: x = strongest frequency, y = second one.
plt.figure(figsize=(7, 6))

# Only draw a series when its folder produced at least one result;
# transposing the (k, 2) array yields the x row and the y row.
if len(all_a):
    plt.scatter(*all_a.T, c="red", label=LABEL_A, alpha=0.85)
if len(all_b):
    plt.scatter(*all_b.T, c="blue", label=LABEL_B, alpha=0.85)

plt.title("Domináns frekvenciák – két mappa (x=f1, y=f2)")
plt.xlabel("1. domináns frekvencia (Hz)")
plt.ylabel("2. domináns frekvencia (Hz)")
plt.grid(True, which="both", linestyle="--", linewidth=0.5, alpha=0.6)
plt.legend()
plt.tight_layout()
plt.show()

# Text summary: one heading per folder, then one line per analysed file.
for heading, rows in (("\nEredmények (A):", res_a), ("\nEredmények (B):", res_b)):
    print(heading)
    for r in rows:
        print(f"{r['file']}: {r['f1_hz']:.2f} Hz, {r['f2_hz']:.2f} Hz")
