In [1]:
import os, json, math, random, time, shutil
from pathlib import Path
import numpy as np

# 스레드 폭주 방지(로컬에서도 안정성에 도움)
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

# 프로젝트 루트: 이 파일(노트북)이 notebooks/ 아래에 있다고 가정
ROOT = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
DATA = ROOT / "data"
OUT  = ROOT / "out" / "v4"
OUT.mkdir(parents=True, exist_ok=True)

ESC50_DIR   = DATA / "esc-50"
MS_SNSD_DIR = DATA / "MS-SNSD-NOISE"
SNEEZE_DIR  = DATA / "sneeze"
REC_DIR     = DATA / "recordings"

print("ROOT:", ROOT)
print("DATA:", DATA)
print("OUT :", OUT)

assert ESC50_DIR.exists(), ESC50_DIR
assert MS_SNSD_DIR.exists(), MS_SNSD_DIR
assert SNEEZE_DIR.exists(), SNEEZE_DIR
assert REC_DIR.exists(), REC_DIR


ROOT: c:\Dev\sneezing_detection\legacy_code
DATA: c:\Dev\sneezing_detection\legacy_code\data
OUT : c:\Dev\sneezing_detection\legacy_code\out\v4


In [2]:
CONFIG_PATH = OUT / "config_v4.json"

cfg = {
  "version": "v4",
  "seed": 1337,
  "audio": {"sr": 16000, "clip_seconds": 2.0},
  "features": {"type":"logmel","n_mels":64,"n_fft":400,"hop_length":160,"center":False,"log_eps":1e-6},
  "normalization": {"mode":"dataset_stats","rms_target_range":[0.03,0.15],"rms_apply_prob":1.0},
  "dataset_sizes": {"pos_total":12000,"neg_total":30000,"pos_mix_ratio":0.70},
  "negative_plan": {
    "event_ratio":0.60,"background_ratio":0.40,
    "event_sources":{"esc50_excluding_sneeze":0.40,"yaho":0.35,"noise1_noise2":0.25},
    "background_sources":{"ms_snsd":0.50,"talk":0.30,"dish":0.20}
  },
  "positive_plan": {
    "original_ratio":0.30,"synthetic_ratio":0.70,
    "background_pool":{"ms_snsd":0.40,"talk":0.40,"dish":0.20},
    "snr_db_range_bg":[0.0,20.0],
    "optional_event_on_pos":{
      "apply_prob":0.15,
      "event_pool":{"yaho":0.50,"esc50_excluding_sneeze":0.30,"noise1_noise2":0.20},
      "snr_db_range_event":[15.0,30.0]
    }
  },
  "augment": {
    "gain_db_range":[-6.0,6.0],
    "time_shift_ms":200,
    "time_stretch":{"apply_prob":0.10,"rate_range":[0.95,1.05]},
    "pitch_shift":{"apply_prob":0.05,"semitones_range":[-0.25,0.25]},
    "specaugment":{"apply_prob":0.10,"time_masks":1,"time_mask_max":12,"freq_masks":1,"freq_mask_max":6}
  },
  "splits": {"train":0.70,"val":0.15,"test":0.15},
  "training": {"batch_size":64,"epochs":100,"lr":0.001,"early_stopping_patience":6,"reduce_lr_patience":2,"reduce_lr_factor":0.5},
  "thresholding": {"method":"precision_target","target_precision":0.99,"fallback_threshold":0.90},
  "export": {"tflite_dynamic":True}
}

CONFIG_PATH.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
print("saved:", CONFIG_PATH)


saved: c:\Dev\sneezing_detection\legacy_code\out\v4\config_v4.json


In [3]:
import csv
import librosa
import soundfile as sf
from tqdm import tqdm


In [4]:
SEED = int(cfg["seed"])
random.seed(SEED)
np.random.seed(SEED)
print("seed:", SEED)


seed: 1337


In [5]:
# recordings 필수 파일
TALK_WAV   = REC_DIR / "talk.wav"
DISH_WAV   = REC_DIR / "dish.wav"
YAHO_WAV   = REC_DIR / "yaho.wav"
NOISE1_WAV = REC_DIR / "noise1.wav"
NOISE2_WAV = REC_DIR / "noise2.wav"

for p in [TALK_WAV, DISH_WAV, YAHO_WAV, NOISE1_WAV, NOISE2_WAV]:
    assert p.exists(), f"missing: {p}"

# sneeze clips
sneeze_files = sorted([p for p in SNEEZE_DIR.rglob("*.wav")])
assert len(sneeze_files) > 0
print("sneeze clips:", len(sneeze_files))

# ESC-50
esc_meta = ESC50_DIR / "meta" / "esc50.csv"
esc_audio_dir = ESC50_DIR / "audio"
assert esc_meta.exists(), esc_meta
assert esc_audio_dir.exists(), esc_audio_dir

esc_rows = []
with open(esc_meta, "r", encoding="utf-8") as f:
    r = csv.DictReader(f)
    for row in r:
        esc_rows.append(row)

esc_event_files = []
for row in esc_rows:
    label = row["category"].strip().lower()
    fname = row["filename"].strip()
    if "sneeze" in label:
        continue
    wav = esc_audio_dir / fname
    if wav.exists():
        esc_event_files.append(wav)

print("esc-50 event files(excl sneeze):", len(esc_event_files))

# MS-SNSD
ms_snsd_files = sorted([p for p in MS_SNSD_DIR.rglob("*.wav")])
assert len(ms_snsd_files) > 0
print("ms-snsd wav files:", len(ms_snsd_files))


sneeze clips: 968
esc-50 event files(excl sneeze): 2000
ms-snsd wav files: 128


In [6]:
SR = int(cfg["audio"]["sr"])
CLIP_SEC = float(cfg["audio"]["clip_seconds"])
CLIP_SAMPLES = int(SR * CLIP_SEC)

N_MELS = int(cfg["features"]["n_mels"])
N_FFT  = int(cfg["features"]["n_fft"])
HOP    = int(cfg["features"]["hop_length"])
CENTER = bool(cfg["features"]["center"])
LOG_EPS = float(cfg["features"]["log_eps"])

def rms(x):
    x = np.asarray(x, np.float32)
    return float(np.sqrt(np.mean(x*x) + 1e-8))

def fix_2s(y):
    y = np.asarray(y, np.float32)
    if len(y) >= CLIP_SAMPLES:
        return y[:CLIP_SAMPLES]
    return np.pad(y, (0, CLIP_SAMPLES - len(y))).astype(np.float32)

def rand_crop_2s(y):
    y = np.asarray(y, np.float32)
    if len(y) <= CLIP_SAMPLES:
        return fix_2s(y)
    start = np.random.randint(0, len(y) - CLIP_SAMPLES + 1)
    return y[start:start+CLIP_SAMPLES].astype(np.float32)

def load_mono(path, sr=SR):
    y, _ = librosa.load(str(path), sr=sr, mono=True)
    return y.astype(np.float32)

def apply_gain_db(y, db):
    g = 10 ** (db / 20.0)
    return np.clip(y * g, -1.0, 1.0).astype(np.float32)

def rms_randomize(y, lo, hi):
    target = float(np.random.uniform(lo, hi))
    r = rms(y)
    if r > 1e-6:
        y = y * (target / (r + 1e-8))
    return np.clip(y, -1.0, 1.0).astype(np.float32)

def mix_at_snr(signal, background, snr_db):
    s = fix_2s(signal)
    b = fix_2s(background)
    rs, rb = rms(s), rms(b)
    if rb < 1e-6:
        return s
    alpha = (rs + 1e-8) / ((rb + 1e-8) * (10 ** (snr_db / 20.0)))
    y = s + b * alpha
    return np.clip(y, -1.0, 1.0).astype(np.float32)

def time_shift(y, max_ms=200):
    max_samp = int(SR * (max_ms / 1000.0))
    if max_samp <= 0:
        return y
    k = np.random.randint(-max_samp, max_samp + 1)
    return np.roll(y, k).astype(np.float32)

def maybe_time_stretch(y, prob, r_lo, r_hi):
    if np.random.rand() > prob:
        return y
    rate = float(np.random.uniform(r_lo, r_hi))
    ys = librosa.effects.time_stretch(y, rate=rate).astype(np.float32)
    return fix_2s(ys)

def maybe_pitch_shift(y, prob, s_lo, s_hi):
    if np.random.rand() > prob:
        return y
    steps = float(np.random.uniform(s_lo, s_hi))
    yp = librosa.effects.pitch_shift(y, sr=SR, n_steps=steps).astype(np.float32)
    return fix_2s(yp)

def logmel(y):
    S = librosa.feature.melspectrogram(
        y=y, sr=SR, n_fft=N_FFT, hop_length=HOP,
        n_mels=N_MELS, power=2.0, center=CENTER
    )
    return np.log(S + LOG_EPS).T.astype(np.float32)  # (frames, mels)


In [7]:
talk_audio  = load_mono(TALK_WAV)
dish_audio  = load_mono(DISH_WAV)
yaho_audio  = load_mono(YAHO_WAV)
noise1_audio = load_mono(NOISE1_WAV)
noise2_audio = load_mono(NOISE2_WAV)

def sample_from_long(y_long):
    return rand_crop_2s(y_long)

def sample_esc50_2s(path):
    y = load_mono(path)
    return rand_crop_2s(y)

def sample_ms_snsd_2s(path):
    y = load_mono(path)
    return rand_crop_2s(y)


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
pos_total = int(cfg["dataset_sizes"]["pos_total"])
neg_total = int(cfg["dataset_sizes"]["neg_total"])
N_TOTAL = pos_total + neg_total

tmp = logmel(np.zeros(CLIP_SAMPLES, np.float32))
FRAMES = int(tmp.shape[0])
assert tmp.shape[1] == N_MELS
print("FRAMES:", FRAMES, "MELS:", N_MELS, "N_TOTAL:", N_TOTAL)

X_PATH = OUT / "v4_features_f32.dat"
Y_PATH = OUT / "v4_labels_i8.dat"
META_PATH = OUT / "v4_meta.json"

# 이미 존재하면 재생성 여부 결정(실수 방지)
if X_PATH.exists() or Y_PATH.exists():
    print("memmap exists. delete to regenerate:", X_PATH, Y_PATH)

X_mm = np.memmap(str(X_PATH), dtype="float32", mode="w+", shape=(N_TOTAL, FRAMES, N_MELS))
y_mm = np.memmap(str(Y_PATH), dtype="int8", mode="w+", shape=(N_TOTAL,))

sum_m = np.zeros((N_MELS,), np.float64)
sumsq_m = np.zeros((N_MELS,), np.float64)
count_tf = 0

def weighted_choice(items, weights):
    w = np.asarray(weights, np.float64)
    w = w / w.sum()
    idx = np.random.choice(len(items), p=w)
    return items[idx]

pos_original_n = int(pos_total * float(cfg["positive_plan"]["original_ratio"]))
pos_synth_n    = pos_total - pos_original_n
neg_event_n = int(neg_total * float(cfg["negative_plan"]["event_ratio"]))
neg_bg_n    = neg_total - neg_event_n

bg_pool_items = ["ms_snsd", "talk", "dish"]
bg_pool_w = [
    cfg["positive_plan"]["background_pool"]["ms_snsd"],
    cfg["positive_plan"]["background_pool"]["talk"],
    cfg["positive_plan"]["background_pool"]["dish"],
]

pos_evt_items = ["yaho", "esc", "noise12"]
pos_evt_w = [
    cfg["positive_plan"]["optional_event_on_pos"]["event_pool"]["yaho"],
    cfg["positive_plan"]["optional_event_on_pos"]["event_pool"]["esc50_excluding_sneeze"],
    cfg["positive_plan"]["optional_event_on_pos"]["event_pool"]["noise1_noise2"],
]

neg_evt_items = ["esc", "yaho", "noise12"]
neg_evt_w = [
    cfg["negative_plan"]["event_sources"]["esc50_excluding_sneeze"],
    cfg["negative_plan"]["event_sources"]["yaho"],
    cfg["negative_plan"]["event_sources"]["noise1_noise2"],
]

neg_bg_items = ["ms_snsd", "talk", "dish"]
neg_bg_w = [
    cfg["negative_plan"]["background_sources"]["ms_snsd"],
    cfg["negative_plan"]["background_sources"]["talk"],
    cfg["negative_plan"]["background_sources"]["dish"],
]

rms_lo, rms_hi = cfg["normalization"]["rms_target_range"]
snr_bg_lo, snr_bg_hi = cfg["positive_plan"]["snr_db_range_bg"]
pos_evt_prob = float(cfg["positive_plan"]["optional_event_on_pos"]["apply_prob"])
snr_evt_lo, snr_evt_hi = cfg["positive_plan"]["optional_event_on_pos"]["snr_db_range_event"]

aug = cfg["augment"]
gain_lo, gain_hi = aug["gain_db_range"]
shift_ms = int(aug["time_shift_ms"])
ts_prob = float(aug["time_stretch"]["apply_prob"])
ts_lo, ts_hi = aug["time_stretch"]["rate_range"]
ps_prob = float(aug["pitch_shift"]["apply_prob"])
ps_lo, ps_hi = aug["pitch_shift"]["semitones_range"]

def sample_background(source_name):
    if source_name == "talk":
        return sample_from_long(talk_audio)
    if source_name == "dish":
        return sample_from_long(dish_audio)
    if source_name == "ms_snsd":
        return sample_ms_snsd_2s(random.choice(ms_snsd_files))
    raise ValueError(source_name)

def sample_event(source_name):
    if source_name == "yaho":
        return sample_from_long(yaho_audio)
    if source_name == "noise12":
        return sample_from_long(noise1_audio if np.random.rand() < 0.5 else noise2_audio)
    if source_name == "esc":
        return sample_esc50_2s(random.choice(esc_event_files))
    raise ValueError(source_name)

def apply_audio_aug(y):
    y = apply_gain_db(y, float(np.random.uniform(gain_lo, gain_hi)))
    y = time_shift(y, max_ms=shift_ms)
    y = maybe_time_stretch(y, ts_prob, ts_lo, ts_hi)
    y = maybe_pitch_shift(y, ps_prob, ps_lo, ps_hi)
    y = rms_randomize(y, rms_lo, rms_hi)
    return y

def write_feature(i, y_audio, label):
    global sum_m, sumsq_m, count_tf
    f = logmel(y_audio)
    if f.shape[0] > FRAMES:
        f = f[:FRAMES, :]
    elif f.shape[0] < FRAMES:
        f = np.pad(f, ((0, FRAMES - f.shape[0]), (0, 0)), mode="constant")

    X_mm[i, :, :] = f.astype(np.float32)
    y_mm[i] = np.int8(label)

    sum_m += f.sum(axis=0)
    sumsq_m += (f * f).sum(axis=0)
    count_tf += f.shape[0]

i = 0

for _ in tqdm(range(pos_original_n), desc="pos_original"):
    p = random.choice(sneeze_files)
    y = apply_audio_aug(rand_crop_2s(load_mono(p)))
    write_feature(i, y, 1); i += 1

for _ in tqdm(range(pos_synth_n), desc="pos_synth"):
    p = random.choice(sneeze_files)
    sneeze = rand_crop_2s(load_mono(p))
    bg = sample_background(weighted_choice(bg_pool_items, bg_pool_w))
    snr_bg = float(np.random.uniform(snr_bg_lo, snr_bg_hi))
    y = mix_at_snr(sneeze, bg, snr_bg)

    if np.random.rand() < pos_evt_prob:
        evt = sample_event(weighted_choice(pos_evt_items, pos_evt_w))
        snr_evt = float(np.random.uniform(snr_evt_lo, snr_evt_hi))
        y = mix_at_snr(y, evt, snr_evt)

    y = apply_audio_aug(y)
    write_feature(i, y, 1); i += 1

for _ in tqdm(range(neg_bg_n), desc="neg_bg"):
    y = apply_audio_aug(sample_background(weighted_choice(neg_bg_items, neg_bg_w)))
    write_feature(i, y, 0); i += 1

for _ in tqdm(range(neg_event_n), desc="neg_event"):
    y = sample_event(weighted_choice(neg_evt_items, neg_evt_w))
    if np.random.rand() < 0.50:
        bg = sample_background(weighted_choice(neg_bg_items, neg_bg_w))
        snr = float(np.random.uniform(5.0, 25.0))
        y = mix_at_snr(y, bg, snr)
    y = apply_audio_aug(y)
    write_feature(i, y, 0); i += 1

X_mm.flush()
y_mm.flush()

meta = {
  "version":"v4",
  "sr":SR, "clip_seconds":CLIP_SEC,
  "frames":FRAMES, "mels":N_MELS,
  "n_total":int(N_TOTAL),
  "pos_total":int(pos_total),
  "neg_total":int(neg_total),
  "features_path":str(X_PATH),
  "labels_path":str(Y_PATH),
}
META_PATH.write_text(json.dumps(meta, indent=2), encoding="utf-8")

print("written:", i, "expected:", N_TOTAL)
print("saved:", META_PATH)


FRAMES: 198 MELS: 64 N_TOTAL: 42000


pos_original: 100%|██████████| 3600/3600 [00:39<00:00, 91.66it/s] 
pos_synth: 100%|██████████| 8400/8400 [01:47<00:00, 78.40it/s] 
neg_bg: 100%|██████████| 12000/12000 [02:23<00:00, 83.51it/s] 
neg_event: 100%|██████████| 18000/18000 [03:43<00:00, 80.44it/s] 


written: 42000 expected: 42000
saved: c:\Dev\sneezing_detection\legacy_code\out\v4\v4_meta.json


In [9]:
mu = (sum_m / max(1, count_tf)).astype(np.float32)
var = (sumsq_m / max(1, count_tf) - (mu.astype(np.float64) ** 2))
var = np.maximum(var, 1e-8).astype(np.float32)
sd  = np.sqrt(var).astype(np.float32)

STATS_PATH = OUT / "v4_norm_stats.npz"
np.savez(STATS_PATH, mu=mu, sd=sd)
print("saved:", STATS_PATH, "mu:", mu.shape, "sd:", sd.shape)


saved: c:\Dev\sneezing_detection\legacy_code\out\v4\v4_norm_stats.npz mu: (64,) sd: (64,)


In [10]:
from sklearn.model_selection import train_test_split

y_all = np.array(np.memmap(str(Y_PATH), dtype="int8", mode="r", shape=(N_TOTAL,)), dtype=np.int64)
idx = np.arange(len(y_all))

idx_train, idx_tmp, y_train, y_tmp = train_test_split(
    idx, y_all, test_size=(1.0 - cfg["splits"]["train"]), random_state=SEED, stratify=y_all
)
val_ratio = cfg["splits"]["val"] / (cfg["splits"]["val"] + cfg["splits"]["test"])
idx_val, idx_test, y_val, y_test = train_test_split(
    idx_tmp, y_tmp, test_size=(1.0 - val_ratio), random_state=SEED, stratify=y_tmp
)

def counts(name, idxs):
    yy = y_all[idxs]
    print(name, "n=", len(idxs), "pos=", int(yy.sum()), "neg=", int((yy==0).sum()))

counts("train", idx_train)
counts("val  ", idx_val)
counts("test ", idx_test)


train n= 29399 pos= 8400 neg= 20999
val   n= 6300 pos= 1800 neg= 4500
test  n= 6301 pos= 1800 neg= 4501


In [11]:
import tensorflow as tf

# memmap read 모드로 재오픈
X_mm = np.memmap(str(X_PATH), dtype="float32", mode="r", shape=(N_TOTAL, FRAMES, N_MELS))
st = np.load(str(STATS_PATH))
mu = st["mu"].astype(np.float32)
sd = st["sd"].astype(np.float32)

BATCH = int(cfg["training"]["batch_size"])

sa = cfg["augment"]["specaugment"]
SA_PROB = float(sa["apply_prob"])
SA_TM = int(sa["time_masks"])
SA_TMAX = int(sa["time_mask_max"])
SA_FM = int(sa["freq_masks"])
SA_FMAX = int(sa["freq_mask_max"])

def specaugment_np(f):
    g = f.copy()
    T, F = g.shape
    for _ in range(SA_TM):
        w = np.random.randint(0, SA_TMAX+1)
        if w > 0 and T - w > 0:
            t0 = np.random.randint(0, T - w)
            g[t0:t0+w, :] = 0.0
    for _ in range(SA_FM):
        w = np.random.randint(0, SA_FMAX+1)
        if w > 0 and F - w > 0:
            f0 = np.random.randint(0, F - w)
            g[:, f0:f0+w] = 0.0
    return g

def batch_generator(idxs, shuffle=True):
    idxs = np.array(idxs, dtype=np.int64)
    n = len(idxs)
    while True:
        if shuffle:
            np.random.shuffle(idxs)
        for s in range(0, n, BATCH):
            b = idxs[s:s+BATCH]
            Xb = np.array(X_mm[b, :, :], dtype=np.float32)
            Xb = (Xb - mu[None, None, :]) / (sd[None, None, :] + 1e-6)

            if np.random.rand() < SA_PROB:
                for i in range(Xb.shape[0]):
                    Xb[i] = specaugment_np(Xb[i])

            Xb = Xb[..., None]
            yb = y_all[b].astype(np.float32)
            yield Xb, yb

def build_model(frames, mels):
    inp = tf.keras.Input(shape=(frames, mels, 1))
    x = tf.keras.layers.Conv2D(16, (3,3), padding="same", activation="relu")(inp)
    x = tf.keras.layers.MaxPool2D((2,2))(x)
    x = tf.keras.layers.Conv2D(32, (3,3), padding="same", activation="relu")(x)
    x = tf.keras.layers.MaxPool2D((2,2))(x)
    x = tf.keras.layers.Conv2D(64, (3,3), padding="same", activation="relu")(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    out = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    return tf.keras.Model(inp, out)

BEST_PATH = OUT / "v4_model_best.keras"
LAST_PATH = OUT / "v4_model_last.keras"

# 재개: last가 있으면 로드, 없으면 새로 생성
if LAST_PATH.exists():
    print("resume from:", LAST_PATH)
    model = tf.keras.models.load_model(str(LAST_PATH))
else:
    model = build_model(FRAMES, N_MELS)

lr = float(cfg["training"]["lr"])
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.AUC(name="auc"), tf.keras.metrics.Precision(name="prec"), tf.keras.metrics.Recall(name="rec")]
)

steps_per_epoch = math.ceil(len(idx_train) / BATCH)
val_steps = math.ceil(len(idx_val) / BATCH)

train_gen = batch_generator(idx_train, shuffle=True)
val_gen   = batch_generator(idx_val, shuffle=False)

cbs = [
    tf.keras.callbacks.ModelCheckpoint(str(BEST_PATH), monitor="val_auc", mode="max", save_best_only=True, save_weights_only=False),
    tf.keras.callbacks.ModelCheckpoint(str(LAST_PATH), monitor="val_auc", mode="max", save_best_only=False, save_weights_only=False),
    tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=int(cfg["training"]["early_stopping_patience"]), restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_auc", mode="max", patience=int(cfg["training"]["reduce_lr_patience"]), factor=float(cfg["training"]["reduce_lr_factor"])),
]

history = model.fit(
    train_gen,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=int(cfg["training"]["epochs"]),   # 100 상한, early stopping이 알아서 멈춤
    callbacks=cbs,
    verbose=1
)

print("saved best:", BEST_PATH)
print("saved last:", LAST_PATH)


Epoch 1/100
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 64ms/step - auc: 0.8048 - loss: 0.4655 - prec: 0.7342 - rec: 0.4152 - val_auc: 0.8999 - val_loss: 0.3648 - val_prec: 0.7774 - val_rec: 0.5683 - learning_rate: 0.0010
Epoch 2/100
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 62ms/step - auc: 0.9074 - loss: 0.3413 - prec: 0.7751 - rec: 0.6957 - val_auc: 0.9230 - val_loss: 0.3209 - val_prec: 0.7936 - val_rec: 0.6878 - learning_rate: 0.0010
Epoch 3/100
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 65ms/step - auc: 0.9269 - loss: 0.3063 - prec: 0.8021 - rec: 0.7396 - val_auc: 0.9326 - val_loss: 0.3099 - val_prec: 0.7256 - val_rec: 0.8183 - learning_rate: 0.0010
Epoch 4/100
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 64ms/step - auc: 0.9363 - loss: 0.2867 - prec: 0.8167 - rec: 0.7660 - val_auc: 0.9458 - val_loss: 0.2899 - val_prec: 0.8953 - val_rec: 0.6564 - learning_rate: 0.0010
Epoch 5/100
[1m460/

In [12]:
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve

best_model = tf.keras.models.load_model(str(BEST_PATH))

def predict_on_idxs(idxs):
    probs, ys = [], []
    for s in range(0, len(idxs), BATCH):
        b = np.array(idxs[s:s+BATCH], dtype=np.int64)
        Xb = np.array(X_mm[b, :, :], dtype=np.float32)
        Xb = (Xb - mu[None, None, :]) / (sd[None, None, :] + 1e-6)
        Xb = Xb[..., None]
        pb = best_model.predict(Xb, verbose=0).reshape(-1)
        probs.append(pb)
        ys.append(y_all[b])
    return np.concatenate(probs), np.concatenate(ys)

p_test, y_test2 = predict_on_idxs(idx_test)
thr = 0.5
yhat = (p_test >= thr).astype(int)

print("confusion @0.5:\n", confusion_matrix(y_test2, yhat))
print(classification_report(y_test2, yhat, digits=4))

p_val, y_val2 = predict_on_idxs(idx_val)
prec, rec, th = precision_recall_curve(y_val2, p_val)

target_prec = float(cfg["thresholding"]["target_precision"])
fallback = float(cfg["thresholding"]["fallback_threshold"])

cands = [(t, p, r) for t, p, r in zip(th, prec[:-1], rec[:-1]) if p >= target_prec]
if len(cands) == 0:
    best_t = fallback
else:
    best_t, _, _ = sorted(cands, key=lambda x: x[2], reverse=True)[0]

THR_PATH = OUT / "v4_threshold.txt"
THR_PATH.write_text(f"{best_t}\n", encoding="utf-8")
print("threshold:", best_t, "saved:", THR_PATH)

# TFLite 변환(동적 양자화)
TFLITE_PATH = OUT / "v4_model.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
TFLITE_PATH.write_bytes(tflite_model)
print("saved:", TFLITE_PATH, "bytes:", TFLITE_PATH.stat().st_size)


confusion @0.5:
 [[4385  116]
 [ 139 1661]]
              precision    recall  f1-score   support

           0     0.9693    0.9742    0.9717      4501
           1     0.9347    0.9228    0.9287      1800

    accuracy                         0.9595      6301
   macro avg     0.9520    0.9485    0.9502      6301
weighted avg     0.9594    0.9595    0.9595      6301

threshold: 0.9609946 saved: c:\Dev\sneezing_detection\legacy_code\out\v4\v4_threshold.txt
INFO:tensorflow:Assets written to: C:\Users\hmchung\AppData\Local\Temp\tmp3_yjf0gc\assets


INFO:tensorflow:Assets written to: C:\Users\hmchung\AppData\Local\Temp\tmp3_yjf0gc\assets


Saved artifact at 'C:\Users\hmchung\AppData\Local\Temp\tmp3_yjf0gc'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 198, 64, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2299665620944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2299665622672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2299665622096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2299665621520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2299665623056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2299665622864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2297362909712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2297362908944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2297362910672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2297362910288: TensorSpec(shape=(), dtype=tf.resource, name=None)
saved: 