# **1. 라이브러리, 기본 설정**

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import input_file_name, regexp_extract, when, col

import numpy as np
import tensorflow as tf

In [0]:
WATCH_REAL_DATA_TABLE = "team2_databricks.default.bronze_fall_detection_raw_memberA"

print("✅ WATCH_REAL_DATA_TABLE :", WATCH_REAL_DATA_TABLE)

✅ WATCH_REAL_DATA_TABLE : team2_databricks.default.bronze_fall_detection_raw_memberA


In [0]:
df_real_all = spark.table(WATCH_REAL_DATA_TABLE)

# **2. 파일 경로 정의**

In [0]:
# =========================================================
# 2. 파일 경로 정의
# =========================================================

# ---------------------------------------------------------
# 기존 real_data (과거 수집 데이터, batch)
# ---------------------------------------------------------
BASE_REAL_DATA_PATH = "/Volumes/team2_databricks/data_set/real_data/"

# ---------------------------------------------------------
# 워치 → Azure Function → Databricks (Fall-only, streaming)
# ---------------------------------------------------------
WATCH_REAL_DATA_PATH = "/Volumes/team2_databricks/data_set/real_data_watch/"

# ---------------------------------------------------------
# Auto Loader (WATCH 전용)
# ---------------------------------------------------------
WATCH_CHECKPOINT_PATH = "dbfs:/checkpoints/real_data_watch_bronze"
WATCH_SCHEMA_PATH     = "dbfs:/schemas/real_data_watch_bronze"

print("✅ BASE_REAL_DATA_PATH :", BASE_REAL_DATA_PATH)
print("✅ WATCH_REAL_DATA_PATH:", WATCH_REAL_DATA_PATH)
print("✅ WATCH_CHECKPOINT_PATH:", WATCH_CHECKPOINT_PATH)
print("✅ WATCH_SCHEMA_PATH    :", WATCH_SCHEMA_PATH)

✅ BASE_REAL_DATA_PATH : /Volumes/team2_databricks/data_set/real_data/
✅ WATCH_REAL_DATA_PATH: /Volumes/team2_databricks/data_set/real_data_watch/
✅ WATCH_CHECKPOINT_PATH: dbfs:/checkpoints/real_data_watch_bronze
✅ WATCH_SCHEMA_PATH    : dbfs:/schemas/real_data_watch_bronze


# **3. 실축 데이터 로드**

In [0]:
# =========================================================
# 3-1. 기존 Training Data 로드 (Batch)
# =========================================================

from pyspark.sql.functions import col, lit
from pyspark.sql.types import TimestampType

df_train_base = (
    spark.read
    .option("header", "true")
    .csv("/Volumes/team2_databricks/data_set/real_data/")
)

print("✅ Base training rows:", df_train_base.count())
display(df_train_base.limit(5))

# ---------------------------------------------------------
# 스키마 정규화
# - timestamp : dummy 생성
# - subject   : dummy 생성 (기존 데이터에는 없음)
# ---------------------------------------------------------

df_train_base = (
    df_train_base
    .withColumn(
        "timestamp",
        lit("1970-01-01 00:00:00").cast(TimestampType())
    )
    .withColumn(
        "subject",
        lit("train_base")   # 🔥 핵심 수정
    )
    .select(
        "timestamp",
        col("ACC_X").alias("acc_x"),
        col("ACC_Y").alias("acc_y"),
        col("ACC_Z").alias("acc_z"),
        col("GYRO_X").alias("gyro_x"),
        col("GYRO_Y").alias("gyro_y"),
        col("GYRO_Z").alias("gyro_z"),
        col("Label").alias("label"),
        "subject"
    )
)

df_train_base.printSchema()


✅ Base training rows: 257408


Index,ACC_X,ACC_Y,ACC_Z,GYRO_X,GYRO_Y,GYRO_Z,SVM,Label,Window_ID
1.0,-9.485827445983888,-4.910507678985596,0.0191536135971546,0.1698205322027206,-0.1954768747091293,0.0122173046693205,10.68149684553956,0,Normal_걷기_0
2.0,-9.351752281188965,-4.72376012802124,-0.0502782352268695,0.6206390857696533,-0.2333505302667617,-0.8881980776786804,10.477199452942743,0,Normal_걷기_0
3.0,-9.27034854888916,-4.417302131652832,-0.0287304203957319,1.099557399749756,-0.5290092825889587,-1.7666223049163818,10.26901873487687,0,Normal_걷기_0
4.0,-9.17936897277832,-4.2760443687438965,-0.1077390760183334,1.23394775390625,-0.6230825781822205,-2.046398639678955,10.127041912146558,0,Normal_걷기_0
5.0,-9.047688484191896,-4.02465295791626,-0.1077390760183334,1.1508700847625732,-0.7073819637298584,-2.118480682373047,9.90303519367391,0,Normal_걷기_0


root
 |-- timestamp: timestamp (nullable = true)
 |-- acc_x: string (nullable = true)
 |-- acc_y: string (nullable = true)
 |-- acc_z: string (nullable = true)
 |-- gyro_x: string (nullable = true)
 |-- gyro_y: string (nullable = true)
 |-- gyro_z: string (nullable = true)
 |-- label: string (nullable = true)
 |-- subject: string (nullable = false)



In [0]:
# =========================================================
# 3-2. 신규 실측 데이터 로드 (Bronze → 정규화)
# =========================================================

from pyspark.sql.functions import col, from_json, explode
from pyspark.sql.types import (
    StructType, StructField,
    StringType, DoubleType, ArrayType
)

# Bronze 로드
df_bronze = spark.read.table("bronze_fall_detection_raw")

# device / sensor 스키마
device_schema = StructType([
    StructField("deviceId", StringType()),
])

sensor_schema = ArrayType(
    StructType([
        StructField("ax", DoubleType()),
        StructField("ay", DoubleType()),
        StructField("az", DoubleType()),
        StructField("gx", DoubleType()),
        StructField("gy", DoubleType()),
        StructField("gz", DoubleType()),
    ])
)

# JSON 파싱 + explode
df_new = (
    df_bronze
    .withColumn("device_json", from_json(col("device"), device_schema))
    .withColumn("sensor_array", from_json(col("sensor"), sensor_schema))
    .withColumn("sensor_row", explode(col("sensor_array")))
    .select(
        col("timestamp"),

        col("sensor_row.ax").alias("acc_x"),
        col("sensor_row.ay").alias("acc_y"),
        col("sensor_row.az").alias("acc_z"),

        col("sensor_row.gx").alias("gyro_x"),
        col("sensor_row.gy").alias("gyro_y"),
        col("sensor_row.gz").alias("gyro_z"),

        col("type").alias("label"),              # ⚠️ 필요 시 0/1 매핑
        col("device_json.deviceId").alias("subject")
    )
)

print("✅ New watch rows:", df_new.count())
df_new.printSchema()


✅ New watch rows: 15
root
 |-- timestamp: string (nullable = true)
 |-- acc_x: double (nullable = true)
 |-- acc_y: double (nullable = true)
 |-- acc_z: double (nullable = true)
 |-- gyro_x: double (nullable = true)
 |-- gyro_y: double (nullable = true)
 |-- gyro_z: double (nullable = true)
 |-- label: string (nullable = true)
 |-- subject: string (nullable = true)



In [0]:
from pyspark.sql.functions import when

df_new = df_new.withColumn(
    "label",
    when(col("label") == "auto_reported", 1).otherwise(0)
)

In [0]:
# =========================================================
# 3-4. 기존 Training + 신규 실측 데이터 통합
# =========================================================

common_cols = [
    "timestamp",
    "acc_x", "acc_y", "acc_z",
    "gyro_x", "gyro_y", "gyro_z",
    "label", "subject"
]

df_raw = (
    df_train_base.select(common_cols)
    .unionByName(df_new.select(common_cols))
)

print("🔥 Total training rows:", df_raw.count())
display(df_raw.limit(5))

🔥 Total training rows: 257423


timestamp,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,label,subject
1970-01-01T00:00:00Z,-9.485827445983888,-4.910507678985596,0.0191536135971546,0.1698205322027206,-0.1954768747091293,0.0122173046693205,0,train_base
1970-01-01T00:00:00Z,-9.351752281188965,-4.72376012802124,-0.0502782352268695,0.6206390857696533,-0.2333505302667617,-0.8881980776786804,0,train_base
1970-01-01T00:00:00Z,-9.27034854888916,-4.417302131652832,-0.0287304203957319,1.099557399749756,-0.5290092825889587,-1.7666223049163818,0,train_base
1970-01-01T00:00:00Z,-9.17936897277832,-4.2760443687438965,-0.1077390760183334,1.23394775390625,-0.6230825781822205,-2.046398639678955,0,train_base
1970-01-01T00:00:00Z,-9.047688484191896,-4.02465295791626,-0.1077390760183334,1.1508700847625732,-0.7073819637298584,-2.118480682373047,0,train_base


# **4. 실축 데이터 칼럼 정렬**

In [0]:
# =========================================================
# 4. 실측 데이터 칼럼 정렬 (Raw → Window 입력)
# =========================================================

FEATURE_COLS = [
    "acc_x", "acc_y", "acc_z",
    "gyro_x", "gyro_y", "gyro_z"
]

REQUIRED_COLS = ["timestamp", "label", "subject"] + FEATURE_COLS

missing = set(REQUIRED_COLS) - set(df_raw.columns)
if missing:
    raise RuntimeError(f"❌ df_raw 컬럼 누락: {missing}")

df_real_all = (
    df_raw
    .select(
        "timestamp",
        "label",
        "subject",
        *FEATURE_COLS
    )
)

print("✅ 4단계 컬럼 정렬 완료")
df_real_all.printSchema()
display(df_real_all.limit(5))


✅ 4단계 컬럼 정렬 완료
root
 |-- timestamp: timestamp (nullable = true)
 |-- label: long (nullable = true)
 |-- subject: string (nullable = true)
 |-- acc_x: double (nullable = true)
 |-- acc_y: double (nullable = true)
 |-- acc_z: double (nullable = true)
 |-- gyro_x: double (nullable = true)
 |-- gyro_y: double (nullable = true)
 |-- gyro_z: double (nullable = true)



timestamp,label,subject,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z
1970-01-01T00:00:00Z,0,train_base,-9.485827445983888,-4.910507678985596,0.0191536135971546,0.1698205322027206,-0.1954768747091293,0.0122173046693205
1970-01-01T00:00:00Z,0,train_base,-9.351752281188965,-4.72376012802124,-0.0502782352268695,0.6206390857696533,-0.2333505302667617,-0.8881980776786804
1970-01-01T00:00:00Z,0,train_base,-9.27034854888916,-4.417302131652832,-0.0287304203957319,1.099557399749756,-0.5290092825889587,-1.7666223049163818
1970-01-01T00:00:00Z,0,train_base,-9.17936897277832,-4.2760443687438965,-0.1077390760183334,1.23394775390625,-0.6230825781822205,-2.046398639678955
1970-01-01T00:00:00Z,0,train_base,-9.047688484191896,-4.02465295791626,-0.1077390760183334,1.1508700847625732,-0.7073819637298584,-2.118480682373047


# **5. Window 데이터 생성**

In [0]:
# =========================================================
# 5. Window 데이터 생성 (Raw → Sequence)
# =========================================================

import numpy as np

WINDOW_SIZE = 128
STRIDE = 64

FEATURE_COLS = [
    "acc_x", "acc_y", "acc_z",
    "gyro_x", "gyro_y", "gyro_z"
]

# ---------------------------------------------------------
# Spark → Pandas
# ---------------------------------------------------------
df_real = (
    df_real_all
    .orderBy("subject", "timestamp")
    .toPandas()
)

required_cols = FEATURE_COLS + ["label", "subject", "timestamp"]
missing = set(required_cols) - set(df_real.columns)
if missing:
    raise RuntimeError(f"❌ Missing columns: {missing}")

X_list, y_list, window_ids, subjects = [], [], [], []

# ---------------------------------------------------------
# subject 단위 시계열 → Sliding Window
# ---------------------------------------------------------
for subject, g in df_real.groupby("subject"):
    g = g.sort_values("timestamp").reset_index(drop=True)

    if len(g) < WINDOW_SIZE:
        continue

    for start in range(0, len(g) - WINDOW_SIZE + 1, STRIDE):
        w = g.iloc[start:start + WINDOW_SIZE]

        # (WINDOW_SIZE, FEATURE_DIM)
        X_list.append(
            w[FEATURE_COLS].values
        )

        # label: window 내 다수결 또는 첫 값
        y_list.append(
            int(w["label"].mode()[0])
        )

        window_ids.append(
            f"{subject}_{start}"
        )
        subjects.append(subject)

# ---------------------------------------------------------
# numpy 변환
# ---------------------------------------------------------
X_all = np.array(X_list)      # (N, 128, 6)
y_all = np.array(y_list)

window_ids = np.array(window_ids)
subjects = np.array(subjects)

print("✅ X_all shape :", X_all.shape)
print("✅ y_all shape :", y_all.shape)
print("🔥 Fall ratio  :", y_all.mean())


✅ X_all shape : (4021, 128, 6)
✅ y_all shape : (4021,)
🔥 Fall ratio  : 0.574732653568764


# **6. Train / Validation 데이터 분리**

In [0]:
# =========================================================
# 6. Train / Validation 데이터 분리 (Time-based)
# =========================================================

import numpy as np

# ---------------------------------------------------------
# 입력 데이터
# ---------------------------------------------------------
X = X_all          # shape: (N, 128, 6)
y = y_all          # shape: (N,)

N = len(X)

if N == 0:
    raise RuntimeError("❌ No window data found. Check step 5.")

# ---------------------------------------------------------
# 시간 기반 Split 비율
# ---------------------------------------------------------
TRAIN_RATIO = 0.8
split_idx = int(N * TRAIN_RATIO)

# ---------------------------------------------------------
# Split (과거 → Train / 미래 → Validation)
# ---------------------------------------------------------
X_train = X[:split_idx]
y_train = y[:split_idx]

X_val = X[split_idx:]
y_val = y[split_idx:]

# ---------------------------------------------------------
# 결과 확인
# ---------------------------------------------------------
print("📊 Time-based Split Summary")
print(" - Total windows :", N)
print(" - Train windows :", len(X_train))
print(" - Val windows   :", len(X_val))

print("Train:", X_train.shape, y_train.shape)
print("Val  :", X_val.shape, y_val.shape)

print("🔥 Train fall ratio:", y_train.mean())
print("🔥 Val   fall ratio:", y_val.mean())

📊 Time-based Split Summary
 - Total windows : 4021
 - Train windows : 3216
 - Val windows   : 805
Train: (3216, 128, 6) (3216,)
Val  : (805, 128, 6) (805,)
🔥 Train fall ratio: 0.46828358208955223
🔥 Val   fall ratio: 1.0


In [0]:
# =========================================================
# AUTO-GUARD (자동화 안전 장치)
#   - 반드시 split 이후에 실행
# =========================================================

# ---------------------------------------------------------
# Guard 1️⃣ 최소 데이터 수
# ---------------------------------------------------------
MIN_TRAIN_SAMPLES = 300   # 필요 시 조정

if X_train.shape[0] < MIN_TRAIN_SAMPLES:
    dbutils.notebook.exit(
        f"SKIP: not enough training data "
        f"(X_train={X_train.shape[0]})"
    )

# ---------------------------------------------------------
# Guard 2️⃣ Fall 비율 상한
# ---------------------------------------------------------
fall_ratio = y_train.mean()
MAX_FALL_RATIO = 0.7

print(f"🔎 Fall ratio (train): {fall_ratio:.3f}")

if fall_ratio > MAX_FALL_RATIO:
    dbutils.notebook.exit(
        f"SKIP: fall ratio too high ({fall_ratio:.2f})"
    )

print("✅ AUTO-GUARD passed, continue training")

🔎 Fall ratio (train): 0.468
✅ AUTO-GUARD passed, continue training


# **7. 모델 정의**

In [0]:
import tensorflow as tf

WINDOW_SIZE = X_train.shape[1]      # 64
FEATURE_DIM = X_train.shape[2]      # 6 or 7

print("Model input shape:", (WINDOW_SIZE, FEATURE_DIM))

def create_model(batch_size=None):
    model = tf.keras.Sequential([
        # ✅ 시계열 입력
        tf.keras.layers.InputLayer(
            batch_input_shape=(batch_size, WINDOW_SIZE, FEATURE_DIM)
        ),

        # ✅ Conv1D: 국소 패턴 (충격, peak)
        tf.keras.layers.Conv1D(
            filters=32,
            kernel_size=3,
            padding="same",
            activation="relu"
        ),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Dropout(0.2),

        # ✅ LSTM: 시간 흐름
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dropout(0.3),

        # ✅ 분류기
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])
    return model

train_model = create_model(batch_size=None)

Model input shape: (128, 6)


2025-12-22 02:31:51.350100: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


# **8. 학습 전략 설정 (Class Weight & EarlyStopping)**

In [0]:
# =========================================================
# 8. 모델 학습 (Class Weight + EarlyStopping)
# =========================================================

import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

# ---------------------------------------------------------
# 1️⃣ Optimizer
# ---------------------------------------------------------
optimizer = Adam(learning_rate=1e-3)

# ---------------------------------------------------------
# 2️⃣ 클래스 가중치 (Train 기준)
# ---------------------------------------------------------
classes = np.unique(y_train)

class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=y_train
)

class_weight = dict(zip(classes, class_weights))
print("Class weights:", class_weight)

# ---------------------------------------------------------
# 3️⃣ Compile (⚠️ train_model 사용)
# ---------------------------------------------------------
train_model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=[
        tf.keras.metrics.Recall(name="recall"),
        tf.keras.metrics.Precision(name="precision"),
        "accuracy",
    ]
)

# ---------------------------------------------------------
# 4️⃣ EarlyStopping
# ---------------------------------------------------------
early_stop = EarlyStopping(
    monitor="val_recall",
    mode="max",
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# ---------------------------------------------------------
# 5️⃣ Fit
# ---------------------------------------------------------
history = train_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    class_weight=class_weight,
    callbacks=[early_stop],
    verbose=1
)

print("✅ Training finished")


Class weights: {np.int64(0): np.float64(0.9403508771929825), np.int64(1): np.float64(1.0677290836653386)}


Epoch 1/50
[1m 1/51[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:07[0m 4s/step - accuracy: 0.4531 - loss: 0.7127 - precision: 0.4576 - recall: 0.9000[1m 3/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 40ms/step - accuracy: 0.4575 - loss: 0.7046 - precision: 0.4552 - recall: 0.9208[1m 5/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 36ms/step - accuracy: 0.4718 - loss: 0.6983 - precision: 0.4620 - recall: 0.9105[1m 7/51[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 35ms/step - accuracy: 0.4935 - loss: 0.6920 - precision: 0.4761 - recall: 0.8966



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 60ms/step - accuracy: 0.6813 - loss: 0.5833 - precision: 0.6306 - recall: 0.8419 - val_accuracy: 0.9801 - val_loss: 0.1174 - val_precision: 1.0000 - val_recall: 0.9801
Epoch 2/50
[1m 1/51[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 69ms/step - accuracy: 0.8750 - loss: 0.2961 - precision: 0.7941 - recall: 0.9643[1m 3/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 32ms/step - accuracy: 0.8819 - loss: 0.2797 - precision: 0.8164 - recall: 0.9707[1m 5/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 35ms/step - accuracy: 0.8971 - loss: 0.2486 - precision: 0.8378 - recall: 0.9754

Uploading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Training finished


# **9. 모델 학습**

In [0]:
# =========================================================
# 9. 모델 학습 (Real-only, Class Weight 적용) - 최종
# =========================================================

import tensorflow as tf
tf.keras.backend.clear_session()

# ---------------------------------------------------------
# 1️⃣ 학습 실행
# ---------------------------------------------------------
history = train_model.fit(
    X_train,                # ✅ (N, 64, FEATURE_DIM)
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    class_weight=class_weight,
    callbacks=[early_stop],
    verbose=1
)

print("✅ real_data_model training completed")

Epoch 1/50
[1m 1/51[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m39s[0m 783ms/step - accuracy: 0.9219 - loss: 0.1979 - precision: 0.8750 - recall: 1.0000[1m 3/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 49ms/step - accuracy: 0.9444 - loss: 0.1616 - precision: 0.9048 - recall: 1.0000  [1m 5/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 42ms/step - accuracy: 0.9443 - loss: 0.1694 - precision: 0.9029 - recall: 1.0000[1m 7/51[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 38ms/step - accuracy: 0.9446 - loss: 0.1719 - precision: 0.9035 - recall: 0.9987



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - accuracy: 0.9062 - loss: 0.2532 - precision: 0.8481 - recall: 0.9787 - val_accuracy: 0.9925 - val_loss: 0.2386 - val_precision: 1.0000 - val_recall: 0.9925
Epoch 2/50
[1m 1/51[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 51ms/step - accuracy: 0.8906 - loss: 0.2521 - precision: 0.8056 - recall: 1.0000[1m 3/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 27ms/step - accuracy: 0.8733 - loss: 0.2746 - precision: 0.7810 - recall: 1.0000[1m 5/51[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 27ms/step - accuracy: 0.8602 - loss: 0.2922 - precision: 0.7620 - recall: 0.9986

Uploading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

✅ real_data_model training completed


# **10. real_data_model 모델 저장**

In [0]:
# =========================================================
# 10. 모델 저장 (MLflow + TFLite) – Real-only (Wear OS FINAL)
# =========================================================

import os
import mlflow
import mlflow.keras
import tensorflow as tf
from mlflow.models.signature import infer_signature

# ---------------------------------------------------------
# 10-1️⃣ MLflow 저장 (학습 모델 기준)
# ---------------------------------------------------------
signature = infer_signature(
    X_train[:1],                       # (1, 64, FEATURE_DIM)
    train_model.predict(X_train[:1])
)

with mlflow.start_run(run_name="real_data_model_wearos_final"):
    mlflow.keras.log_model(
        train_model,
        artifact_path="model",
        signature=signature,
        registered_model_name="real_data_model"
    )

    # 메타데이터
    mlflow.log_param("dataset", "real_only")
    mlflow.log_param("window_size", WINDOW_SIZE)
    mlflow.log_param("stride", STRIDE)
    mlflow.log_param("feature_dim", FEATURE_DIM)
    mlflow.log_param("feature", "raw_sequence")
    mlflow.log_param("split", "event_based")
    mlflow.log_param("target", "fall_detection")
    mlflow.log_param("wear_os_verified", True)

print("✅ Model logged to MLflow (real_data_model)")

# ---------------------------------------------------------
# 10-2️⃣ Keras → TFLite 변환 (🔥 Wear OS 안전 고정)
# ---------------------------------------------------------

# 🔥 변환 전용 모델 (batch_size = 1)
convert_model = create_model(batch_size=1)
convert_model.set_weights(train_model.get_weights())

converter = tf.lite.TFLiteConverter.from_keras_model(convert_model)

# Wear OS 안정성 설정
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS
]
converter.optimizations = []              # 최적화 비활성화
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32

tflite_model = converter.convert()

# ---------------------------------------------------------
# 10-3️⃣ 파일 저장 (Version + Latest)
# ---------------------------------------------------------
BASE_DIR = "/dbfs/FileStore/tflite"
os.makedirs(BASE_DIR, exist_ok=True)

VERSIONED_NAME = "real_data_model_win64_seq_wearos.tflite"
LATEST_NAME    = "latest_model.tflite"

with open(f"{BASE_DIR}/{VERSIONED_NAME}", "wb") as f:
    f.write(tflite_model)

with open(f"{BASE_DIR}/{LATEST_NAME}", "wb") as f:
    f.write(tflite_model)

print("✅ Versioned model saved :", VERSIONED_NAME)
print("✅ Latest model updated  :", LATEST_NAME)

# ---------------------------------------------------------
# 10-4️⃣ 다운로드 URL
# ---------------------------------------------------------
print(
    "📥 Stable Download URL:\n"
    "https://adb-2608286707928203.3.azuredatabricks.net"
    "/files/tflite/latest_model.tflite"
)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step


🔗 View Logged Model at: https://adb-2608286707928203.3.azuredatabricks.net/ml/experiments/4041749224166180/models/m-e4f8f5a323114636b7e23a630d472ad9?o=2608286707928203
Registered model 'real_data_model' already exists. Creating a new version of this model...


Uploading artifacts:   0%|          | 0/10 [00:00<?, ?it/s]

🔗 Created version '14' of model 'team2_databricks.default.real_data_model': https://adb-2608286707928203.3.azuredatabricks.net/explore/data/models/team2_databricks/default/real_data_model/version/14?o=2608286707928203


✅ Model logged to MLflow (real_data_model)
INFO:tensorflow:Assets written to: /tmp/tmpp81vqszg/assets


INFO:tensorflow:Assets written to: /tmp/tmpp81vqszg/assets


Saved artifact at '/tmp/tmpp81vqszg'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(1, 128, 6), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(1, 1), dtype=tf.float32, name=None)
Captures:
  140290441442512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441431568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441435600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441433296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441444624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441436368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441437520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441438288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441437904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441441936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140290441437328: TensorSp

W0000 00:00:1766370848.833626    1301 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1766370848.833929    1301 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-12-22 02:34:08.835421: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpp81vqszg
2025-12-22 02:34:08.836311: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-12-22 02:34:08.836324: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpp81vqszg
I0000 00:00:1766370848.846205    1301 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled
2025-12-22 02:34:08.847599: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-12-22 02:34:08.911704: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpp81vqszg
2025-12-22 02:34:08.926454: I tensorflow/cc/saved_model/loader.cc:471] SavedModel 

✅ Versioned model saved : real_data_model_win64_seq_wearos.tflite
✅ Latest model updated  : latest_model.tflite
📥 Stable Download URL:
https://adb-2608286707928203.3.azuredatabricks.net/files/tflite/latest_model.tflite


In [0]:
# =========================================================
# 10-5️⃣ TFLite Wear OS 호환성 검증
# =========================================================

import tensorflow as tf

interpreter = tf.lite.Interpreter(
    model_path="/dbfs/FileStore/tflite/latest_model.tflite"
)
interpreter.allocate_tensors()

print("🔍 Ops used in TFLite:")
bad = False
for d in interpreter._get_ops_details():
    print(" -", d["op_name"], d.get("version", "N/A"))
    if d.get("version") == 12:
        bad = True

if bad:
    raise RuntimeError("❌ Wear OS incompatible TFLite (FULLY_CONNECTED v12 detected)")
else:
    print("✅ Wear OS SAFE: no FC v12 detected")


🔍 Ops used in TFLite:
 - RESHAPE N/A
 - CONV_2D N/A
 - RESHAPE N/A
 - ADD N/A
 - MUL N/A
 - ADD N/A
 - RESHAPE N/A
 - MAX_POOL_2D N/A
 - RESHAPE N/A
 - WHILE N/A
 - STRIDED_SLICE N/A
 - FULLY_CONNECTED N/A
 - FULLY_CONNECTED N/A
 - LOGISTIC N/A
 - DELEGATE N/A
 - DELEGATE N/A
✅ Wear OS SAFE: no FC v12 detected


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [0]:
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)

# ---------------------------------------------------------
# 1️⃣ 예측
# ---------------------------------------------------------
y_prob = train_model.predict(X_val).ravel()

THRESHOLD = 0.5  # 필요 시 조정
y_pred = (y_prob >= THRESHOLD).astype(int)

# ---------------------------------------------------------
# 2️⃣ 주요 지표
# ---------------------------------------------------------
acc  = accuracy_score(y_val, y_pred)
prec = precision_score(y_val, y_pred, zero_division=0)
rec  = recall_score(y_val, y_pred)
f1   = f1_score(y_val, y_pred)

print("📊 Validation Metrics")
print(f" - Accuracy : {acc:.3f}")
print(f" - Precision: {prec:.3f}")
print(f" - Recall   : {rec:.3f}")
print(f" - F1-score : {f1:.3f}")

# ---------------------------------------------------------
# 3️⃣ Confusion Matrix
# ---------------------------------------------------------
cm = confusion_matrix(y_val, y_pred)

print("\n🧮 Confusion Matrix [TN FP / FN TP]")
print(cm)

[1m 1/26[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 61ms/step[1m 7/26[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step [1m14/26[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 8ms/step[1m19/26[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 9ms/step[1m24/26[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 9ms/step[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
📊 Validation Metrics
 - Accuracy : 0.993
 - Precision: 1.000
 - Recall   : 0.993
 - F1-score : 0.996

🧮 Confusion Matrix [TN FP / FN TP]
[[  0   0]
 [  6 799]]


In [0]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler = StandardScaler()
df_real[FEATURE_COLS] = scaler.fit_transform(df_real[FEATURE_COLS])


In [0]:
print("MEAN:", scaler.mean_)
print("SCALE:", scaler.scale_)

MEAN: [-6.22699913 -4.2384822   1.46259864 -0.00677336 -0.32078875 -0.02531408]
SCALE: [11.43841041 14.48611489 12.02212232  4.36645276  3.03000227  3.54862473]
