In [2]:
# -*- coding: utf-8 -*-
import os, re, datetime, io
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau, ModelCheckpoint

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

# =========================
# パス設定（必要に応じて変更）
# =========================
# あなたのリポジトリ構成に合わせた既定値
PROCESS_DIR = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\process"

# 使いたい軌跡サブフォルダ（例：正規化図のみ使うなら *_nor を指定）
# 例) ["tail_set_nor", "left_paw_nor", "right_paw_nor"]
# 例) 生の軌跡なら ["tail_set", "left_paw", "right_paw"]
SRC_SUBDIRS = [
    "left_paw_nor",
    "right_paw_nor",
    # "left_tarsal_nor",
    # "right_tarsal_nor",
]

# 出力先
MODEL_DIR = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\models_cnn_traj"
CURVE_DIR = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\output\learning_curve_traj"
EVAL_DIR  = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\output\learning_curve_traj"

# ★ 学習ログの保存先（任意のパスを指定）
LOG_DIR   = r"C:\kanno\vscode\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\data\output\logs_traj"

os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(CURVE_DIR, exist_ok=True)
os.makedirs(EVAL_DIR,  exist_ok=True)
os.makedirs(LOG_DIR,   exist_ok=True)

# =========================
# ハイパパラメータ
# =========================
IMG_SIZE = (300, 300)
BATCH_SIZE = 16
EPOCHS = 200
VAL_RATIO = 0.15
LEARNING_RATE = 5e-6
AUGMENT = True  # 軽いDataAugを入れる場合は True

# =========================
# クラスとラベル推定
# =========================
# 3クラス分類：one, two, normal
CLASSES = ["one", "two", "normal"]
NAME2IDX = {c:i for i,c in enumerate(CLASSES)}

def infer_label_from_path(p: Path):
    """
    ファイル名と親フォルダ名をトークン化して判定
    - 'one' -> one, 'two' -> two, 'normal' -> normal
    """
    def tokens(s):
        return [t for t in re.split(r"[^a-z0-9]+", s.lower()) if t]

    toks = set(tokens(p.stem)) | set(tokens(p.name)) | set(tokens(p.parent.name))
    if "one" in toks:    return NAME2IDX["one"]
    if "two" in toks:    return NAME2IDX["two"]
    if "normal" in toks: return NAME2IDX["normal"]
    return None  # 判定不能

# =========================
# 画像リスト収集（サブフォルダ別カウントも返す）
# =========================
def collect_images(process_dir: str, subdirs=None, exts=(".png", ".jpg", ".jpeg")):
    proc = Path(process_dir)
    paths, labels = [], []
    per_dir_counts = {}  # {subdir_name: count}

    subdirs = list(subdirs) if subdirs else []
    if subdirs:
        cand_dirs = [proc / sd for sd in subdirs]
    else:
        cand_dirs = [d for d in proc.iterdir() if d.is_dir()]

    for d in cand_dirs:
        if not d.exists():
            print(f"[WARN] ディレクトリが見つかりません: {d}")
            continue
        before = len(paths)
        for p in d.rglob("*"):
            if p.is_file() and p.suffix.lower() in exts:
                y = infer_label_from_path(p)
                if y is None:
                    print(f"[SKIP] ラベル不明: {p}")
                    continue
                paths.append(str(p))
                labels.append(y)
        per_dir_counts[d.name] = len(paths) - before

    if not paths:
        raise FileNotFoundError("画像が見つかりませんでした。SRC_SUBDIRS とファイル名ルール（one/two/normal）を確認してください。")

    df = pd.DataFrame({"path": paths, "y": labels})
    return df, per_dir_counts

df_all, per_dir_counts = collect_images(PROCESS_DIR, SRC_SUBDIRS)
print("[INFO] 収集画像数:", len(df_all))
print(df_all["y"].value_counts().rename(index={i:c for i,c in enumerate(CLASSES)}))

# =========================
# Stratified Train/Val 分割
# =========================
sss = StratifiedShuffleSplit(n_splits=1, test_size=VAL_RATIO, random_state=42)
train_idx, val_idx = next(sss.split(df_all["path"], df_all["y"]))
df_tr = df_all.iloc[train_idx].reset_index(drop=True)
df_va = df_all.iloc[val_idx].reset_index(drop=True)
print(f"[INFO] split -> train: {len(df_tr)}, val: {len(df_va)}")

# =========================
# tf.data パイプライン
# =========================
AUTOTUNE = tf.data.AUTOTUNE

def decode_img(path):
    img_bytes = tf.io.read_file(path)
    img = tf.io.decode_image(img_bytes, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.clip_by_value(img/255.0, 0.0, 1.0)
    return img

def aug(img):
    if AUGMENT:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, max_delta=0.05)
        img = tf.image.random_contrast(img, lower=0.9, upper=1.1)
    return img

def make_ds(paths, labels, training=True, batch_size=BATCH_SIZE):
    ds_paths = tf.data.Dataset.from_tensor_slices((paths, labels))
    def _load(path, y):
        img = decode_img(path)
        if training:
            img = aug(img)
        y_onehot = tf.one_hot(y, depth=len(CLASSES))
        return img, y_onehot
    ds = ds_paths.map(_load, num_parallel_calls=AUTOTUNE)
    if training:
        ds = ds.shuffle(buffer_size=min(2000, len(paths)))
    ds = ds.batch(batch_size).prefetch(AUTOTUNE)
    return ds

train_ds = make_ds(df_tr["path"].values, df_tr["y"].values, training=True)
val_ds   = make_ds(df_va["path"].values, df_va["y"].values, training=False)

# =========================
# class_weight（不均衡対策）
# =========================
cls_w = compute_class_weight(
    class_weight="balanced",
    classes=np.array(list(range(len(CLASSES)))),
    y=df_tr["y"].values
)
class_weight = {i: float(cls_w[i]) for i in range(len(CLASSES))}
print("[INFO] class_weight:", class_weight)

# =========================
# CNN モデル（提示構成ベース）
# =========================
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(CLASSES), activation='softmax')
])
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# =========================
#  走行ID & 保存パス
# =========================
RUN_ID = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = os.path.join(MODEL_DIR, f"traj_cnn_{RUN_ID}.h5")
curve_path = os.path.join(CURVE_DIR, f"traj_learning_curve_{RUN_ID}.png")
pred_csv   = os.path.join(EVAL_DIR,  f"traj_val_predictions_{RUN_ID}.csv")
cm_png     = os.path.join(EVAL_DIR,  f"traj_cm_{RUN_ID}.png")
hist_csv   = os.path.join(LOG_DIR,   f"history_{RUN_ID}.csv")     # 追加: CSV履歴
log_txt    = os.path.join(LOG_DIR,   f"train_run_{RUN_ID}.txt")   # 追加: テキストログ

# =========================
#  コールバック
# =========================
callbacks = [
    ReduceLROnPlateau(monitor="val_loss", mode="min", factor=0.5, patience=5, min_lr=1e-6, verbose=1),
    ModelCheckpoint(model_path, monitor="val_accuracy", mode="max", save_best_only=True, verbose=1),
    CSVLogger(hist_csv, append=False)  # 追加: エポック毎のloss/accをCSV保存
]

# =========================
#  学習
# =========================
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    class_weight=class_weight,
    verbose=1,
    callbacks=callbacks
)

print(f"✅ モデル保存: {model_path}")

# =========================
# 学習曲線保存
# =========================
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title("Loss"); plt.legend(); plt.grid(True)

plt.subplot(1,2,2)
plt.plot(history.history['accuracy'], label='Train Acc')
plt.plot(history.history['val_accuracy'], label='Val Acc')
plt.title("Accuracy"); plt.legend(); plt.grid(True)

plt.tight_layout()
plt.savefig(curve_path, dpi=150)
plt.close()
print(f"✅ 学習曲線保存: {curve_path}")

# =========================
#  検証データでの詳細評価（CSV & 混同行列）
# =========================
val_paths  = df_va["path"].values
val_labels = df_va["y"].values
y_true, y_prob, y_pred = [], [], []

for start in range(0, len(val_paths), BATCH_SIZE):
    batch_paths = val_paths[start:start+BATCH_SIZE]
    batch_imgs = np.stack([
        tf.image.resize(
            tf.io.decode_image(tf.io.read_file(p), channels=3, expand_animations=False),
            IMG_SIZE
        ).numpy()/255.0
        for p in batch_paths
    ], axis=0)
    probs = model.predict(batch_imgs, verbose=0)
    preds = probs.argmax(axis=1)
    y_prob.append(probs)
    y_pred.append(preds)
    y_true.append(val_labels[start:start+BATCH_SIZE])

y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)
y_prob = np.concatenate(y_prob)

print("\n[Validation] classification_report")
report_txt = classification_report(y_true, y_pred, target_names=CLASSES, digits=4)
print(report_txt)

# 予測CSV
df_pred = pd.DataFrame({
    "path": val_paths,
    "true": [CLASSES[i] for i in y_true],
    "pred": [CLASSES[i] for i in y_pred],
    **{f"p_{c}": y_prob[:, i] for i, c in enumerate(CLASSES)}
})
df_pred.to_csv(pred_csv, index=False, encoding="utf-8-sig")
print(f"✅ 予測CSV保存: {pred_csv}")

# 混同行列PNG
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(CLASSES))))
fig, ax = plt.subplots(figsize=(6,6))
im = ax.imshow(cm.astype(np.float32) / max(cm.sum(),1), cmap="Blues")
ax.figure.colorbar(im, ax=ax)
ax.set_xticks(list(range(len(CLASSES)))); ax.set_yticks(list(range(len(CLASSES))))
ax.set_xticklabels(CLASSES); ax.set_yticklabels(CLASSES)
ax.set_xlabel("Predicted"); ax.set_ylabel("True")
ax.set_title("Confusion Matrix (Validation)")
for i in range(len(CLASSES)):
    for j in range(len(CLASSES)):
        ax.text(j, i, int(cm[i,j]),
                ha="center",
                color="white" if cm[i,j] > cm.max()/2 else "black",
                fontsize=14)
plt.tight_layout()
plt.savefig(cm_png, dpi=150); plt.close()
print(f"✅ 混同行列保存: {cm_png}")

# =========================
#  テキストログ作成（★追加機能）
# =========================
# モデルsummaryを文字列で取得
string_buf = io.StringIO()
model.summary(print_fn=lambda s: string_buf.write(s + "\n"))
model_summary_str = string_buf.getvalue()

# クラス別カウント
def counts_named(series):
    vc = series.value_counts().sort_index()
    return {CLASSES[i]: int(vc.get(i, 0)) for i in range(len(CLASSES))}

counts_all = counts_named(df_all["y"])
counts_tr  = counts_named(df_tr["y"])
counts_va  = counts_named(df_va["y"])

# エポック履歴を書き出し
def epoch_lines(hist: dict) -> str:
    n = len(hist["loss"])
    lines = ["epoch,loss,accuracy,val_loss,val_accuracy"]
    for i in range(n):
        lines.append(f"{i+1},{hist['loss'][i]:.6f},{hist['accuracy'][i]:.6f},{hist['val_loss'][i]:.6f},{hist['val_accuracy'][i]:.6f}")
    return "\n".join(lines)

with open(log_txt, "w", encoding="utf-8") as f:
    f.write(f"# CNN training log (3-class)  RUN_ID={RUN_ID}\n\n")
    f.write("## Hyperparameters\n")
    f.write(f"IMG_SIZE       : {IMG_SIZE}\n")
    f.write(f"BATCH_SIZE     : {BATCH_SIZE}\n")
    f.write(f"EPOCHS         : {EPOCHS}\n")
    f.write(f"VAL_RATIO      : {VAL_RATIO}\n")
    f.write(f"LEARNING_RATE  : {LEARNING_RATE}\n")
    f.write(f"AUGMENT        : {AUGMENT}\n")
    f.write(f"CLASSES        : {CLASSES}\n")
    f.write(f"SRC_SUBDIRS    : {SRC_SUBDIRS}\n")
    f.write(f"class_weight   : {class_weight}\n\n")

    f.write("## Dataset summary\n")
    f.write(f"Total images   : {len(df_all)}\n")
    f.write(f"By class (all) : {counts_all}\n")
    f.write(f"By class (train): {counts_tr}\n")
    f.write(f"By class (val)  : {counts_va}\n")
    f.write(f"By subdir (all): {per_dir_counts}\n\n")

    f.write("## Paths\n")
    f.write(f"MODEL_DIR: {MODEL_DIR}\nCURVE_DIR: {CURVE_DIR}\nEVAL_DIR : {EVAL_DIR}\nLOG_DIR  : {LOG_DIR}\n\n")
    f.write(f"Saved model path     : {model_path}\n")
    f.write(f"Learning curve path  : {curve_path}\n")
    f.write(f"Predictions CSV path : {pred_csv}\n")
    f.write(f"Confusion matrix PNG : {cm_png}\n")
    f.write(f"History CSV path     : {hist_csv}\n\n")

    f.write("## Model summary\n")
    f.write(model_summary_str + "\n")

    f.write("## Epoch history (train/val)\n")
    f.write(epoch_lines(history.history) + "\n\n")

    f.write("## Validation report\n")
    f.write(report_txt + "\n")

    f.write("## Confusion matrix (raw counts)\n")
    f.write(pd.DataFrame(cm, index=CLASSES, columns=CLASSES).to_string() + "\n")

print(f"✅ テキストログ保存: {log_txt}")


[INFO] 収集画像数: 2422
y
normal    1494
one        526
two        402
Name: count, dtype: int64
[INFO] split -> train: 2058, val: 364
[INFO] class_weight: {0: 1.534675615212528, 1: 2.0058479532163744, 2: 0.5405831363278172}
Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


KeyboardInterrupt: 