In [None]:
import os
# Hide every CUDA device from this process. This is done before TensorFlow is
# imported so that TF starts up without seeing a GPU (CPU-only run).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"   # hide GPU from TF

import tensorflow as tf

# Sanity check: an empty list here means TensorFlow sees no GPU.
print("GPUs visible:", tf.config.list_physical_devices('GPU'))

GPUs visible: []


In [None]:
import zipfile, os

ZIP_NAME = "Dataset_V2.zip"  # must match the uploaded filename

# Unpack the uploaded archive into /content (ZIP_NAME is resolved against the
# current working directory).
with zipfile.ZipFile(ZIP_NAME) as z:
    z.extractall("/content")

# List everything in /content whose name mentions the dataset, as a quick check.
print("Extracted folders in /content:")
print([
    entry
    for entry in os.listdir("/content")
    if any(tag in entry for tag in ("Dataset", "dataset"))
])

Extracted folders in /content:
['Dataset_V2.zip', 'Dataset_V2']


In [None]:
import os, glob, json, random
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler

# Seed every RNG used in this notebook (Python `random`, NumPy, TensorFlow).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# ====== EDIT THESE PATHS ======
# Relative path: resolved against the current working directory.
DATASET_ROOT = "Dataset_V2"   # <-- root containing one sub-folder per word (including idle)
OUT_DIR = "/content/out_metric"   # scaler params, label map and exported encoder go here

# Day-wise split: files are assigned to a split by the name of their parent folder.
TRAIN_DAYS = {f"Day_{i}" for i in range(1, 9)}   # Day_1..Day_8
VAL_DAYS   = {"Day_9"}
TEST_DAYS  = {"Day_10"}

# Window length used in your project
T = 512   # samples per window after cropping/padding
F = 9  # features after dropping timestamp

# Training
EMBED_DIM = 128             # size of the L2-normalized embedding
BATCH_TRIPLETS = 64         # number of triplets per batch
STEPS_PER_EPOCH = 200       # triplet batches per epoch
EPOCHS = 25
LR = 1e-3                   # Adam learning rate
MARGIN = 0.4                # triplet margin (0.2..0.6 works)

os.makedirs(OUT_DIR, exist_ok=True)
print("OUT_DIR:", OUT_DIR)


OUT_DIR: /content/out_metric


In [None]:
def load_csv_robust(fp, expected_cols=10):
    """Parse a comma-separated numeric text file, keeping only well-formed rows.

    The file is read as bytes, NUL bytes are removed and undecodable UTF-8
    sequences are dropped. A row is kept only if, after removing trailing
    commas, it has exactly ``expected_cols`` non-empty fields that all parse
    as finite floats. Everything else (headers, truncated lines, ``nan`` /
    ``inf`` tokens) is skipped silently.

    Args:
        fp: Path of the file to read.
        expected_cols: Required number of comma-separated fields per row.

    Returns:
        ``np.float32`` array of shape ``(n_valid_rows, expected_cols)``.

    Raises:
        ValueError: If the file contains no valid row.
        OSError: If the file cannot be opened or read.
    """
    with open(fp, "rb") as f:
        raw = f.read().replace(b"\x00", b"")
    text = raw.decode("utf-8", errors="ignore")

    good_rows = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        # Drop any run of trailing commas (and whitespace between them).
        while line.endswith(","):
            line = line[:-1].strip()
        parts = [p.strip() for p in line.split(",")]
        if len(parts) != expected_cols:
            continue
        if any(p == "" for p in parts):
            continue
        try:
            values = [float(p) for p in parts]
        except ValueError:
            # Non-numeric token: treat the whole row as corrupt.
            continue
        # float() accepts "nan"/"inf"; such rows would poison scaling downstream.
        if not np.isfinite(values).all():
            continue
        good_rows.append(values)

    if not good_rows:
        raise ValueError(f"No valid numeric rows in {fp}")
    return np.array(good_rows, dtype=np.float32)

def moving_average(x, w=25):
    """Smooth a 1-D signal with a centred box filter of width ``w``.

    The result always has the same length as ``x``. ``np.convolve`` with
    ``mode="same"`` returns ``max(len(x), w)`` samples, so the window is
    clamped to the signal length; otherwise a signal shorter than ``w`` would
    come back longer than it went in.

    Args:
        x: 1-D array-like signal.
        w: Requested window width in samples (values below 1 are treated as 1).

    Returns:
        Smoothed signal with ``len(x)`` samples; an empty input yields an
        empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        return np.zeros(0, dtype=np.result_type(x.dtype, np.float32))
    w = max(1, min(int(w), x.size))
    kernel = np.ones(w, dtype=np.float32) / w
    return np.convolve(x, kernel, mode="same")

def fix_length_center(X, target_len):
    """Return exactly ``target_len`` rows of ``X``.

    Longer inputs are cut to their central ``target_len`` rows; shorter inputs
    are extended with zero rows appended at the end.
    """
    surplus = len(X) - target_len
    if surplus < 0:
        filler = np.zeros((-surplus, X.shape[1]), dtype=X.dtype)
        return np.vstack([X, filler])
    offset = surplus // 2
    return X[offset:offset + target_len]

def emg_dc_remove(X):
    """Return a copy of ``X`` whose first three columns have zero mean.

    Only columns 0..2 are shifted (each by its own column mean); the remaining
    columns and the caller's array are left untouched.
    """
    centered = X.copy()
    head = centered[:, :3]          # view into the copy, so the update is in place
    head -= head.mean(axis=0, keepdims=True)
    return centered

def crop_active_region_emg(X, target_len=512, smooth_w=25, thresh_ratio=0.25):
    """Cut a ``target_len``-row window centred on the active part of the signal.

    Activity is measured as the smoothed sum of absolute values of the first
    three columns. Samples whose smoothed energy reaches ``thresh_ratio`` times
    the peak are "active"; the window is centred between the first and last
    active sample and shifted to stay inside the recording. If the signal is
    essentially flat or has fewer than five active samples, the central crop
    from ``fix_length_center`` is used instead. Short results are zero-padded
    at the end.

    Args:
        X: 2-D array, one row per time step.
        target_len: Number of rows in the returned window.
        smooth_w: Moving-average width applied to the energy trace.
        thresh_ratio: Fraction of the peak energy that counts as active.

    Returns:
        Array with ``target_len`` rows and the same number of columns as ``X``.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        return np.zeros((target_len, X.shape[1]), dtype=np.float32)

    envelope = moving_average(np.abs(X[:, :3]).sum(axis=1), w=smooth_w)
    peak = float(np.max(envelope))

    # Only look for active samples when there is a meaningful peak.
    active_idx = None
    if peak > 1e-6:
        active_idx = np.where(envelope >= thresh_ratio * peak)[0]
    if active_idx is None or len(active_idx) < 5:
        return fix_length_center(X, target_len)

    midpoint = (int(active_idx[0]) + int(active_idx[-1])) // 2

    lo = max(0, midpoint - target_len // 2)
    hi = lo + target_len
    if hi > n_samples:
        # Window runs off the end: pin it to the last sample and slide back.
        hi = n_samples
        lo = max(0, hi - target_len)

    window = X[lo:hi]
    shortfall = target_len - window.shape[0]
    if shortfall > 0:
        tail = np.zeros((shortfall, X.shape[1]), dtype=window.dtype)
        window = np.vstack([window, tail])
    return window

def load_one_sample(path):
    """Load one recording and return its un-normalized ``(T, 9)`` window.

    Steps: parse the 10-column file, drop column 0 (timestamp), remove the
    mean from the first three channels, then crop/pad to ``T`` rows around the
    active region.
    """
    raw = load_csv_robust(path, expected_cols=10)   # (Traw, 10)
    channels = emg_dc_remove(raw[:, 1:])            # (Traw, 9), timestamp dropped
    return crop_active_region_emg(channels, target_len=T, smooth_w=25, thresh_ratio=0.25)


In [None]:
def list_word_dirs(root):
    """Return the names of the immediate sub-directories of ``root``, sorted."""
    subdirs = []
    for entry in os.listdir(root):
        if os.path.isdir(os.path.join(root, entry)):
            subdirs.append(entry)
    subdirs.sort()
    return subdirs

def build_label_map(root):
    """Map each class folder name under ``root`` to its index in sorted order."""
    mapping = {}
    for idx, name in enumerate(list_word_dirs(root)):
        mapping[name] = idx
    return mapping

def collect_files_with_meta(root):
    """Index every ``.txt`` recording under ``root`` together with its labels.

    Each class folder is searched recursively. The "day" of a file is the name
    of its immediate parent directory, so a file placed directly in the class
    folder gets the class name as its day.

    Args:
        root: Dataset root containing one sub-folder per class.

    Returns:
        ``(items, label2id)`` where ``items`` is a list of
        ``(filepath, label_id, day_name, label)`` tuples ordered by class and
        then by path, and ``label2id`` maps class folder name to integer id.
    """
    label2id = build_label_map(root)
    items = []  # (filepath, label_id, day_name, label)

    for label, lab_id in label2id.items():
        class_dir = os.path.join(root, label)
        # Escape the directory part so names containing [, ], * or ? are
        # matched literally instead of being read as glob syntax.
        pattern = os.path.join(glob.escape(class_dir), "**", "*.txt")
        for fp in sorted(glob.glob(pattern, recursive=True)):
            day_name = os.path.basename(os.path.dirname(fp))  # e.g. "Day_1"
            items.append((fp, lab_id, day_name, label))
    return items, label2id

# Index the whole dataset once; `items` holds (filepath, label_id, day_name, label).
items, label2id = collect_files_with_meta(DATASET_ROOT)
id2label = {idx: name for name, idx in label2id.items()}

print("Classes:", len(label2id))
print("Example labels:", list(label2id)[:10])
print("Total files:", len(items))

# Distinct parent-folder names, used below for the day-wise split.
days_present = sorted({item[2] for item in items})
print("Days found:", days_present)


Classes: 11
Example labels: ['ada', 'awidinawa', 'boru', 'hawasa', 'hodai', 'idle', 'irida', 'narakai', 'pata', 'saduda']
Total files: 1100
Days found: ['Day_1', 'Day_10', 'Day_2', 'Day_3', 'Day_4', 'Day_5', 'Day_6', 'Day_7', 'Day_8', 'Day_9']


In [None]:
# Day-wise split: item[2] is the day folder name.
train_items = [it for it in items if it[2] in TRAIN_DAYS]
val_items   = [it for it in items if it[2] in VAL_DAYS]
test_items  = [it for it in items if it[2] in TEST_DAYS]

print("Train:", len(train_items), "Val:", len(val_items), "Test:", len(test_items))

# Fit scaler using train data only (streaming)
scaler = StandardScaler()

# We’ll sample some train files to fit quickly (or fit all if small)
FIT_MAX = min(800, len(train_items))  # you can increase to fit all
fit_subset = random.sample(train_items, FIT_MAX) if len(train_items) > FIT_MAX else train_items

all_rows = []
for fp, y, day, label in fit_subset:
    try:
        X = load_one_sample(fp)  # (512,9)
        all_rows.append(X)
    except Exception as e:
        # Keep going on unreadable files, but say which ones were dropped
        # instead of hiding the failure.
        print("[SKIP]", fp, "->", e)

if not all_rows:
    raise RuntimeError("No training file could be loaded; cannot fit the scaler.")

X_fit = np.concatenate(all_rows, axis=0)  # (N*512,9)
scaler.fit(X_fit)

# Persist the statistics and the label map so inference can reuse them.
scaler_params = {"mean": scaler.mean_.tolist(), "scale": scaler.scale_.tolist()}
os.makedirs(OUT_DIR, exist_ok=True)
with open(os.path.join(OUT_DIR, "scaler_params.json"), "w") as f:
    json.dump(scaler_params, f, indent=2)

with open(os.path.join(OUT_DIR, "label_map.json"), "w") as f:
    json.dump(label2id, f, indent=2)

print("Saved scaler_params.json and label_map.json to", OUT_DIR)

def normalize(X):
    """Standardize ``X`` per feature with the fitted scaler's mean and scale.

    A small constant (1e-6) is added to the scale to avoid division by zero.
    """
    centered = X - scaler.mean_
    return centered / (scaler.scale_ + 1e-6)


Train: 881 Val: 110 Test: 109
Saved scaler_params.json and label_map.json to /content/out_metric


In [None]:
from tensorflow.keras import layers, models

def build_encoder(T=512, F=9, embed_dim=128):
    """Build the CNN + LSTM encoder that maps a window to a unit-length embedding.

    Architecture: two Conv1D/BatchNorm/ReLU/MaxPool stages (64 then 128
    filters), dropout, a 128-unit LSTM, dropout, a 128-unit ReLU dense layer,
    dropout, a linear projection to ``embed_dim`` and a final L2 normalization.

    Args:
        T: Number of time steps per input window.
        F: Number of features per time step.
        embed_dim: Size of the output embedding.

    Returns:
        A Keras functional model named ``"cnn_lstm_encoder"`` taking
        ``(batch, T, F)`` inputs and returning ``(batch, embed_dim)``
        L2-normalized embeddings.
    """
    inp = layers.Input(shape=(T, F))

    x = layers.Conv1D(64, 5, padding="same")(inp)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool1D(2)(x)

    x = layers.Conv1D(128, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool1D(2)(x)

    x = layers.Dropout(0.3)(x)
    x = layers.LSTM(128, implementation=2)(x)  # kept as in the original ("CPU-safe")
    x = layers.Dropout(0.4)(x)

    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dropout(0.2)(x)

    # Embedding
    emb = layers.Dense(embed_dim)(x)

    # L2-normalize with the built-in layer rather than a Lambda wrapping a
    # Python lambda: it has no weights (saved weight files stay compatible)
    # and the saved .keras model can be reloaded without deserializing
    # arbitrary Python code.
    emb = layers.UnitNormalization(axis=-1, name="l2_norm")(emb)

    return models.Model(inp, emb, name="cnn_lstm_encoder")


In [None]:
from collections import defaultdict

# Group the TRAIN file paths by integer label id.
train_by_label = defaultdict(list)
for fp, y, day, label in train_items:
    train_by_label[y].append(fp)

# A class needs two or more files to supply both an anchor and a positive.
valid_labels = [lab for lab, paths in train_by_label.items() if len(paths) > 1]
print("Valid labels for triplets:", len(valid_labels), "/", len(label2id))

def sample_triplet():
    """Draw one random (anchor, positive, negative) training triplet.

    Anchor and positive are two distinct files of one randomly chosen class;
    the negative is a random file of a different class. Each file is loaded,
    preprocessed and normalized.

    Returns:
        Three ``np.float32`` arrays ``(A, P, N)``, one window each.

    Raises:
        ValueError: If fewer than two classes are usable for triplets (the
            negative-class search could otherwise never terminate), or if a
            chosen file has no valid rows.
    """
    if len(valid_labels) < 2:
        raise ValueError(
            "Need at least two classes with >=2 training samples to form a triplet"
        )

    # Choose anchor label
    a_lab = random.choice(valid_labels)
    pos_list = train_by_label[a_lab]
    a_fp, p_fp = random.sample(pos_list, 2)

    # Choose negative label != a_lab (rejection sampling)
    n_lab = random.choice(valid_labels)
    while n_lab == a_lab:
        n_lab = random.choice(valid_labels)
    n_fp = random.choice(train_by_label[n_lab])

    # Load + preprocess + normalize
    A = normalize(load_one_sample(a_fp)).astype(np.float32)
    P = normalize(load_one_sample(p_fp)).astype(np.float32)
    N = normalize(load_one_sample(n_fp)).astype(np.float32)
    return A, P, N

def triplet_batch(batch_size=BATCH_TRIPLETS):
    """Assemble a batch of exactly ``batch_size`` triplets.

    A triplet whose files fail to load is resampled, so the batch is never
    silently shorter than requested. Retries are bounded so a systematically
    broken dataset raises instead of looping forever.

    Args:
        batch_size: Number of triplets to return.

    Returns:
        ``(A, P, N)``: three arrays stacked along a new leading batch axis.

    Raises:
        RuntimeError: If too many sampling attempts fail; the last underlying
            error is attached as the cause.
    """
    A_list, P_list, N_list = [], [], []
    max_attempts = 10 * max(1, batch_size)
    attempts = 0
    last_error = None

    while len(A_list) < batch_size:
        if attempts >= max_attempts:
            raise RuntimeError(
                f"Could only build {len(A_list)}/{batch_size} triplets "
                f"after {attempts} attempts"
            ) from last_error
        attempts += 1
        try:
            A, P, N = sample_triplet()
        except Exception as e:
            # if a file fails, just resample
            last_error = e
            continue
        A_list.append(A)
        P_list.append(P)
        N_list.append(N)

    A = np.stack(A_list, axis=0)
    P = np.stack(P_list, axis=0)
    N = np.stack(N_list, axis=0)
    return A, P, N

# Smoke test: draw a small batch and show the three array shapes.
A, P, N = triplet_batch(8)
print(*(part.shape for part in (A, P, N)))


Valid labels for triplets: 11 / 11
(8, 512, 9) (8, 512, 9) (8, 512, 9)


In [None]:
# Build encoder first, using the window length, feature count and embedding
# size from the config constants.
encoder = build_encoder(T=T, F=F, embed_dim=EMBED_DIM)

# Quick sanity check: show the model type and the layer-by-layer summary.
print("Encoder built:", type(encoder))
encoder.summary()


Encoder built: <class 'keras.src.models.functional.Functional'>


In [None]:
# Single Adam optimizer shared by every call to train_step below.
optimizer = tf.keras.optimizers.Adam(LR)

@tf.function
def train_step(encoder, A, P, N):
    """Run one triplet-loss update on the encoder and return the batch loss.

    The anchor, positive and negative batches are each passed through
    ``encoder`` in training mode. The loss is the batch mean of
    ``relu(d(a, p) - d(a, n) + MARGIN)``, where ``d`` is the squared Euclidean
    distance between embeddings. Gradients with respect to the encoder's
    trainable variables are applied with the module-level ``optimizer``.

    Args:
        encoder: Model mapping a batch of windows to embeddings.
        A: Anchor batch.
        P: Positive batch (same class as the anchor).
        N: Negative batch (different class).
            Presumably all three are (batch, T, F) float32 tensors - confirm
            against the caller.

    Returns:
        Scalar tensor holding the mean triplet loss for this batch.
    """
    with tf.GradientTape() as tape:
        # Three separate forward passes in training mode.
        eA = encoder(A, training=True)
        eP = encoder(P, training=True)
        eN = encoder(N, training=True)

        # Squared Euclidean distances, one value per triplet.
        d_ap = tf.reduce_sum(tf.square(eA - eP), axis=1)
        d_an = tf.reduce_sum(tf.square(eA - eN), axis=1)
        # Hinge: zero loss once the negative is farther than the positive by MARGIN.
        loss = tf.reduce_mean(tf.nn.relu(d_ap - d_an + MARGIN))

    grads = tape.gradient(loss, encoder.trainable_variables)
    optimizer.apply_gradients(zip(grads, encoder.trainable_variables))
    return loss

# Train for EPOCHS epochs of STEPS_PER_EPOCH freshly sampled triplet batches,
# reporting the mean batch loss after each epoch.
for epoch in range(1, EPOCHS + 1):
    losses = []
    for step in range(STEPS_PER_EPOCH):
        A, P, N = triplet_batch(BATCH_TRIPLETS)
        batch_tensors = [tf.constant(arr, dtype=tf.float32) for arr in (A, P, N)]
        loss = train_step(encoder, *batch_tensors)
        losses.append(float(loss.numpy()))
    print(f"Epoch {epoch}/{EPOCHS}  loss={np.mean(losses):.4f}")


Epoch 1/25  loss=0.2390
Epoch 2/25  loss=0.1450
Epoch 3/25  loss=0.1113
Epoch 4/25  loss=0.0731
Epoch 5/25  loss=0.0422
Epoch 6/25  loss=0.0382
Epoch 7/25  loss=0.0209
Epoch 8/25  loss=0.0361
Epoch 9/25  loss=0.0230
Epoch 10/25  loss=0.0189
Epoch 11/25  loss=0.0165
Epoch 12/25  loss=0.0120
Epoch 13/25  loss=0.0078
Epoch 14/25  loss=0.0137
Epoch 15/25  loss=0.0064
Epoch 16/25  loss=0.0013
Epoch 17/25  loss=0.0019
Epoch 18/25  loss=0.0125
Epoch 19/25  loss=0.0109
Epoch 20/25  loss=0.0093
Epoch 21/25  loss=0.0050
Epoch 22/25  loss=0.0012
Epoch 23/25  loss=0.0014
Epoch 24/25  loss=0.0019
Epoch 25/25  loss=0.0018


In [None]:
# Persist only the encoder weights (the architecture comes from build_encoder).
WEIGHTS_PATH = "/content/encoder_triplet.weights.h5"
encoder.save_weights(WEIGHTS_PATH)
print("Saved:", WEIGHTS_PATH)


Saved: /content/encoder_triplet.weights.h5


In [None]:
# Save the full encoder (architecture + weights) in the native .keras format.
encoder.save("/content/encoder_triplet.keras")


In [None]:
# Export the encoder as a SavedModel directory; the few-shot cell further down
# loads it from this path via ENCODER_DIR.
encoder.export("/content/encoder_savedmodel_cpu")
# IPython shell escape: zip the exported directory into the current working
# directory for download.
!zip -r encoder_savedmodel_cpu.zip /content/encoder_savedmodel_cpu


Saved artifact at '/content/encoder_savedmodel_cpu'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 512, 9), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 128), dtype=tf.float32, name=None)
Captures:
  135036371382544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371382160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371384464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371382352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371381392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371385424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371385616: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371386192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371384080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135036371384656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13

In [None]:
# Second export of the same encoder, this time under OUT_DIR next to the
# scaler parameters and label map.
ENC_DIR = os.path.join(OUT_DIR, "encoder_savedmodel")
encoder.export(ENC_DIR)
print("Saved encoder to:", ENC_DIR)

# IPython shell escape; {ENC_DIR} is interpolated from the Python variable.
!zip -r /content/encoder_savedmodel.zip {ENC_DIR}
print("Zipped -> /content/encoder_savedmodel.zip")


Saved artifact at '/content/out_metric/encoder_savedmodel'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 512, 9), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 128), dtype=tf.float32, name=None)
Captures:
  136598910087440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910086864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910089168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910087248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910087056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910090128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910090320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910090512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910088400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136598910090704: TensorSpec(shape=(), dtype=tf.resource, name=Non

In [None]:
import zipfile, os

ZIP_PATH = "/content/FewShot.zip"
# Use a dedicated name for the extraction target. Reusing OUT_DIR here would
# overwrite the config value ("/content/out_metric"), so re-running the scaler
# or export cells afterwards would write their artifacts into /content.
FEWSHOT_EXTRACT_DIR = "/content"

with zipfile.ZipFile(ZIP_PATH, "r") as z:
    z.extractall(FEWSHOT_EXTRACT_DIR)

# List the directories now present at the extraction target as a quick check.
print("✅ Extracted. Top-level folders now:")
print([
    p for p in os.listdir(FEWSHOT_EXTRACT_DIR)
    if os.path.isdir(os.path.join(FEWSHOT_EXTRACT_DIR, p))
])


✅ Extracted. Top-level folders now:
['.config', 'encoder_savedmodel_cpu', 'FewShot', '__MACOSX', 'Dataset_V2', 'out_metric', 'sample_data']


In [None]:
import os, glob, json
import numpy as np
import tensorflow as tf

# =====================
# PATHS: inputs produced by the training cells above, plus the output DB
# =====================
ENCODER_DIR  = "/content/encoder_savedmodel_cpu"           # exported SavedModel directory
SCALER_PATH  = "/content/out_metric/scaler_params.json"    # JSON with "mean" and "scale" lists
FEWSHOT_ROOT = "/content/FewShot"                          # one sub-folder of .txt files per new word
OUT_DB       = "/content/fewshot_db.json"                  # prototypes are written here

# Window length and feature count; same values as in the training config.
T = 512
F = 9

# =====================
# Load scaler params
# =====================
with open(SCALER_PATH) as f:
    sc = json.load(f)
# Per-feature statistics as float32 arrays, used by normalize() below.
mean  = np.array(sc["mean"], dtype=np.float32)
scale = np.array(sc["scale"], dtype=np.float32)

def normalize(X):
    """Standardize ``X`` per feature with the loaded ``mean`` and ``scale``.

    A small constant (1e-6) is added to the scale to avoid division by zero.
    """
    centered = X - mean
    return centered / (scale + 1e-6)

# =====================
# Same robust loader + crop used in training
# =====================
def load_csv_robust(fp, expected_cols=10):
    """Parse a comma-separated numeric text file, keeping only well-formed rows.

    The file is read as bytes, NUL bytes are removed and undecodable UTF-8
    sequences are dropped. A row is kept only if, after removing trailing
    commas, it has exactly ``expected_cols`` non-empty fields that all parse
    as finite floats. Everything else (headers, truncated lines, ``nan`` /
    ``inf`` tokens) is skipped silently.

    Args:
        fp: Path of the file to read.
        expected_cols: Required number of comma-separated fields per row.

    Returns:
        ``np.float32`` array of shape ``(n_valid_rows, expected_cols)``.

    Raises:
        ValueError: If the file contains no valid row.
        OSError: If the file cannot be opened or read.
    """
    with open(fp, "rb") as f:
        raw = f.read().replace(b"\x00", b"")
    text = raw.decode("utf-8", errors="ignore")

    good_rows = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        # Drop any run of trailing commas (and whitespace between them).
        while line.endswith(","):
            line = line[:-1].strip()
        parts = [p.strip() for p in line.split(",")]
        if len(parts) != expected_cols:
            continue
        if any(p == "" for p in parts):
            continue
        try:
            values = [float(p) for p in parts]
        except ValueError:
            # Non-numeric token: treat the whole row as corrupt.
            continue
        # float() accepts "nan"/"inf"; such rows would poison scaling downstream.
        if not np.isfinite(values).all():
            continue
        good_rows.append(values)

    if not good_rows:
        raise ValueError(f"No valid numeric rows in {fp}")
    return np.array(good_rows, dtype=np.float32)

def moving_average(x, w=25):
    """Smooth a 1-D signal with a centred box filter of width ``w``.

    The result always has the same length as ``x``. ``np.convolve`` with
    ``mode="same"`` returns ``max(len(x), w)`` samples, so the window is
    clamped to the signal length; otherwise a signal shorter than ``w`` would
    come back longer than it went in.

    Args:
        x: 1-D array-like signal.
        w: Requested window width in samples (values below 1 are treated as 1).

    Returns:
        Smoothed signal with ``len(x)`` samples; an empty input yields an
        empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        return np.zeros(0, dtype=np.result_type(x.dtype, np.float32))
    w = max(1, min(int(w), x.size))
    kernel = np.ones(w, dtype=np.float32) / w
    return np.convolve(x, kernel, mode="same")

def fix_length_center(X, target_len):
    """Return exactly ``target_len`` rows of ``X``.

    Longer inputs are cut to their central ``target_len`` rows; shorter inputs
    are extended with zero rows appended at the end.
    """
    surplus = len(X) - target_len
    if surplus < 0:
        filler = np.zeros((-surplus, X.shape[1]), dtype=X.dtype)
        return np.vstack([X, filler])
    offset = surplus // 2
    return X[offset:offset + target_len]

def emg_dc_remove(X):
    """Return a copy of ``X`` whose first three columns have zero mean.

    Only columns 0..2 are shifted (each by its own column mean); the remaining
    columns and the caller's array are left untouched.
    """
    centered = X.copy()
    head = centered[:, :3]          # view into the copy, so the update is in place
    head -= head.mean(axis=0, keepdims=True)
    return centered

def crop_active_region_emg(X, target_len=512, smooth_w=25, thresh_ratio=0.25):
    """Cut a ``target_len``-row window centred on the active part of the signal.

    Activity is measured as the smoothed sum of absolute values of the first
    three columns. Samples whose smoothed energy reaches ``thresh_ratio`` times
    the peak are "active"; the window is centred between the first and last
    active sample and shifted to stay inside the recording. If the signal is
    essentially flat or has fewer than five active samples, the central crop
    from ``fix_length_center`` is used instead. Short results are zero-padded
    at the end.

    Args:
        X: 2-D array, one row per time step.
        target_len: Number of rows in the returned window.
        smooth_w: Moving-average width applied to the energy trace.
        thresh_ratio: Fraction of the peak energy that counts as active.

    Returns:
        Array with ``target_len`` rows and the same number of columns as ``X``.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        return np.zeros((target_len, X.shape[1]), dtype=np.float32)

    envelope = moving_average(np.abs(X[:, :3]).sum(axis=1), w=smooth_w)
    peak = float(np.max(envelope))

    # Only look for active samples when there is a meaningful peak.
    active_idx = None
    if peak > 1e-6:
        active_idx = np.where(envelope >= thresh_ratio * peak)[0]
    if active_idx is None or len(active_idx) < 5:
        return fix_length_center(X, target_len)

    midpoint = (int(active_idx[0]) + int(active_idx[-1])) // 2

    lo = max(0, midpoint - target_len // 2)
    hi = lo + target_len
    if hi > n_samples:
        # Window runs off the end: pin it to the last sample and slide back.
        hi = n_samples
        lo = max(0, hi - target_len)

    window = X[lo:hi]
    shortfall = target_len - window.shape[0]
    if shortfall > 0:
        tail = np.zeros((shortfall, X.shape[1]), dtype=window.dtype)
        window = np.vstack([window, tail])
    return window

def load_one_sample(fp):
    """Load one recording and return its normalized ``(T, 9)`` float32 window.

    Steps: parse the 10-column file, drop column 0 (timestamp), remove the
    mean from the first three channels, crop/pad to ``T`` rows around the
    active region, then standardize with ``normalize``.

    NOTE: unlike the training-cell function of the same name, this version
    already applies ``normalize``. The training sampler calls
    ``normalize(load_one_sample(...))``, so re-running the training cells after
    this cell would normalize twice.
    """
    raw = load_csv_robust(fp, expected_cols=10)     # (Traw, 10)
    channels = emg_dc_remove(raw[:, 1:])            # (Traw, 9), timestamp dropped
    window = crop_active_region_emg(channels, target_len=T)
    return normalize(window).astype(np.float32)

# =====================
# Load encoder SavedModel signature
# =====================
# Load the exported encoder and grab its default serving signature.
loaded = tf.saved_model.load(ENCODER_DIR)
infer = loaded.signatures["serving_default"]

# The signature is called with keyword arguments and returns a dict, so look
# up the first input and output names instead of hard-coding them.
IN_KEY = list(infer.structured_input_signature[1])[0]
OUT_KEY = list(infer.structured_outputs)[0]

print("Encoder signature:")
print("  IN_KEY :", IN_KEY)
print("  OUT_KEY:", OUT_KEY)

def embed(X):  # X: (T,9)
    """Embed a single window and return a unit-length float32 vector.

    A batch axis is added, the SavedModel signature is called, and the first
    (only) output row is re-normalized to unit L2 norm.
    """
    batch = tf.constant(X[np.newaxis, ...].astype(np.float32))
    vec = infer(**{IN_KEY: batch})[OUT_KEY].numpy()[0]
    unit = vec / (np.linalg.norm(vec) + 1e-9)
    return unit.astype(np.float32)

# =====================
# Build prototypes
# =====================
proto = {}   # word -> unit-length prototype embedding (as a list)
stats = {}   # word -> {"n": number of files that contributed}

# Every sub-folder of FEWSHOT_ROOT is treated as one new word.
words = sorted(
    entry for entry in os.listdir(FEWSHOT_ROOT)
    if os.path.isdir(os.path.join(FEWSHOT_ROOT, entry))
)
print("Found few-shot word folders:", words)

for w in words:
    files = sorted(glob.glob(os.path.join(FEWSHOT_ROOT, w, "*.txt")))
    if not files:
        print("⚠️ No .txt files in", w)
        continue

    # Embed each recording; unreadable files are reported and skipped.
    embs = []
    for fp in files:
        try:
            X = load_one_sample(fp)
            embs.append(embed(X))
        except Exception as e:
            print("[SKIP]", fp, "->", e)

    if not embs:
        print("⚠️ No valid embeddings for", w)
        continue

    # Prototype = mean of the word's embeddings, rescaled to unit length.
    E = np.stack(embs, axis=0)
    p = np.mean(E, axis=0)
    p = p / (np.linalg.norm(p) + 1e-9)

    proto[w] = p.tolist()
    stats[w] = {"n": len(embs)}

print("✅ Prototypes built:", stats)

with open(OUT_DB, "w") as f:
    json.dump({"prototypes": proto, "stats": stats}, f, indent=2)

print("Saved:", OUT_DB)


Encoder signature:
  IN_KEY : keras_tensor
  OUT_KEY: output_0
Found few-shot word folders: ['amma', 'nil']
✅ Prototypes built: {'amma': {'n': 10}, 'nil': {'n': 10}}
Saved: /content/fewshot_db.json


In [None]:
import json, numpy as np
# Reload the saved prototype DB and print the cosine similarity of every pair
# of prototypes (lower means the words are easier to tell apart).
# Open the file in a `with` block so the handle is closed deterministically.
with open("/content/fewshot_db.json") as fh:
    db = json.load(fh)
P = {k: np.array(v, dtype=np.float32) for k,v in db["prototypes"].items()}
keys = list(P.keys())
for i in range(len(keys)):
    for j in range(i+1, len(keys)):
        a,b = keys[i], keys[j]
        # Epsilon in the denominator guards against a zero-norm prototype.
        cos = float(np.dot(P[a], P[b]) / (np.linalg.norm(P[a])*np.linalg.norm(P[b]) + 1e-9))
        print(a, "vs", b, "cosine=", round(cos, 4))


amma vs nil cosine= 0.4669
