In [14]:
import glob, os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.decomposition import PCA
from itertools import combinations
from sklearn.manifold import MDS

In [15]:
import optuna

In [16]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [17]:
# Recordings are matched as <root>/<dir>/<file>.csv. The parent directory name
# is used as the word label and the file stem as the subject id (see the loop).
src_pattern = r'/workspace/Krrish/Silent_Speech/dataset_sony/Normalized_dataset1/recordings/*/*.csv'
features = ["theta", "x", "y"]  # CSV columns used as input channels
C = len(features)               # number of channels
print("1. Loading & Normalizing PER SUBJECT...")

# Sorted so the chunk order is reproducible; glob's own order depends on the
# filesystem.
files = sorted(glob.glob(src_pattern))

X, words, subjects = [], [], []
T = 150  # frames per chunk

for fp in files:
    try:
        df = pd.read_csv(fp)
        data = df[features].values
        # Standardise each feature column with this file's own mean/std.
        scaler = StandardScaler()
        data = scaler.fit_transform(data)

        # Cut the recording into non-overlapping T-frame chunks. The trailing
        # remainder, and any file shorter than T frames, is discarded.
        n_chunks = len(data) // T
        if n_chunks == 0:
            continue

        data = data[:n_chunks * T].reshape(n_chunks, T, C)
        X.append(data)

        word = os.path.basename(os.path.dirname(fp))
        # splitext removes only the extension; str.replace would also strip
        # ".csv" appearing in the middle of a file name.
        subj = os.path.splitext(os.path.basename(fp))[0]

        words.extend([word] * n_chunks)
        subjects.extend([subj] * n_chunks)

    except Exception as e:
        # Best-effort load: an unreadable or malformed file is reported and
        # skipped instead of aborting the whole run.
        print("Skipping:", fp, e)

# Fail with an actionable message instead of np.vstack's
# "need at least one array to concatenate".
if not X:
    raise ValueError(f"No usable recordings found for pattern: {src_pattern}")

# NOTE(review): subject ids are raw file stems, so spelling/case variants of the
# same person would count as different subjects — confirm before using
# `subjects` for per-subject splits.
X = np.vstack(X)  # (N, T, C)
print(X.shape)
print(list(set(words)))
print(list(set(subjects)))

1. Loading & Normalizing PER SUBJECT...
(5126, 150, 3)
['Dawai', 'Chah', 'Doctor', 'Hospital', 'Ulti', 'Dhadkan', 'Khangh', 'Bukhar', 'Sardard', 'Dard', 'Chakkar', 'Bhuk', 'Saah', 'Kamjori', 'Jukham', 'Peed', 'Gharde', 'Kabz', 'Khoon', 'Ghabrahat', 'Paani', 'Bahaar', 'Neend', 'Piyaas', 'Peshab']
['anubhavjot', 'Doctor', 'Hospital', 'Armman', 'madhav', 'Anupam', 'prabhdeep', 'liv', 'arushi', 'rudraksh', 'vansh', 'krrish', 'Krrish', 'mainder', 'suresh', 'anum', 'asha', 'gurmann', 'Amish', 'sapna', 'Bansbir', 'harsh', 'Jaskaran', 'KamalPreet', 'manj', 'Surindar', 'maninder', 'aunty']


In [18]:
# Label encoding: map each word string to an integer class id.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(words)
X_tensor = torch.tensor(X, dtype=torch.float32)  # chunk data, (N, T, C)
y_tensor = torch.tensor(y, dtype=torch.long)     # word class id of each chunk

dataset = torch.utils.data.TensorDataset(X_tensor, y_tensor)

# Training loader: reshuffled every epoch; the incomplete final batch is dropped.
train_loader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
    drop_last=True
)

# NOTE(review): both switches below are global. Anomaly detection is a debugging
# aid that slows every backward pass; consider turning it off for long runs.
torch.backends.cudnn.enabled = False
torch.autograd.set_detect_anomaly(True)

# Inference loader used to embed the whole dataset. It must visit every chunk
# exactly once and in a fixed order, so it neither shuffles nor drops the last
# partial batch (with drop_last=True the final N % 16 chunks were never embedded).
full_loader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=False,
    drop_last=False,
    num_workers=0,
    pin_memory=False
)

print("2. Dataset & loaders ready.")

2. Dataset & loaders ready.


In [19]:
class KinematicEncoder(nn.Module):
    """1-D convolutional encoder mapping a (B, T, C) sequence to a unit-norm embedding.

    Three Conv1d -> BatchNorm1d -> ReLU stages (64, 128, 256 channels with
    kernel sizes 7, 5, 3 and length-preserving padding) are followed by global
    average pooling over time and a linear projection to ``emb_dim``.

    Args:
        in_ch: number of input channels C.
        emb_dim: size of the output embedding.
    """

    def __init__(self, in_ch=3, emb_dim=96):
        super().__init__()

        def conv_stage(c_in, c_out, k):
            # An odd kernel with padding k // 2 keeps the time length unchanged.
            return [
                nn.Conv1d(c_in, c_out, kernel_size=k, padding=k // 2),
                nn.BatchNorm1d(c_out),
                nn.ReLU(),
            ]

        layers = []
        for c_in, c_out, k in ((in_ch, 64, 7), (64, 128, 5), (128, 256, 3)):
            layers.extend(conv_stage(c_in, c_out, k))
        # Collapse the time axis to length 1: the embedding keeps no timing info.
        layers.append(nn.AdaptiveAvgPool1d(1))

        self.net = nn.Sequential(*layers)
        self.proj = nn.Linear(256, emb_dim)

    def forward(self, x):
        """Encode ``x`` of shape (B, T, C) into L2-normalised rows of shape (B, emb_dim)."""
        channels_first = x.transpose(1, 2)            # (B, C, T), as Conv1d expects
        pooled = self.net(channels_first).squeeze(-1)  # (B, 256)
        # Unit-length rows so that dot products are cosine similarities.
        return F.normalize(self.proj(pooled), dim=1)

In [20]:
class SupConLoss(nn.Module):
    """Supervised contrastive loss computed inside one batch of embeddings.

    For every anchor, all other samples in the batch form the contrast set and
    those sharing the anchor's label are its positives. The loss is the
    negative mean log-probability of the positives under a softmax over the
    contrast set, averaged over all anchors. Anchors without any positive in
    the batch contribute zero.

    Args:
        temperature: divisor applied to the dot-product similarities.
    """

    def __init__(self, temperature=0.05):
        super().__init__()
        self.temperature = temperature

    def forward(self, features, labels):
        """
        features: (B, D), normalized
        labels:   (B,)

        Returns a scalar tensor.
        """
        device = features.device
        B = features.shape[0]

        if B < 2:
            # No pairs can be formed; return a zero that stays attached to the
            # graph so that calling backward() remains valid.
            return features.sum() * 0.0

        sim = torch.matmul(features, features.T) / self.temperature

        # Exclude self-similarity from the softmax by setting it to -inf and let
        # logsumexp handle stability. Subtracting a row max that includes the
        # diagonal (always 1/temperature for unit vectors) and adding an epsilon
        # inside the log distorts the denominator when the off-diagonal
        # similarities are small.
        self_mask = torch.eye(B, dtype=torch.bool, device=device)
        sim = sim.masked_fill(self_mask, float("-inf"))
        log_prob = sim - torch.logsumexp(sim, dim=1, keepdim=True)
        # The diagonal is -inf here; zero it so the 0 * -inf product below
        # cannot produce NaN.
        log_prob = log_prob.masked_fill(self_mask, 0.0)

        labels = labels.view(-1, 1).to(device)
        pos_mask = (torch.eq(labels, labels.T) & ~self_mask).to(log_prob.dtype)

        # clamp avoids 0/0 for anchors with no positive; their numerator is 0.
        pos_count = pos_mask.sum(dim=1).clamp(min=1.0)
        mean_log_prob_pos = (pos_mask * log_prob).sum(dim=1) / pos_count
        loss = -mean_log_prob_pos.mean()

        return loss

In [21]:
def temporal_jitter_torch(x, max_shift=20):
    """Circularly shift ``x`` of shape (T, C) along time by a random offset.

    The offset is drawn uniformly from [-max_shift, max_shift]; frames pushed
    past one end wrap around to the other.
    """
    offset = int(torch.randint(low=-max_shift, high=max_shift + 1, size=(1,)))
    return x.roll(shifts=offset, dims=0)

def channel_dropout_torch(x, p=0.2):
    """Zero whole channels of ``x`` (shape (T, C)) independently with probability ``p``.

    The keep mask is created on ``x``'s device and cast to ``x``'s dtype, so the
    function also works for CUDA tensors and does not promote half-precision
    inputs to float32. Kept channels are returned unscaled.
    """
    # One draw per channel; a channel is kept when its draw exceeds p.
    keep = torch.rand(x.shape[1], device=x.device) > p
    return x * keep.to(x.dtype)


In [22]:
# Encoder, loss and optimiser for supervised-contrastive training.
model = KinematicEncoder(in_ch=C, emb_dim=64).to(device)
criterion = SupConLoss(temperature=0.06)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=4e-4,
    weight_decay=1e-4
)

num_epochs = 500

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for x_batch, y_batch in train_loader:
        # Augment a copy, sample by sample on the CPU: random circular time
        # shift first, then random channel dropout.
        x_aug = x_batch.clone()
        for i in range(x_aug.shape[0]):
            x_aug[i] = channel_dropout_torch(temporal_jitter_torch(x_aug[i]))

        # Batch-local names: the notebook-level `y` (encoded labels for the
        # whole dataset) must not be overwritten by a single batch.
        x_dev = x_aug.to(device)
        y_dev = y_batch.to(device)

        z = model(x_dev)
        loss = criterion(z, y_dev)

        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean batch loss every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1} | Loss: {total_loss / len(train_loader):.4f}")



Epoch 10 | Loss: 3.7960
Epoch 20 | Loss: 3.8123
Epoch 30 | Loss: 3.7766
Epoch 40 | Loss: 3.7783
Epoch 50 | Loss: 3.7591
Epoch 60 | Loss: 3.7550
Epoch 70 | Loss: 3.7100
Epoch 80 | Loss: 3.7069
Epoch 90 | Loss: 3.7043
Epoch 100 | Loss: 3.6951
Epoch 110 | Loss: 3.6839
Epoch 120 | Loss: 3.6687
Epoch 130 | Loss: 3.6491
Epoch 140 | Loss: 3.6627
Epoch 150 | Loss: 3.6395
Epoch 160 | Loss: 3.6253
Epoch 170 | Loss: 3.5987
Epoch 180 | Loss: 3.6010
Epoch 190 | Loss: 3.6168
Epoch 200 | Loss: 3.5499
Epoch 210 | Loss: 3.5799
Epoch 220 | Loss: 3.5405
Epoch 230 | Loss: 3.5283
Epoch 240 | Loss: 3.5121
Epoch 250 | Loss: 3.5244
Epoch 260 | Loss: 3.5090
Epoch 270 | Loss: 3.5003
Epoch 280 | Loss: 3.4434
Epoch 290 | Loss: 3.4532
Epoch 300 | Loss: 3.4915
Epoch 310 | Loss: 3.3975
Epoch 320 | Loss: 3.4108
Epoch 330 | Loss: 3.4176
Epoch 340 | Loss: 3.3955
Epoch 350 | Loss: 3.3786
Epoch 360 | Loss: 3.3554
Epoch 370 | Loss: 3.3258
Epoch 380 | Loss: 3.3174
Epoch 390 | Loss: 3.3265
Epoch 400 | Loss: 3.3189
Epoch 410

In [23]:
from collections import defaultdict

# Inference mode: BatchNorm uses its running statistics.
model.eval()

chunk_embeddings, chunk_labels = [], []

# Embed every batch from full_loader, keeping each label next to its embedding.
with torch.no_grad():
    for batch_x, batch_y in full_loader:
        batch_z = model(batch_x.to(device))  # (B, D), already normalized
        chunk_embeddings.append(batch_z.cpu().numpy())
        chunk_labels.append(batch_y.numpy())

chunk_embeddings = np.concatenate(chunk_embeddings, axis=0)  # (N_chunks, D)
chunk_labels = np.concatenate(chunk_labels)                  # (N_chunks,)

In [24]:
# Bucket the chunk embeddings by their integer word id.
word_to_embs = defaultdict(list)

for row, label in enumerate(chunk_labels):
    word_to_embs[label].append(chunk_embeddings[row])

In [25]:
# One prototype per word: the mean of its chunk embeddings, rescaled to unit
# length so it lies on the same hypersphere as the individual embeddings.
z_word = {}

for word_id in word_to_embs:
    stacked = np.stack(word_to_embs[word_id], axis=0)  # (N_chunks_word, D)
    centroid = stacked.mean(axis=0)
    z_word[word_id] = centroid / np.linalg.norm(centroid)


In [26]:
# Torch (float32) copies of the per-word prototype vectors, keyed by word id.
z_word_torch = {}
for word_id, vec in z_word.items():
    z_word_torch[word_id] = torch.tensor(vec, dtype=torch.float32)

In [27]:
def attach_z_word(word_id, T):
    """Return the prototype of ``word_id`` tiled over ``T`` time steps.

    Looks the vector up in the module-level ``z_word_torch`` and returns a new
    (T, D) tensor whose rows are all that (D,) vector.
    """
    # repeat() on a 1-D tensor with two sizes treats it as (1, D) -> (T, D).
    return z_word_torch[word_id].repeat(T, 1)

In [28]:
# 1. Norm check: every word prototype must be unit length (within 1e-3).
for vec in z_word.values():
    assert np.isclose(np.linalg.norm(vec), 1.0, atol=1e-3)

# 2. Distinctness check: cosine similarity of every unordered pair of words.
keys = list(z_word.keys())
sims = [np.dot(z_word[a], z_word[b]) for a, b in combinations(keys, 2)]

print("Mean inter-word similarity:", np.mean(sims))


Mean inter-word similarity: 0.8189812


In [29]:
# Quick summary: number of word prototypes and the shape of one of them.
print("Words:", len(z_word))
first_embedding = next(iter(z_word.values()))
print("Embedding dim:", first_embedding.shape)

Words: 25
Embedding dim: (64,)


In [30]:
# Sort by class id so the saved row order is deterministic rather than following
# dict insertion order (the order in which batches happened to be seen).
word_ids = np.array(sorted(z_word))                             # (N_words,)
embeddings = np.stack([z_word[w] for w in word_ids], axis=0)    # (N_words, D)

# Also store the word strings so the archive can be decoded without the
# in-memory LabelEncoder. Cast to a plain unicode array so that loading never
# needs allow_pickle.
word_names = np.asarray(label_encoder.inverse_transform(word_ids)).astype(str)

np.savez(
    "z_word_embeddings.npz",
    word_ids=word_ids,
    embeddings=embeddings,
    words=word_names
)