In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler

# Read the train / test CSV files; neither has a header row.
train_df = pd.read_csv('./datasets/mitbih_dataset/mitbih_train.csv', header=None)
test_df = pd.read_csv('./datasets/mitbih_dataset/mitbih_test.csv', header=None)

# Columns 0..186 are used as the input signal, column 187 as the class label.
X_train, y_train = train_df.iloc[:, :187].to_numpy(), train_df.iloc[:, 187].to_numpy()
X_test, y_test = test_df.iloc[:, :187].to_numpy(), test_df.iloc[:, 187].to_numpy()

# Standardise every column with statistics estimated on the training split only,
# then apply the very same transform to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Float inputs for the network, integer class ids for the loss.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 1024

# Only the training batches are shuffled; evaluation keeps the file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:


class BasicBlock1D(nn.Module):
    """Residual block made of two kernel-3 1-D convolutions.

    The main branch is conv -> batch-norm -> ReLU -> conv -> batch-norm. Its
    result is added to a shortcut of the input and passed through a final ReLU.
    The shortcut is the identity unless the block changes the temporal
    resolution (``stride != 1``) or the channel count, in which case a strided
    1x1 convolution followed by batch-norm projects the input to the right shape.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection shortcut).
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main branch. Only the first convolution may down-sample.
        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(planes)
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(planes)

        # Shortcut branch: projection when shapes differ, empty Sequential (identity) otherwise.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the residual block to ``x`` of shape (batch, in_planes, length)."""
        residual = self.shortcut(x)
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return F.relu(y + residual)

class ResNet1D(nn.Module):
    """ResNet-style classifier for single-channel 1-D signals.

    Architecture, in forward order:
      * stem: kernel-7 stride-2 convolution (1 -> 64 channels), batch-norm, ReLU,
        kernel-3 stride-2 max-pool;
      * four stages of residual blocks with 64, 128, 256 and 512 base channels;
        stage 1 keeps the resolution, stages 2-4 each use stride 2 in their
        first block;
      * global average pooling over the length axis and a linear classifier.

    Args:
        block: Residual block class. It is called as ``block(in_planes, planes,
            stride)`` and must expose an integer ``expansion`` attribute. The block
            is responsible for its own shortcut/projection.
        layers: Sequence whose first four entries give the number of blocks in
            stages 1-4.
        num_classes: Size of the output logits vector.
    """

    def __init__(self, block, layers, num_classes=5):
        super().__init__()
        # Channel count of the tensor entering the next stage; updated by _make_layer.
        self.inplanes = 64

        # Stem
        self.conv1 = nn.Conv1d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # Residual stages
        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Head
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution (Kaiming normal, fan-out) and reset every
        # batch-norm to the identity transform (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of residual blocks.

        The first block receives ``stride`` and the current ``self.inplanes``;
        the remaining ``blocks - 1`` blocks use the default stride and the
        stage's output width. ``self.inplanes`` is updated to
        ``planes * block.expansion`` as a side effect.

        No separate down-sampling module is built here: the block class is
        handed the stride and channel counts and constructs its own shortcut.
        (An earlier version allocated an extra Conv1d/BatchNorm1d pair that was
        never attached to the model.)
        """
        stage = [block(self.inplanes, planes, stride)]
        self.inplanes = planes * block.expansion
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for ``x`` of shape (batch, 1, length)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # (batch, channels, 1) -> (batch, channels)
        x = self.fc(x)

        return x

def ResNet18_1D():
    """Build a ResNet1D made of BasicBlock1D blocks, two per stage, with the default number of classes."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet1D(BasicBlock1D, blocks_per_stage)

# Smoke test: push a random (batch_size, num_channels, seq_length) batch through a
# throw-away model and print the shape of the logits it returns.
dummy_input = torch.randn(10, 1, 187)
model = ResNet18_1D()
output = model(dummy_input)
print(output.shape)

# Optimisation hyper-parameters
lr = 0.001
num_epochs = 10

# Fresh model on the GPU when available, trained with cross-entropy and Adam.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet18_1D().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# ---- Training ----
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    with tqdm(total=len(train_loader), desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch') as pbar:
        for i, (inputs, labels) in enumerate(train_loader):
            # Add a channel dimension: the model's first Conv1d expects (batch, 1, length).
            inputs = inputs.unsqueeze(1).to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # The progress bar shows the mean loss over the batches seen so far in this epoch.
            running_loss += loss.item()
            pbar.set_postfix(loss=running_loss/(i+1))
            pbar.update(1)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

# ---- Evaluation on the held-out loader ----
model.eval()
correct = 0
total = 0
with torch.no_grad(), tqdm(total=len(test_loader), desc='Evaluating', unit='batch') as pbar:
    for inputs, labels in test_loader:
        inputs = inputs.unsqueeze(1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        pbar.update(1)

print(f"Accuracy: {100 * correct / total}%")


torch.Size([10, 5])


  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Epoch 1/10: 100%|██████████| 86/86 [00:19<00:00,  4.49batch/s, loss=0.16]


Epoch 1/10, Loss: 0.15960755710338437


Epoch 2/10: 100%|██████████| 86/86 [00:17<00:00,  4.85batch/s, loss=0.0556]


Epoch 2/10, Loss: 0.05561016807549222


Epoch 3/10: 100%|██████████| 86/86 [00:16<00:00,  5.08batch/s, loss=0.0424]


Epoch 3/10, Loss: 0.04239204471800909


Epoch 4/10: 100%|██████████| 86/86 [00:16<00:00,  5.29batch/s, loss=0.0327]


Epoch 4/10, Loss: 0.032742354953878144


Epoch 5/10: 100%|██████████| 86/86 [00:16<00:00,  5.35batch/s, loss=0.0263]


Epoch 5/10, Loss: 0.0263311994305357


Epoch 6/10: 100%|██████████| 86/86 [00:16<00:00,  5.36batch/s, loss=0.0251]


Epoch 6/10, Loss: 0.025096762048210517


Epoch 7/10: 100%|██████████| 86/86 [00:16<00:00,  5.32batch/s, loss=0.0203]


Epoch 7/10, Loss: 0.0202687902614301


Epoch 8/10: 100%|██████████| 86/86 [00:16<00:00,  5.18batch/s, loss=0.0179]


Epoch 8/10, Loss: 0.01785750674668613


Epoch 9/10: 100%|██████████| 86/86 [00:16<00:00,  5.30batch/s, loss=0.0161]


Epoch 9/10, Loss: 0.01610669531577895


Epoch 10/10: 100%|██████████| 86/86 [00:16<00:00,  5.35batch/s, loss=0.014]


Epoch 10/10, Loss: 0.01403709922289086


Evaluating: 100%|██████████| 22/22 [00:00<00:00, 22.91batch/s]

Accuracy: 98.60222912479445%





In [None]:
import os
import sys
import torch
from torch.utils.data import TensorDataset, DataLoader
import subprocess
import numpy as np
import pandas as pd
import glob
from collections import OrderedDict
import random

# Fetch the CNNWordReco helper repository on first run and make it importable.
if not os.path.isdir('CNNWordReco'):
    # check=True makes a failed clone raise CalledProcessError right here instead of
    # surfacing later as an unrelated ImportError on the lines below.
    # NOTE(review): this clones the repository's default branch at whatever commit is
    # current; pin a commit if reproducibility of the helpers matters.
    subprocess.run(['git', 'clone', 'https://github.com/saztorralba/CNNWordReco'], check=True)
if 'CNNWordReco' not in sys.path:
    sys.path.append('CNNWordReco')
from utils.cnn_func import load_data, train_model, validate_model, test_model
from test_wordreco import show_matrix

# Arguments forwarded to the CNNWordReco helpers (passed to load_data as **args below)
args = {
    'cv_percentage': 0.1,
    'xsize': 20,
    'ysize': 20,
    'num_blocks': 10,
    'channels': 32,
    'dropout': 0.3,
    'embedding_size': 128,
    'epochs': 20,
    'batch_size': 32,
    'learning_rate': 0.001,
    'seed': 0,
    'device': 'cpu',
    'verbose': 1,
    'augment': False,
    'vocab': OrderedDict({'ZERO': 0, 'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5, 'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9})
}

# Initialise the random seeds
random.seed(args['seed'])
torch.manual_seed(args['seed'])
torch.cuda.manual_seed(args['seed'])
torch.backends.cudnn.deterministic = True


def _parse_recording_name(path):
    """Return (digit, speaker, recording number) parsed from a recording path.

    The file name is split on '_': field 0 is the spoken digit, field 1 the
    speaker and field 2 (up to its first '.') the recording number.
    os.path.basename is used so the parsing does not depend on the path
    separator returned by glob.
    """
    fields = os.path.basename(path).split('_')
    return int(fields[0]), fields[1], int(fields[2].split('.')[0])


# Read data and store in dataframe.
# glob returns files in arbitrary filesystem order; sorting keeps the row order (and any
# seeded processing that depends on it) the same from one run or machine to the next.
wavfiles = sorted(glob.glob('./datasets/recordings/*.wav'))
parsed_names = [_parse_recording_name(path) for path in wavfiles]
vocab_words = list(args['vocab'].keys())  # position in the vocab -> word
speakers = [speaker for _, speaker, _ in parsed_names]
words = [vocab_words[digit] for digit, _, _ in parsed_names]
rec_number = [number for _, _, number in parsed_names]
data = pd.DataFrame({'wavfile': wavfiles, 'speaker': speakers, 'word': words, 'rec_number': rec_number})

# Perform training as defined in https://github.com/Jakobovski/free-spoken-digit-dataset/
# Recordings [5-49] for training and recordings [0-4] for testing
print('Training model with recordings [5-49] from all speakers')
# Load data
train_data = data.loc[data['rec_number'] >= 5].reset_index(drop=True)
test_data = data.loc[data['rec_number'] < 5].reset_index(drop=True)
trainset, validset, trainlabels, validlabels = load_data(train_data, True, **args)
# Stored in args but not applied to X_train in this cell.
# NOTE(review): presumably consumed by the CNNWordReco helpers - confirm.
args['mean'] = torch.mean(trainset.float())
args['std'] = torch.std(trainset.float())

# Convert the loaded features / labels to tensors of the dtypes the model and loss expect.
# as_tensor accepts tensors as well as arrays, so an input that is already a tensor is
# not copy-constructed through torch.tensor (which emits a UserWarning).
# NOTE: X_train, y_train, train_dataset and batch_size reuse names assigned in the first
# cell of this notebook; from here on they refer to the voice data.
X_train = torch.as_tensor(trainset, dtype=torch.float32)
y_train = torch.as_tensor(trainlabels, dtype=torch.long)

# Reshape the input tensor to the shape expected by conv1d: (batch_size, channels, width)
X_train = X_train.reshape(X_train.size(0), 1, -1)

# Create Tensor datasets
train_dataset = TensorDataset(X_train, y_train)

# Define batch size
batch_size = 512  # Choose the batch size you want to use

# Create DataLoaders
train_loader_voice = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Print the shape of a sample
sample_batch = next(iter(train_loader_voice))
print("Shape of input batch:", sample_batch[0].shape)
print("Shape of target batch:", sample_batch[1].shape)


Training model with recordings [5-49] from all speakers
Shape of input batch: torch.Size([512, 1, 400])
Shape of target batch: torch.Size([512])


In [None]:
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Helper that runs a model over a loader and collects its outputs
def extract_features(model, data_loader, device=None):
    """Run ``model`` over every batch of ``data_loader`` and gather the results.

    The model is switched to eval mode (and left in it) and gradients are
    disabled. Whatever ``model(inputs)`` returns is used as the feature vector
    of each sample.

    Args:
        model: A ``torch.nn.Module``.
        data_loader: Iterable yielding ``(inputs, labels)`` pairs of tensors.
        device: Device the inputs are moved to before the forward pass. When
            omitted, the device of the model's first parameter is used (CPU for
            a model without parameters), so inputs always land where the model
            lives instead of depending on a module-level variable.

    Returns:
        ``(features, labels)``: the per-batch model outputs concatenated along
        dim 0 (left on the device the model produced them on), and the labels
        concatenated along dim 0 exactly as the loader yielded them.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    features = []
    labels_list = []
    model.eval()
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs.to(device))
            features.append(outputs)
            labels_list.append(labels)

    return torch.cat(features), torch.cat(labels_list)

# Run the model over the voice loader (train_loader_voice) and collect its outputs and labels.
test_features, test_labels = extract_features(model, train_loader_voice)

# Pairwise cosine similarity between all extracted feature vectors.
feature_matrix = test_features.detach().cpu()
distances = cosine_similarity(feature_matrix, feature_matrix)
# Put -inf on the diagonal so a sample is never picked as its own most-similar neighbour.
np.fill_diagonal(distances, float('-inf'))

# Helper returning the indices of the most similar elements
def find_max_indices(arr, k):
    """Return the indices of the ``k`` largest values of ``arr``, largest first.

    Args:
        arr: 1-D array-like of comparable values.
        k: Number of indices to return; ``0 <= k <= len(arr)``.

    Returns:
        A 1-D integer ``numpy.ndarray`` of length ``k``, ordered by decreasing
        value. Empty when ``k`` is 0.

    Raises:
        ValueError: If ``k`` is negative or larger than ``len(arr)``.
    """
    arr = np.asarray(arr)
    if k > len(arr):
        raise ValueError("k cannot be greater than the size of the array")
    if k < 0:
        raise ValueError("k cannot be negative")
    if k == 0:
        # Guard needed because the slice [-0:] below would select the whole array.
        return np.empty(0, dtype=np.intp)

    # argpartition finds the k largest in linear time but leaves them unordered...
    idx = np.argpartition(arr, -k)[-k:]
    # ...so only those k candidates are sorted, in descending order of value.
    sorted_idx = np.argsort(arr[idx])[::-1]
    return idx[sorted_idx]

# Helper computing the top-k retrieval accuracy
def compute_top_k_accuracy(distances, test_labels, k):
    """Fraction of rows whose own label appears among their ``k`` best matches.

    For every row ``i`` of ``distances`` the ``k`` columns with the LARGEST
    values are selected (so the matrix is treated as a similarity score:
    higher means closer). Row ``i`` counts as correct when ``test_labels[i]``
    equals the label of at least one selected column.

    Args:
        distances: 2-D array-like of scores, one row per query and one column
            per candidate; column ``j`` is labelled ``test_labels[j]``.
        test_labels: 1-D labels as a torch tensor (any device) or array-like.
        k: Number of best matches inspected per row; ``0 <= k <= n_columns``.

    Returns:
        The accuracy as a Python float in [0, 1]. ``k == 0`` gives 0.0 because
        no candidate is inspected.

    Raises:
        ValueError: If ``distances`` has no rows, or ``k`` is negative or larger
            than the number of columns.
    """
    scores = np.asarray(distances)
    if hasattr(test_labels, 'detach'):
        # Torch tensor: bring it to host memory before handing it to NumPy.
        test_labels = test_labels.detach().cpu().numpy()
    labels = np.asarray(test_labels).ravel()

    n_queries = len(scores)
    if n_queries == 0:
        raise ValueError("distances must contain at least one row")
    if k > scores.shape[1]:
        raise ValueError("k cannot be greater than the size of the array")
    if k < 0:
        raise ValueError("k cannot be negative")
    if k == 0:
        return 0.0

    # Indices of the k largest scores in every row. Their order inside the top-k
    # is irrelevant for a membership test, so no sort is needed.
    top_k = np.argpartition(scores, -k, axis=1)[:, -k:]
    hits = (labels[top_k] == labels[:n_queries, None]).any(axis=1)
    return int(hits.sum()) / n_queries

# Report the top-k accuracy for k = 1..5, keeping every value for later use.
real_top_accuracies = []
for k in (1, 2, 3, 4, 5):
    accuracy = compute_top_k_accuracy(distances, test_labels, k)
    real_top_accuracies += [accuracy]
    print(f"Top-{k} : {accuracy:.4f}")


Top-1 : 0.2407
Top-2 : 0.3835
Top-3 : 0.4782
Top-4 : 0.5584
Top-5 : 0.6189
