In [5]:
from datasets import load_dataset

cv_17 = load_dataset("mozilla-foundation/common_voice_17_0", "hi", split="train",token="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",trust_remote_code=True)

Reading metadata...: 4689it [00:00, 156306.36it/s]les/s]
Generating train split: 4689 examples [00:01, 3916.58 examples/s]
Reading metadata...: 2428it [00:00, 161873.25it/s]examples/s]
Generating validation split: 2428 examples [00:00, 4005.61 examples/s]
Reading metadata...: 3154it [00:00, 153752.15it/s]es/s]
Generating test split: 3154 examples [00:00, 3939.10 examples/s]
Reading metadata...: 4044it [00:00, 161790.24it/s]les/s]
Generating other split: 4044 examples [00:01, 3982.20 examples/s]
Reading metadata...: 775it [00:00, 149603.53it/s]? examples/s]
Generating invalidated split: 775 examples [00:00, 3829.90 examples/s]
Reading metadata...: 10329it [00:00, 166596.04it/s]amples/s]
Generating validated split: 10329 examples [00:02, 4394.12 examples/s]


In [1]:
import torch.nn as nn
import torch.nn.functional as F
import sounddevice as sd
import soundfile as sf
import numpy as np
from tqdm import tqdm
import librosa
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

In [2]:
DURATION  =10
SAMPLE_RATE = 44100
N_MFCC = 1
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [13]:
def extract_mfcc(audio_path, sr=SAMPLE_RATE, n_mfcc=N_MFCC):

    y, sr = librosa.load(audio_path, sr=sr)
    
    target_length = int(DURATION * sr)
    if len(y) < target_length:
        y = np.pad(y, (0, target_length - len(y)), 'constant')
    else:
        y = y[:target_length]
    
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    
    features = np.concatenate([mfccs, delta_mfccs, delta2_mfccs])
    
    features = features.T
    
    return features


In [12]:
class SpeakerRecognitionModel(nn.Module):
    def __init__(self, input_dim=N_MFCC*3, hidden_dim=128, num_classes=2):
        super(SpeakerRecognitionModel, self).__init__()
        
        self.conv1 = nn.Conv1d(input_dim, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.3)
        
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.3)
        
        self.lstm = nn.LSTM(128, hidden_dim, batch_first=True, bidirectional=True)
        
        self.fc = nn.Linear(hidden_dim * 2, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        
        x = x.permute(0, 2, 1)
        
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.dropout1(x)
        
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.dropout2(x)
        
        x = x.permute(0, 2, 1)
        
        x, (hidden, _) = self.lstm(x)
        
        hidden = hidden.view(2, batch_size, -1) 
        hidden = hidden.permute(1, 0, 2)  
        hidden = hidden.contiguous().view(batch_size, -1)
        
        x = self.fc(hidden)
        
        return x


In [None]:
def record_audio(filename, duration=DURATION, sample_rate=SAMPLE_RATE):
    print(f"Запись {duration} .")
    print("saaay ")
    recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sd.wait()
    sf.write(filename, recording, sample_rate)
    print(f"Аудио сохранено в   {filename}")
    return filename


In [9]:
def concatenate_features(features_list):

    concatenated = np.concatenate(features_list, axis=0)
    return concatenated

def prepare_data(user_files, non_user_files, word_count):
    user_features_all = []
    non_user_features_all = []
    
    for file in user_files:
        features = extract_mfcc(file)
        user_features_all.append(features)
    
    for file in non_user_files:
        features = extract_mfcc(file)
        non_user_features_all.append(features)
    
    user_features = []
    non_user_features = []
    
    for i in range(0, len(user_features_all), word_count):
        if i + word_count <= len(user_features_all):
            group = user_features_all[i:i+word_count]
            combined = concatenate_features(group)
            user_features.append(combined)
    
    for i in range(0, len(non_user_features_all), word_count):
        if i + word_count <= len(non_user_features_all):
            group = non_user_features_all[i:i+word_count]
            combined = concatenate_features(group)
            non_user_features.append(combined)
    
    user_labels = np.ones(len(user_features))
    non_user_labels = np.zeros(len(non_user_features))
    
    features = user_features + non_user_features
    labels = np.concatenate([user_labels, non_user_labels])
    
    max_time_steps = max(f.shape[0] for f in features)
    
    aligned_features = []
    for f in features:
        if f.shape[0] < max_time_steps:
            pad = np.zeros((max_time_steps - f.shape[0], f.shape[1]))
            f = np.vstack([f, pad])
        else:
            f = f[:max_time_steps]
        aligned_features.append(f)
    
    return np.array(aligned_features), labels


In [8]:
def train_model(model, train_loader, val_loader, num_epochs=20,word_count = 1):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)
    
    best_val_acc = 0.0
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        print(train_loader)
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Train)"):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            
            optimizer.zero_grad()

            outputs = model(inputs)
            print(outputs)
            print(labels)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_loss = running_loss / total
        train_acc = correct / total
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Val)"):
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        val_loss = val_loss / val_total
        val_acc = val_correct / val_total
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        
        scheduler.step(val_loss)
        
        print(f"Эпоха {epoch+1}/{num_epochs} | "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), f"best_speaker_model_{word_count}.pth")
            print("Модель сохранена!")
    
    return best_val_acc


In [7]:
import torch
from torch.utils.data import Dataset

class VoiceDataset(Dataset):
    def __init__(self, features, labels):

        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        feature = self.features[idx]  
        label = self.labels[idx]
        
        feature_tensor = torch.FloatTensor(feature)
        label_tensor = torch.LongTensor([label])
        
        return feature_tensor, label_tensor


In [6]:
def collate_fn(batch):
    features, labels = zip(*batch)
    
    max_len = max(f.shape[0] for f in features)
    
    padded_features = []
    for f in features:
        pad_size = max_len - f.shape[0]
        padded = torch.nn.functional.pad(f, (0, 0, 0, pad_size), "constant", 0)
        padded_features.append(padded)
    
    features_tensor = torch.stack(padded_features)
    labels_tensor = torch.stack(labels).squeeze()
    
    return features_tensor, labels_tensor


In [5]:
import torch
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model(model, test_loader, word_count):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    
    predictions = []
    labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            
            outputs = model(inputs)
            
            
            print(outputs)
            print(targets)
            if len(targets.shape) > 1:
                targets = targets.squeeze()
            
            loss = torch.nn.CrossEntropyLoss()(outputs, targets)
            test_loss += loss.item() * inputs.size(0)
            
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
            
            predictions.extend(predicted.cpu().numpy())
            labels.extend(targets.cpu().numpy())
    
    accuracy = correct / total
    print(f"Test Loss: {test_loss / total:.4f} | Test Accuracy: {accuracy:.4f}")
    
    print("Report:         \n")
    print(classification_report(labels, predictions))
    
    return accuracy


In [4]:

def train_and_evaluate_word_count(user_files, non_user_files, word_count):
    print(f"\n{'='*50}")
    print(f"Число слов {word_count} ")
    print(f"{'='*50}")
    
    features, labels = prepare_data(user_files, non_user_files, word_count)
    
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
    
    train_dataset = VoiceDataset(X_train, y_train)
    val_dataset = VoiceDataset(X_val, y_val)
    test_dataset = VoiceDataset(X_test, y_test)
    
    batch_size = min(16, len(train_dataset))
    train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn 
)
    val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn  
)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    input_dim = features.shape[2]  
    model = SpeakerRecognitionModel(input_dim=input_dim).to(DEVICE)
    
    best_val_acc = train_model(model, train_loader, val_loader, num_epochs=15,word_count = word_count)
    
    model.load_state_dict(torch.load(f"best_speaker_model_{word_count}.pth"))
    
    test_acc = evaluate_model(model, test_loader, word_count)
    
    return best_val_acc, test_acc


In [17]:

import os
import time
user_dir = "user_voice_samples"
non_user_dir = "non_user_voice_samples"

os.makedirs(user_dir, exist_ok=True)
os.makedirs(non_user_dir, exist_ok=True)

user_files = [os.path.join(user_dir, f) for f in os.listdir(user_dir) if f.endswith('.wav')]
non_user_files = [os.path.join(non_user_dir, f) for f in os.listdir(non_user_dir) if f.endswith('.wav')]

max_word_count = 15  
min_user_samples = max_word_count * 2 
min_non_user_samples = max_word_count * 2

if len(user_files) < min_user_samples:
    print(f"Нужно записать дополнительно {min_user_samples - len(user_files)} файлов вашего голоса.")
    for i in range(len(user_files), min_user_samples):
        filename = os.path.join(user_dir, f"user_sample_{i:03d}.wav")
        record_audio(filename)
        time.sleep(0.5)

if len(non_user_files) < min_non_user_samples:
    print(f"Недостаточно голсоов других людей в размере {min_non_user_samples - len(non_user_files)} файлов.")
    for i in range(len(non_user_files), min_non_user_samples):
        filename = os.path.join(non_user_dir, f"non_user_sample_{i:03d}.wav")
        record_audio(filename)
        time.sleep(0.5)

user_files = [os.path.join(user_dir, f) for f in os.listdir(user_dir) if f.endswith('.wav')]
non_user_files = [os.path.join(non_user_dir, f) for f in os.listdir(non_user_dir) if f.endswith('.wav')]

word_counts = [5,10,15] 
results = {}
try:
    for count in word_counts:
        if len(user_files) >= count * 2 and len(non_user_files) >= count * 2:
            val_acc, test_acc = train_and_evaluate_word_count(user_files, non_user_files, count)
            results[count] = {"val_acc": val_acc, "test_acc": test_acc}
            print(results)
        else:
            pass
except:
    pass



Число слов 5 
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 1/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[-0.2473,  0.0734],
        [-0.0686,  0.1821],
        [-0.0356,  0.1686],
        [ 0.1262,  0.1619],
        [-0.1152, -0.1128],
        [-0.1370,  0.1295],
        [ 0.1328,  0.0162],
        [-0.0239,  0.1982],
        [-0.1252,  0.2226]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0, 1, 0, 0, 0, 0], device='cuda:0')


Epoch 1/15 (Train): 100%|██████████| 1/1 [00:00<00:00,  5.34it/s]
Epoch 1/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 146.92it/s]


Эпоха 1/15 | Train Loss: 0.7618 | Train Acc: 0.3333 | Val Loss: 0.6558 | Val Acc: 1.0000
Модель сохранена!
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 2/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 47.89it/s]


tensor([[ 0.1592, -0.0983],
        [ 0.2601, -0.1203],
        [ 0.1918, -0.1935],
        [ 0.3927,  0.0550],
        [ 0.0846, -0.2415],
        [ 0.3687, -0.2526],
        [ 0.3797, -0.1311],
        [ 0.1001, -0.1578],
        [ 0.2240, -0.2282]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0, 0, 0, 0, 1, 0], device='cuda:0')


Epoch 2/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 102.05it/s]


Эпоха 2/15 | Train Loss: 0.5750 | Train Acc: 0.7778 | Val Loss: 0.6355 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 3/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 44.80it/s]


tensor([[ 0.2232, -0.1344],
        [ 0.2439, -0.3091],
        [ 0.5652, -0.5569],
        [ 0.5044, -0.3298],
        [ 0.5312, -0.5708],
        [ 0.6066, -0.4297],
        [ 0.4417, -0.3163],
        [ 0.4973, -0.2343],
        [ 0.2794, -0.2865]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0, 0, 0, 0, 1, 0], device='cuda:0')


Epoch 3/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 119.23it/s]


Эпоха 3/15 | Train Loss: 0.5037 | Train Acc: 0.7778 | Val Loss: 0.6225 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 4/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.7445, -0.6947],
        [ 0.7676, -0.6311],
        [ 0.7663, -0.7902],
        [ 0.5399, -0.4614],
        [ 0.9296, -0.6619],
        [ 0.3111, -0.3229],
        [ 0.5489, -0.6334],
        [ 0.9090, -0.9363],
        [ 0.6963, -0.4645]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 1, 0, 1, 0, 0, 0], device='cuda:0')


Epoch 4/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 49.63it/s]
Epoch 4/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 193.07it/s]


Эпоха 4/15 | Train Loss: 0.4301 | Train Acc: 0.7778 | Val Loss: 0.6207 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 5/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 47.83it/s]


tensor([[ 1.0059, -1.0923],
        [ 0.3005, -0.2588],
        [ 0.8004, -1.0054],
        [ 0.8240, -0.7947],
        [ 1.0138, -0.6183],
        [ 1.0076, -0.8773],
        [ 0.4927, -0.3921],
        [ 1.1078, -1.0203],
        [ 0.8605, -0.6145]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 1, 0, 0, 0, 0, 1, 0, 0], device='cuda:0')


Epoch 5/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 126.77it/s]


Эпоха 5/15 | Train Loss: 0.3699 | Train Acc: 0.7778 | Val Loss: 0.6247 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 6/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 45.95it/s]


tensor([[ 0.2705, -0.2991],
        [ 1.2297, -1.1499],
        [ 1.3403, -1.2007],
        [ 1.0953, -1.0479],
        [ 0.9154, -0.9611],
        [ 1.2068, -1.0185],
        [ 0.3154, -0.2720],
        [ 0.7911, -0.7788],
        [ 0.6623, -0.7780]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0, 0, 0, 1, 0, 0], device='cuda:0')


Epoch 6/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 100.59it/s]


Эпоха 6/15 | Train Loss: 0.3299 | Train Acc: 0.7778 | Val Loss: 0.6336 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 7/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.0584, -0.9857],
        [ 0.8691, -0.7832],
        [ 0.0669, -0.0857],
        [ 1.0927, -1.1333],
        [ 1.3190, -1.0576],
        [ 0.4469, -0.2063],
        [ 1.4539, -1.4168],
        [ 1.2585, -1.2930],
        [ 1.3401, -1.2548]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0, 0, 1, 0, 0, 0], device='cuda:0')


Epoch 7/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 48.34it/s]
Epoch 7/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 147.32it/s]


Эпоха 7/15 | Train Loss: 0.2817 | Train Acc: 0.7778 | Val Loss: 0.6533 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 8/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.4953, -0.1527],
        [ 1.3924, -1.4022],
        [ 1.3495, -1.0434],
        [ 1.6707, -1.6600],
        [ 0.0709,  0.1245],
        [ 1.2604, -1.3232],
        [ 0.9410, -0.9246],
        [ 1.3264, -1.2168],
        [ 0.8675, -0.9455]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0, 1, 0, 0, 0, 0], device='cuda:0')


Epoch 8/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 40.07it/s]
Epoch 8/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 143.70it/s]


Эпоха 8/15 | Train Loss: 0.2623 | Train Acc: 0.8889 | Val Loss: 0.6855 | Val Acc: 0.3333
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 9/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.8780, -0.8137],
        [ 0.2224,  0.0309],
        [ 1.4076, -1.4581],
        [ 1.8350, -1.6375],
        [ 1.3904, -1.4196],
        [ 0.6418, -0.6758],
        [ 1.5490, -1.3021],
        [ 0.0658,  0.1380],
        [ 1.8660, -1.8061]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 1, 0, 0, 0, 0, 0, 1, 0], device='cuda:0')


Epoch 9/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 44.98it/s]
Epoch 9/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 185.44it/s]


Эпоха 9/15 | Train Loss: 0.2315 | Train Acc: 0.8889 | Val Loss: 0.6760 | Val Acc: 0.3333
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 10/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.7975e+00, -1.7679e+00],
        [ 1.4085e+00, -1.3380e+00],
        [ 8.0677e-01, -7.1204e-01],
        [ 1.4645e+00, -1.4623e+00],
        [-2.3755e-01,  1.1804e-01],
        [ 1.0232e+00, -1.0873e+00],
        [ 1.7148e+00, -1.7206e+00],
        [ 5.7364e-04,  3.7144e-01],
        [ 1.6237e+00, -1.6346e+00]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 0, 1, 0, 0, 1, 0], device='cuda:0')


Epoch 10/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 65.06it/s]
Epoch 10/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 126.02it/s]


Эпоха 10/15 | Train Loss: 0.1756 | Train Acc: 1.0000 | Val Loss: 0.6600 | Val Acc: 0.3333
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 11/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 42.66it/s]


tensor([[ 0.6939, -0.7702],
        [ 1.8223, -1.8979],
        [ 1.6274, -1.5087],
        [ 1.4431, -1.4996],
        [ 1.6305, -1.8193],
        [ 1.4663, -1.5763],
        [ 0.7334, -0.9191],
        [-0.1152,  0.2939],
        [-0.2655,  0.6036]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 0, 0, 0, 0, 1, 1], device='cuda:0')


Epoch 11/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 127.44it/s]


Эпоха 11/15 | Train Loss: 0.1598 | Train Acc: 1.0000 | Val Loss: 0.6365 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 12/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 41.41it/s]


tensor([[ 2.0012, -1.9573],
        [-0.2066,  0.4451],
        [ 1.6238, -1.2355],
        [ 1.3463, -1.3819],
        [ 1.7551, -1.6872],
        [ 1.5248, -1.6045],
        [-0.2732,  0.7082],
        [ 0.9265, -1.0267],
        [ 0.8200, -1.1106]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 1, 0, 0, 0, 0, 1, 0, 0], device='cuda:0')


Epoch 12/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 144.38it/s]


Эпоха 12/15 | Train Loss: 0.1353 | Train Acc: 1.0000 | Val Loss: 0.6067 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 13/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[-0.5861,  0.5137],
        [ 1.2291, -1.1942],
        [ 1.5095, -1.3638],
        [ 1.2112, -1.1047],
        [-0.0088,  0.4576],
        [ 1.5196, -1.5956],
        [ 1.7072, -1.9129],
        [ 1.6551, -1.6675],
        [ 1.9315, -1.9776]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0, 1, 0, 0, 0, 0], device='cuda:0')


Epoch 13/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 44.82it/s]
Epoch 13/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 129.21it/s]


Эпоха 13/15 | Train Loss: 0.1259 | Train Acc: 1.0000 | Val Loss: 0.5752 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 14/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 57.24it/s]

tensor([[ 1.6648, -1.7509],
        [ 1.8189, -1.9279],
        [-0.3290,  0.8854],
        [ 1.9757, -1.9913],
        [ 1.7742, -1.5933],
        [ 1.0056, -1.0578],
        [ 1.0430, -1.0162],
        [-0.6885,  0.7443],
        [ 2.2486, -2.3924]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0, 0, 0, 0, 1, 0], device='cuda:0')



Epoch 14/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 128.98it/s]


Эпоха 14/15 | Train Loss: 0.0924 | Train Acc: 1.0000 | Val Loss: 0.5417 | Val Acc: 0.6667
<torch.utils.data.dataloader.DataLoader object at 0x000001CA01F7BE50>


Epoch 15/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.9208, -2.3256],
        [-0.4962,  1.0779],
        [ 1.8903, -1.8255],
        [ 1.7368, -1.5803],
        [-0.7116,  0.9617],
        [ 1.2177, -1.2425],
        [ 1.2151, -1.0145],
        [ 1.7422, -1.8357],
        [ 2.0649, -2.4289]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 1, 0, 0, 1, 0, 0, 0, 0], device='cuda:0')


Epoch 15/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 50.88it/s]
Epoch 15/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 170.36it/s]
  model.load_state_dict(torch.load(f"best_speaker_model_{word_count}.pth"))


Эпоха 15/15 | Train Loss: 0.0730 | Train Acc: 1.0000 | Val Loss: 0.5028 | Val Acc: 0.6667
tensor([[-0.0429,  0.0225],
        [-0.0417, -0.0055],
        [ 0.0543, -0.0031],
        [ 0.0068, -0.0119]], device='cuda:0')
tensor([[1],
        [1],
        [1],
        [0]], device='cuda:0')
Test Loss: 0.6856 | Test Accuracy: 0.7500
Report:         

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.67      0.80         3

    accuracy                           0.75         4
   macro avg       0.75      0.83      0.73         4
weighted avg       0.88      0.75      0.77         4

{5: {'val_acc': 1.0, 'test_acc': 0.75}}

Число слов 10 
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 1/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 22.55it/s]


tensor([[ 0.0929, -0.0062],
        [-0.0031,  0.0625],
        [ 0.1779,  0.1137],
        [-0.0344,  0.1903]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0], device='cuda:0')


Epoch 1/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 83.65it/s]


Эпоха 1/15 | Train Loss: 0.7272 | Train Acc: 0.2500 | Val Loss: 0.7561 | Val Acc: 0.5000
Модель сохранена!
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 2/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 33.82it/s]


tensor([[ 0.2798, -0.2573],
        [ 0.0466, -0.0905],
        [ 0.0722, -0.4147],
        [ 0.1033, -0.2325]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 1], device='cuda:0')


Epoch 2/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 84.25it/s]


Эпоха 2/15 | Train Loss: 0.6103 | Train Acc: 0.7500 | Val Loss: 0.7747 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 3/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 33.79it/s]


tensor([[ 0.5286, -0.5413],
        [ 0.5605, -0.3502],
        [ 0.3102, -0.2655],
        [ 0.5163, -0.3562]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0], device='cuda:0')


Epoch 3/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 83.69it/s]


Эпоха 3/15 | Train Loss: 0.5010 | Train Acc: 0.7500 | Val Loss: 0.7126 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 4/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.6705, -0.5726],
        [ 0.8243, -0.5394],
        [ 0.5236, -0.5678],
        [ 0.2786, -0.3333]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 1], device='cuda:0')


Epoch 4/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 33.03it/s]
Epoch 4/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 72.21it/s]


Эпоха 4/15 | Train Loss: 0.4540 | Train Acc: 0.7500 | Val Loss: 0.6680 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 5/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 30.71it/s]


tensor([[ 0.8958, -0.7609],
        [ 0.7713, -0.7265],
        [ 0.6692, -0.8038],
        [ 0.4970, -0.4310]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 1], device='cuda:0')


Epoch 5/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 79.22it/s]


Эпоха 5/15 | Train Loss: 0.4610 | Train Acc: 0.7500 | Val Loss: 0.6295 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 6/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.6189, -0.3118],
        [ 1.2178, -0.8680],
        [ 0.7144, -0.4662],
        [ 1.0114, -0.6645]], device='cuda:0', grad_fn=<AddmmBackward0>)


Epoch 6/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 33.22it/s]


tensor([1, 0, 0, 0], device='cuda:0')


Epoch 6/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 100.63it/s]


Эпоха 6/15 | Train Loss: 0.4549 | Train Acc: 0.7500 | Val Loss: 0.5966 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 7/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.0315, -1.0665],
        [ 0.8818, -0.5769],
        [ 1.2947, -0.9866],
        [ 0.2612, -0.1394]], device='cuda:0', grad_fn=<AddmmBackward0>)

Epoch 7/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 30.57it/s]



tensor([0, 0, 0, 1], device='cuda:0')


Epoch 7/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 80.19it/s]


Эпоха 7/15 | Train Loss: 0.3339 | Train Acc: 0.7500 | Val Loss: 0.5664 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 8/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.2876, -0.2679],
        [ 1.2433, -1.2067],
        [ 1.1464, -0.9830],
        [ 0.8534, -0.6496]], device='cuda:0', grad_fn=<AddmmBackward0>)

Epoch 8/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 30.76it/s]



tensor([1, 0, 0, 0], device='cuda:0')


Epoch 8/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 74.00it/s]


Эпоха 8/15 | Train Loss: 0.3513 | Train Acc: 0.7500 | Val Loss: 0.5296 | Val Acc: 0.5000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 9/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.4678, -1.1762],
        [ 0.8781, -0.6129],
        [ 1.2415, -0.9607],
        [ 0.2336, -0.0220]], device='cuda:0', grad_fn=<AddmmBackward0>)

Epoch 9/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 31.20it/s]



tensor([0, 0, 0, 1], device='cuda:0')


Epoch 9/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 74.76it/s]


Эпоха 9/15 | Train Loss: 0.3014 | Train Acc: 0.7500 | Val Loss: 0.4909 | Val Acc: 1.0000
Модель сохранена!
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 10/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.3984, -1.1417],
        [-0.0990,  0.1763],
        [ 0.7432, -0.6977],
        [ 1.5604, -1.1275]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 1, 0, 0], device='cuda:0')


Epoch 10/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 32.40it/s]
Epoch 10/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 85.77it/s]


Эпоха 10/15 | Train Loss: 0.2298 | Train Acc: 1.0000 | Val Loss: 0.4472 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 11/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.6302, -1.2530],
        [ 0.8977, -0.6796],
        [ 1.5228, -1.2653],
        [ 0.0103,  0.1980]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 0, 1], device='cuda:0')


Epoch 11/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 35.09it/s]
Epoch 11/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 76.58it/s]


Эпоха 11/15 | Train Loss: 0.2264 | Train Acc: 1.0000 | Val Loss: 0.3995 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 12/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 1.5159, -1.2795],
        [ 1.1303, -0.8568],
        [-0.2827,  0.4811],
        [ 2.0143, -1.4324]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1, 0], device='cuda:0')


Epoch 12/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 32.86it/s]
Epoch 12/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 83.64it/s]


Эпоха 12/15 | Train Loss: 0.1504 | Train Acc: 1.0000 | Val Loss: 0.3503 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 13/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[-0.4494,  0.6677],
        [ 1.8628, -1.5223],
        [ 0.9895, -0.9010],
        [ 1.9102, -1.4622]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1, 0, 0, 0], device='cuda:0')


Epoch 13/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 32.86it/s]
Epoch 13/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 66.13it/s]


Эпоха 13/15 | Train Loss: 0.1227 | Train Acc: 1.0000 | Val Loss: 0.3003 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 14/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[ 0.8630, -0.8153],
        [ 1.7577, -1.4853],
        [-0.6555,  0.6710],
        [ 2.1466, -1.6950]], device='cuda:0', grad_fn=<AddmmBackward0>)

Epoch 14/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 31.43it/s]



tensor([0, 0, 1, 0], device='cuda:0')


Epoch 14/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 66.47it/s]


Эпоха 14/15 | Train Loss: 0.1165 | Train Acc: 1.0000 | Val Loss: 0.2557 | Val Acc: 1.0000
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1FE04C10>


Epoch 15/15 (Train):   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[-1.2933,  0.9578],
        [ 1.2126, -1.0982],
        [ 2.2336, -1.7914],
        [ 1.9736, -1.7970]], device='cuda:0', grad_fn=<AddmmBackward0>)

Epoch 15/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 30.61it/s]



tensor([1, 0, 0, 0], device='cuda:0')


Epoch 15/15 (Val): 100%|██████████| 1/1 [00:00<00:00, 84.54it/s]

Эпоха 15/15 | Train Loss: 0.0588 | Train Acc: 1.0000 | Val Loss: 0.2227 | Val Acc: 1.0000
tensor([[-0.0249,  0.1485],
        [ 0.7809, -0.7490]], device='cuda:0')
tensor([[1],
        [0]], device='cuda:0')
Test Loss: 0.4031 | Test Accuracy: 1.0000
Report:         

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2




  model.load_state_dict(torch.load(f"best_speaker_model_{word_count}.pth"))


{5: {'val_acc': 1.0, 'test_acc': 0.75}, 10: {'val_acc': 1.0, 'test_acc': 1.0}}

Число слов 15 
<torch.utils.data.dataloader.DataLoader object at 0x000001CA1F927490>


Epoch 1/15 (Train): 100%|██████████| 1/1 [00:00<00:00, 23.48it/s]


tensor([[ 0.1088, -0.1298],
        [-0.1990,  0.0578],
        [-0.1079, -0.1354]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([0, 0, 1], device='cuda:0')


Epoch 1/15 (Val):   0%|          | 0/1 [00:00<?, ?it/s]
