# Training the baseline architecture (x-vectors, 2018)

**Imports:**

In [None]:
import torch
from src.nn_models.xvectors_baseline import *
import matplotlib.pyplot as plt 
from sklearn.manifold import TSNE
from pathlib import Path
from os import listdir

**Configuration:**

In [None]:
# training configuration
# NOTE(review): the three paths below are absolute and machine-specific;
# adjust them before running this notebook on another machine.
training_data = Path("/home/joey/School/KNN/speaker-identification/data/vox1_dev/")
testing_data = Path("/home/joey/School/KNN/speaker-identification/data/vox1_test/")
model_params = Path("/home/joey/School/KNN/speaker-identification/model_params.pt")
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
epochs = 50
batch_size = 2048
# chunk_size is handed to the model constructor and to the dataset builder below.
# NOTE(review): its unit (presumably frames per chunk) is not visible here - confirm.
chunk_size = 24

# model declaration:
# The number of output classes is the number of entries directly inside the
# training directory.
# NOTE(review): this counts every entry, including stray files; presumably the
# directory holds exactly one sub-directory per speaker - confirm.
speakers_count = len(listdir(training_data))
model = XVectorsBaseline(speakers_count, chunk_size).to(device)

# Resume from previously stored parameters when the checkpoint file exists.
if model_params.is_file():
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint written on a GPU machine still loads on a CPU-only machine.
    model.load_state_dict(torch.load(model_params, map_location=device))
    print("Model parameters were loaded!")

# loss and optimizer selection:
criterion = nn.CrossEntropyLoss()
# Adam is constructed without explicit hyperparameters, so its defaults apply.
optimizer = optim.Adam(model.parameters())

# create training dataset and dataloader:
train_dataset = create_training_mfcc_dataset(training_data, chunk_size)
# Batches are reshuffled on every pass over the data (shuffle=True).
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

**Training the model:**

In [None]:
# Start training with the objects prepared in the configuration cell.
# NOTE(review): model_params is presumably where train_model stores the
# learned parameters - confirm in its implementation.
train_model(
    model,
    model_params,
    epochs,
    train_dataloader,
    optimizer,
    criterion,
    device,
)

**Visual evaluation of the model on the testing data:**

In [None]:
# Extract the x-vectors and their labels for the testing data.
xvectors, labels = get_session_xvectors(model, testing_data, chunk_size, batch_size, device)
# Project the x-vectors down to two dimensions with t-SNE so they can be plotted.
xvectors_reduced = TSNE(n_components=2).fit_transform(np.array(xvectors))
colors = ["red", "blue", "green", "orange", "black", "pink"]
try:
    # Preferred path: one fixed, named colour per label.
    point_colors = [colors[label] for label in labels]
except IndexError:
    # A label does not fit into the six-colour palette; colour the points by
    # their numeric label through a colormap instead of failing.
    point_colors = None

if point_colors is not None:
    plt.scatter(xvectors_reduced[:, 0], xvectors_reduced[:, 1], c=point_colors)
else:
    plt.scatter(
        xvectors_reduced[:, 0],
        xvectors_reduced[:, 1],
        c=[int(label) for label in labels],
        cmap="nipy_spectral",
    )