<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/speech-privacy/expirement_2_libritts_gender_wav2vec2_neuralnetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets

In [8]:
print("Columns:", dataset.column_names)

Columns: ['age', 'age_onset', 'birthplace', 'filename', 'native_language', 'sex', 'speakerid', 'country', 'file_missing?', 'file_exists', '__index_level_0__', 'audio']


In [9]:
################################################################################
# 2. IMPORTS
################################################################################
import torch
import torchaudio
import numpy as np
import random
import matplotlib.pyplot as plt

from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    Wav2Vec2ForCTC,
    Wav2Vec2Processor,
    TrainingArguments,
    Trainer
)
from dataclasses import dataclass
from typing import Any, Dict, List, Union
from collections import defaultdict, Counter

################################################################################
# 3. LOAD THE DATASET
################################################################################
dataset = load_dataset("HamdanXI/speech-accent-archive-v2")["train"]

print("Full dataset size:", len(dataset))
print("Columns:", dataset.column_names)
print("Unique speakerid:", len(set(dataset["speakerid"])))

################################################################################
# 4. CHOOSE (A) "TRAIN" + (B) "TEST" SPEAKERS
################################################################################
A = 50
B = 20

# We have male/female lists:
male_speakers_all = list({
    ex["speakerid"] for ex in dataset
    if ex["sex"] == "male" and ex["native_language"].lower() == "english"
})

female_speakers_all = list({
    ex["speakerid"] for ex in dataset
    if ex["sex"] == "female" and ex["native_language"].lower() == "english"
})

random.seed(42)
# 1) pick (A) training: (A)M + (A)F
chosen_male_train = random.sample(male_speakers_all, A)
chosen_female_train = random.sample(female_speakers_all, A)

# Remove them from the big list so we don't re-pick them for test
for spk in chosen_male_train:
    male_speakers_all.remove(spk)
for spk in chosen_female_train:
    female_speakers_all.remove(spk)

# 2) pick (B) test: (B)M + (B)F
chosen_male_test = random.sample(male_speakers_all, B)
chosen_female_test = random.sample(female_speakers_all, B)

chosen_speakers_train = chosen_male_train + chosen_female_train
chosen_speakers_test  = chosen_male_test + chosen_female_test

print(f"Chosen training male:   {chosen_male_train}")
print(f"Chosen training female: {chosen_female_train}")
print(f"Chosen test male:       {chosen_male_test}")
print(f"Chosen test female:     {chosen_female_test}")

# Filter the dataset into two subsets: train_speakers, test_speakers
train_ds = dataset.filter(lambda x: x["speakerid"] in chosen_speakers_train)
test_ds  = dataset.filter(lambda x: x["speakerid"] in chosen_speakers_test)

################################################################################
# 5. LOAD PROCESSOR
################################################################################
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")

################################################################################
# 6. PREPARE BATCH FUNCTION
################################################################################
def prepare_batch(batch):
    """
    1) Resample audio to 16k if needed.
    2) Extract raw (unpadded) input_values via processor.feature_extractor(...).
    3) Tokenize text with the FIXED text prompt "Please call Stella..."
    """
    sr = batch["audio"]["sampling_rate"]
    audio_array = batch["audio"]["array"]
    wave_tensor = torch.tensor(audio_array, dtype=torch.float32)
    if sr != 16000:
        wave_tensor = torchaudio.functional.resample(wave_tensor, sr, 16000)
    wave_16k = wave_tensor.numpy()

    # Audio feature extraction
    audio_features = processor.feature_extractor(wave_16k, sampling_rate=16000)

    # *CHANGED* => We now ignore the original text and use a fixed prompt:
    fixed_prompt = (
        "Please call Stella. Ask her to bring these things with her from the store: "
        "Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a "
        "snack for her brother Bob. We also need a small plastic snake and a big toy "
        "frog for the kids. She can scoop these things into three red bags, and we "
        "will go meet her Wednesday at the train station."
    )
    text_tokens = processor.tokenizer(fixed_prompt)

    return {
        "speakerid":   batch["speakerid"],
        "sex":       batch["sex"],
        "input_values": audio_features["input_values"][0],
        "labels":       text_tokens["input_ids"]
    }

keep_cols = ["speakerid", "sex", "input_values", "labels"]
train_ds = train_ds.map(
    prepare_batch,
    remove_columns=[col for col in train_ds.column_names if col not in keep_cols]
)

test_ds = test_ds.map(
    prepare_batch,
    remove_columns=[col for col in test_ds.column_names if col not in keep_cols]
)

################################################################################
# 7. DATA COLLATOR
################################################################################
@dataclass
class DataCollatorCTCWithPadding:
    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        input_values = [f["input_values"] for f in features]
        labels       = [f["labels"]       for f in features]

        audio_batch = self.processor.feature_extractor.pad(
            {"input_values": input_values},
            padding=self.padding,
            return_tensors="pt",
        )

        labels_batch = self.processor.tokenizer.pad(
            {"input_ids": labels},
            padding=self.padding,
            return_tensors="pt",
        )

        labels_tensor = labels_batch["input_ids"].masked_fill(
            labels_batch["input_ids"] == self.processor.tokenizer.pad_token_id, -100
        )

        return {
            "input_values": audio_batch["input_values"],
            "labels":       labels_tensor
        }

data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)

################################################################################
# 8. TRAIN ON SINGLE SAMPLE
################################################################################
def train_on_single_sample(
    sample,
    base_model_name="facebook/wav2vec2-base-960h",
    num_train_epochs=1,
    device="cuda" if torch.cuda.is_available() else "cpu"
):
    """
    Fine-tune on a single sample (dictionary with "input_values", "labels").
    Returns the fine-tuned model on CPU.
    """
    single_ds = Dataset.from_dict({
        "input_values": [sample["input_values"]],
        "labels":       [sample["labels"]]
    })

    model = Wav2Vec2ForCTC.from_pretrained(base_model_name)
    model.to(device)

    # Freeze the feature encoder only
    model.freeze_feature_encoder()

    training_args = TrainingArguments(
        output_dir="temp_model",
        per_device_train_batch_size=1,
        num_train_epochs=num_train_epochs,
        evaluation_strategy="no",
        logging_steps=5,
        learning_rate=1e-5,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=single_ds,
        tokenizer=processor.feature_extractor
    )

    trainer.train()
    model.to("cpu")
    return model

################################################################################
# 9. EXTRACT LAYERWISE DIFFERENCES
################################################################################
def compute_layerwise_diff(new_model, base_model, num_layers=12):
    """
    Compare new_model vs. base_model for each Wav2Vec2 encoder layer.
    Return two lists of length num_layers: mean_diffs, std_diffs
    (Both models on CPU).
    """
    from collections import defaultdict
    layerwise_params = defaultdict(list)

    base_dict = dict(base_model.named_parameters())
    new_dict  = dict(new_model.named_parameters())

    for name, base_param in base_dict.items():
        if ("wav2vec2.encoder.layers" in name) and (name in new_dict):
            new_param = new_dict[name]
            parts = name.split(".")
            try:
                layer_idx = int(parts[3])  # e.g. "3" in "wav2vec2.encoder.layers.3..."
            except ValueError:
                continue

            if layer_idx < num_layers:
                diff = (new_param - base_param).detach().cpu().view(-1)
                layerwise_params[layer_idx].append(diff)

    mean_diffs, std_diffs = [], []
    for i in range(num_layers):
        if i in layerwise_params:
            diffs_cat = torch.cat(layerwise_params[i], dim=0)
            mean_diffs.append(diffs_cat.abs().mean().item())
            std_diffs.append(diffs_cat.abs().std().item())
        else:
            mean_diffs.append(0.0)
            std_diffs.append(0.0)

    return mean_diffs, std_diffs

################################################################################
# 10A. BUILD DIFFERENCE VECTORS FOR TRAINING SPEAKERS
################################################################################
train_samples = list(train_ds)
samples_by_speaker_train = defaultdict(list)
for ex in train_samples:
    samples_by_speaker_train[ex["speakerid"]].append(ex)

# We'll pick 1 row per speaker in the TRAIN set
speaker_to_sample_train = {}
random.seed(1234)
for spk in chosen_speakers_train:
    spk_rows = samples_by_speaker_train[spk]
    chosen_row = random.choice(spk_rows)
    speaker_to_sample_train[spk] = chosen_row

# Load base model once (on CPU)
base_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").eval()
base_model.to("cpu")

LAYER_OFFSET = 1
NUM_LAYERS   = 12
LAYERS_TO_USE = list(range(LAYER_OFFSET, NUM_LAYERS))  # layers 1..11 (0-based)

train_results = {}

print("\n=== Building difference vectors for training speakers ===")
for spk_id in chosen_speakers_train:
    sample = speaker_to_sample_train[spk_id]
    print(f"Fine-tuning on speaker {spk_id} (sex={sample['sex']})")

    trained_model = train_on_single_sample(sample)
    mean_diffs, std_diffs = compute_layerwise_diff(trained_model, base_model, NUM_LAYERS)

    # Keep layers [1..11]
    mean_diffs = [mean_diffs[i] for i in LAYERS_TO_USE]
    std_diffs  = [std_diffs[i]  for i in LAYERS_TO_USE]

    train_results[spk_id] = {
        "sex": sample["sex"],
        "mean_diffs": mean_diffs,
        "std_diffs": std_diffs
    }

    del trained_model
    torch.cuda.empty_cache()

################################################################################
# 10B. CREATE TRAINING MATRICES FOR A NEURAL NETWORK
################################################################################
# We will use a simple approach: For each speaker, we create a feature vector
# by concatenating the mean_diffs and std_diffs => total size 2*len(LAYERS_TO_USE).
# Label: 0 = M, 1 = F.

X_train = []
y_train = []

for spk_id, data in train_results.items():
    x_vec = np.concatenate([data["mean_diffs"], data["std_diffs"]])  # length=22
    X_train.append(x_vec)
    sex_label = 0 if data["sex"] == "male" else 1
    y_train.append(sex_label)

X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.int64)

################################################################################
# 12. PREPARE TEST SPEAKERS (ONE ROW PER SPEAKER)
################################################################################
test_samples = list(test_ds)
samples_by_speaker_test = defaultdict(list)
for ex in test_samples:
    samples_by_speaker_test[ex["speakerid"]].append(ex)

speaker_to_sample_test = {}
random.seed(1234)
for spk in chosen_speakers_test:
    spk_rows = samples_by_speaker_test[spk]
    chosen_row = random.choice(spk_rows)
    speaker_to_sample_test[spk] = chosen_row

################################################################################
# 13. BUILD NEURAL NETWORK FOR SEX CLASSIFICATION
#     (Using the difference vectors as input)
################################################################################
import torch.nn as nn
import torch.optim as optim

class SexNet(nn.Module):
    def __init__(self, input_dim=22, hidden_dim=16):
        super(SexNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 2)  # 2 outputs (M vs. F)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Create dataset + loader for training
X_train_tensor = torch.from_numpy(X_train)
y_train_tensor = torch.from_numpy(y_train)

train_dataset_torch = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset_torch, batch_size=8, shuffle=True)

# Instantiate model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_nn = SexNet(input_dim=2*len(LAYERS_TO_USE), hidden_dim=16).to(device)

# Optim + Loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_nn.parameters(), lr=1e-3)

# Train loop
epochs = 20
model_nn.train()
for epoch in range(epochs):
    running_loss = 0.0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model_nn(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    if (epoch+1) % 5 == 0:
        avg_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

################################################################################
# 14. EVALUATE ON TEST SPEAKERS
################################################################################
model_nn.eval()

# Compute difference vectors for test set, then classify
X_test = []
y_test = []
test_spk_ids = []
for spk_id in chosen_speakers_test:
    sample = speaker_to_sample_test[spk_id]
    true_sex = sample["sex"]
    true_label = 0 if true_sex == "male" else 1

    # Fine-tune a Wav2Vec2 model on this single sample
    test_model = train_on_single_sample(sample)
    mean_diffs, std_diffs = compute_layerwise_diff(test_model, base_model, NUM_LAYERS)
    mean_diffs = [mean_diffs[i] for i in LAYERS_TO_USE]
    std_diffs  = [std_diffs[i]  for i in LAYERS_TO_USE]

    x_test_vec = np.concatenate([mean_diffs, std_diffs])
    X_test.append(x_test_vec)
    y_test.append(true_label)
    test_spk_ids.append(spk_id)

    del test_model
    torch.cuda.empty_cache()

X_test = np.array(X_test, dtype=np.float32)
y_test = np.array(y_test, dtype=np.int64)

X_test_tensor = torch.from_numpy(X_test).to(device)
with torch.no_grad():
    logits = model_nn(X_test_tensor)
    preds = torch.argmax(logits, dim=1).cpu().numpy()

# Accuracy
correct = (preds == y_test).sum()
accuracy = correct / len(y_test)
print(f"\nNeural Net Test Accuracy: {accuracy*100:.2f}%")

# Compute EER (Equal Error Rate)
# We'll do it by looking at the predicted probability for class=1 (female),
# and vary the threshold from 0..1 to find the point where FRR=FAR.
def compute_eer(labels, scores_for_positive):
    """
    labels: np array of 0/1
    scores_for_positive: probability of label=1
    We'll check thresholds in ascending order of unique scores.
    """
    # Sort unique scores
    thresholds = np.unique(scores_for_positive)
    # Edge cases: also consider threshold < min and > max
    thresholds = np.concatenate(([0.0], thresholds, [1.0]))

    best_eer = 1.0
    for thr in thresholds:
        # Predict "1" if score>=thr, else "0"
        preds_1 = (scores_for_positive >= thr).astype(int)

        # FRR = FN / total_pos
        # FAR = FP / total_neg
        # where pos label=1, neg label=0
        fn = np.sum((labels == 1) & (preds_1 == 0))
        tp = np.sum((labels == 1) & (preds_1 == 1))
        fp = np.sum((labels == 0) & (preds_1 == 1))
        tn = np.sum((labels == 0) & (preds_1 == 0))

        total_pos = tp + fn
        total_neg = tn + fp

        if total_pos == 0 or total_neg == 0:
            continue

        frr = fn / total_pos if total_pos > 0 else 0.0
        far = fp / total_neg if total_neg > 0 else 0.0

        # EER is where frr ~ far
        # We'll take the difference
        diff = abs(frr - far)
        if diff < best_eer:
            # Not exactly EER, but we pick threshold with minimal difference
            best_eer = diff

    return best_eer

# Grab probability of class=1 (female) from the logits:
softmax_scores = torch.softmax(logits, dim=1).cpu().numpy()
scores_for_female = softmax_scores[:,1]  # Probability for female

eer_diff = compute_eer(y_test, scores_for_female)
print(f"EER (approx by minimal diff point): {eer_diff*100:.2f}%")

print("\nDone.")

Resolving data files:   0%|          | 0/21 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/21 [00:00<?, ?it/s]

Full dataset size: 2138
Columns: ['age', 'age_onset', 'birthplace', 'filename', 'native_language', 'sex', 'speakerid', 'country', 'file_missing?', 'file_exists', '__index_level_0__', 'audio']
Unique speakerid: 2138
Chosen training male:   [137, 71, 673, 584, 547, 161, 132, 1642, 2172, 1962, 1226, 76, 2121, 127, 536, 555, 1469, 2046, 73, 1709, 521, 1222, 538, 1295, 1895, 695, 2077, 443, 876, 427, 535, 869, 1075, 129, 937, 889, 637, 86, 1312, 1635, 148, 1072, 2165, 1661, 767, 951, 1786, 517, 112, 92]
Chosen training female: [1209, 1799, 1093, 1214, 87, 1371, 1277, 1951, 1358, 121, 1362, 1348, 164, 739, 69, 793, 1319, 78, 1493, 2029, 94, 75, 702, 1688, 1648, 1957, 1320, 1341, 503, 1113, 1339, 490, 1416, 1373, 1884, 540, 606, 1412, 1552, 1395, 119, 1694, 111, 542, 90, 774, 1351, 921, 1428, 817]
Chosen test male:       [1995, 1340, 1640, 497, 835, 492, 791, 825, 1530, 1352, 1259, 664, 489, 1878, 142, 106, 162, 515, 519, 1410]
Chosen test female:     [1873, 1571, 1215, 1212, 1280, 1323, 636,

Filter:   0%|          | 0/2138 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2138 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



=== Building difference vectors for training speakers ===
Fine-tuning on speaker 137 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 71 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 673 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 584 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 547 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 161 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 132 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1642 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2172 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1962 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1226 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 76 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2121 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 127 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 536 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 555 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1469 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2046 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 73 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1709 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 521 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1222 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 538 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1295 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1895 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 695 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2077 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 443 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 876 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 427 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 535 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 869 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1075 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 129 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 937 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 889 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 637 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 86 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1312 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1635 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 148 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1072 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2165 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1661 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 767 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 951 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1786 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 517 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 112 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 92 (sex=male)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1209 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1799 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1093 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1214 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 87 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1371 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1277 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1951 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1358 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 121 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1362 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1348 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 164 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 739 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 69 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 793 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1319 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 78 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1493 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 2029 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 94 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 75 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 702 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1688 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1648 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1957 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1320 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1341 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 503 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1113 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1339 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 490 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1416 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1373 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1884 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 540 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 606 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1412 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1552 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1395 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 119 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1694 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 111 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 542 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 90 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 774 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1351 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 921 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 1428 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Fine-tuning on speaker 817 (sex=female)


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Epoch 5/20, Loss: 0.6953
Epoch 10/20, Loss: 0.6934
Epoch 15/20, Loss: 0.6940
Epoch 20/20, Loss: 0.6930


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss



Neural Net Test Accuracy: 50.00%
EER (approx by minimal diff point): 100.00%

Done.
