In [None]:
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import joblib

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device : {device}")

Using device : cuda


In [None]:
# Load the pretrained uncased DistilBERT tokenizer and encoder. num_labels=2 attaches a
# two-class sequence-classification head; as the loader warning below states, that head's
# weights are newly initialised and only become useful after fine-tuning.
distilbert_tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
distilbert_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Move the model onto the selected device; as the last expression of the cell, the
# returned module is also displayed.
distilbert_model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [None]:
# Load the transcript table; later cells read its 'Transcript' and 'Binary Label' columns.
asr_data = pd.read_csv("Datasets/transcripts_with_labels_1.csv")

In [None]:
# Preview the first ten rows as a sanity check on the columns and label values.
asr_data.head(10)

Unnamed: 0,Transcript,Label,Binary Label
0,myself limiting,NoStutteredWords,No-Stutter
1,And I challenge those beliefs.,NoStutteredWords,No-Stutter
2,assertive when we change.,NoStutteredWords,No-Stutter
3,the roles which,NoStutteredWords,No-Stutter
4,I explained to her.,NoStutteredWords,No-Stutter
5,with their relationships.,NoStutteredWords,No-Stutter
6,to go on that course.,NoStutteredWords,No-Stutter
7,in 2000.,NoStutteredWords,No-Stutter
8,"Yes, but...",NoStutteredWords,No-Stutter
9,people who still stay.,NoStutteredWords,No-Stutter


In [None]:
# Preview the last ten rows as well, to see label values away from the start of the file.
asr_data.tail(10)

Unnamed: 0,Transcript,Label,Binary Label
18027,"I kind of, uh...",Interjection,Stutter
18028,And then I think I mentioned earlier that I'v...,SoundRep,Stutter
18029,I thought I had solved this.,Block,Stutter
18030,When I sort of reached maybe like,NoStutteredWords,No-Stutter
18031,"It's just a process, it's a life process.",NoStutteredWords,No-Stutter
18032,of shame.,Prolongation,Stutter
18033,in the human experience for a lot.,NoStutteredWords,No-Stutter
18034,Depression.,NoStutteredWords,No-Stutter
18035,those are things that,NoStutteredWords,No-Stutter
18036,"You know, it's unbelievable.",Interjection,Stutter


In [None]:
# Encode the binary class names as integers: 0 = no stutter, 1 = stutter.
label_mapping = {'No-Stutter':0, 'Stutter':1}
asr_data['numeric_label'] = asr_data['Binary Label'].map(label_mapping)

# Series.map() yields NaN for any value that is not a key of the mapping. Fail here, naming
# the offending values, rather than letting NaN labels surface later during training.
unmapped_asr_labels = asr_data.loc[asr_data['numeric_label'].isna(), 'Binary Label'].unique()
if len(unmapped_asr_labels) > 0:
    raise ValueError(f"Unmapped 'Binary Label' values in transcript data: {list(unmapped_asr_labels)!r}")

In [None]:
# Transcripts as plain Python strings for the tokenizer. Missing transcripts are NaN in
# pandas, and astype('str') alone would turn them into the literal text "nan"; replace
# them with an empty string first so the tokenizer never sees that artefact.
texts = asr_data['Transcript'].fillna('').astype('str').to_list()
# Integer class label for each transcript, in the same row order as `texts`.
labels = asr_data['numeric_label'].to_list()

In [None]:
# Tokenize every transcript in a single call. padding=True pads each sequence to the longest
# one in this call, truncation=True clips over-long inputs, and return_tensors="pt" returns
# PyTorch tensors.
encodings = distilbert_tokenizer(texts, padding=True,truncation=True, return_tensors="pt")

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer output with integer class labels.

    Args:
        encodings: mapping from field name to an indexable batch of values, where
            index ``i`` of every field belongs to sample ``i``.
        labels: indexable sequence of class ids, one per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of its encoded fields plus a ``'labels'`` tensor."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        # The label is exposed under the key 'labels' as a long tensor.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

In [None]:
# Wrap the tokenized transcripts and their integer labels so they can be indexed per sample.
dataset = CustomDataset(encodings, labels)

In [None]:
# Hold out 20% of the transcripts for validation during fine-tuning. The split is drawn from
# an explicitly seeded generator so that re-running the notebook reproduces the same
# partition (the unseeded call produced a different split on every run).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Hyper-parameters for fine-tuning DistilBERT with the Hugging Face Trainer.
training_args = TrainingArguments(
    output_dir='./results_distilbert',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs_distilbert',
    # Evaluate at a step interval rather than once per epoch (the training log below
    # reports every 500 steps).
    # NOTE(review): recent transformers releases rename this argument to `eval_strategy` —
    # confirm against the installed version.
    evaluation_strategy="steps",
    # Gradients are accumulated over 2 batches, i.e. 8 x 2 = 16 samples per optimizer step.
    gradient_accumulation_steps=2
)



In [None]:
# Bind the model, the hyper-parameters and the two transcript splits together.
# No compute_metrics callback is supplied, so evaluation reports the loss only.
trainer = Trainer(
    model=distilbert_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

In [None]:
%%time
# Fine-tune DistilBERT; %%time reports the wall-clock cost (about 40 minutes in the recorded run).
# NOTE(review): in the recorded log the validation loss rises after step 1500 while the
# training loss keeps falling, which looks like overfitting — consider fewer epochs or
# keeping the best checkpoint.
trainer.train()

Step,Training Loss,Validation Loss
500,0.6522,0.662635
1000,0.6234,0.622095
1500,0.5718,0.626319
2000,0.5106,0.723098
2500,0.4116,0.751705


CPU times: user 37min 21s, sys: 12.6 s, total: 37min 34s
Wall time: 39min 46s


TrainOutput(global_step=2706, training_loss=0.5421178892464085, metrics={'train_runtime': 2385.0572, 'train_samples_per_second': 18.149, 'train_steps_per_second': 1.135, 'total_flos': 5734116285622272.0, 'train_loss': 0.5421178892464085, 'epoch': 3.0})

In [None]:
# Persist the fine-tuned model and its tokenizer to separate local directories.
distilbert_model.save_pretrained('./distilbert_model')
distilbert_tokenizer.save_pretrained('./distilbert_tokenizer')

('./distilbert_tokenizer/tokenizer_config.json',
 './distilbert_tokenizer/special_tokens_map.json',
 './distilbert_tokenizer/vocab.txt',
 './distilbert_tokenizer/added_tokens.json',
 './distilbert_tokenizer/tokenizer.json')

In [None]:
# Load the MFCC feature table; later cells read its 'Label' and 'Binary Label' columns and
# treat every remaining column as a feature.
# NOTE(review): the fusion step later concatenates these rows with the transcript rows by
# position — presumably both files share row order and length; confirm.
mfcc_data = pd.read_csv('Datasets/mfcc_features_with_labels_4.csv')

In [None]:
# Encode the MFCC table's binary class names with the same mapping used for the transcripts.
mfcc_data['numeric_label'] = mfcc_data['Binary Label'].map(label_mapping)

# Series.map() yields NaN for any value that is not a key of the mapping. Fail here, naming
# the offending values, rather than training on NaN targets.
unmapped_mfcc_labels = mfcc_data.loc[mfcc_data['numeric_label'].isna(), 'Binary Label'].unique()
if len(unmapped_mfcc_labels) > 0:
    raise ValueError(f"Unmapped 'Binary Label' values in MFCC data: {list(unmapped_mfcc_labels)!r}")

In [None]:
# Every column except the three label columns is used as an input feature.
label_columns = ['Label', 'Binary Label', 'numeric_label']
mfcc_features = mfcc_data.drop(columns=label_columns).to_numpy()

# NOTE: this rebinds `labels`, which until now held the transcript labels.
labels = mfcc_data['numeric_label'].to_numpy()

In [None]:
# 70/30 train/validation split of the MFCC rows, with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(mfcc_features, labels, test_size=0.3, random_state=42)

In [None]:
class MFCC_Network(nn.Module):
    """Small feed-forward binary classifier: input_dim -> 64 -> 32 -> 1.

    The output is a single raw logit per row (no sigmoid is applied here).

    Args:
        input_dim: number of input features per row.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Three linear layers; ReLU and dropout (p=0.25) are shared between the hidden layers.
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.25)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return one logit per input row, shape ``(batch, 1)``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [None]:
# Convert all four NumPy splits to float32 tensors. The targets are float as well because
# the loss used for this network (BCEWithLogitsLoss) takes floating-point targets.
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    torch.tensor(split_array, dtype=torch.float32)
    for split_array in (X_train, y_train, X_val, y_val)
)

In [None]:
# Pair each feature row with its target.
# NOTE: this rebinds train_dataset / val_dataset, which earlier referred to the transcript
# splits handed to the Trainer.
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)

# Mini-batches of 32; only the training batches are shuffled.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [None]:
# Seed PyTorch's global RNG before the network is created so that weight initialisation,
# batch shuffling and dropout are repeatable across Restart & Run All (previously unseeded).
torch.manual_seed(42)

# One input unit per MFCC feature column.
input_dim = X_train.shape[1]
mfcc_model = MFCC_Network(input_dim)

# The network outputs raw logits, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(mfcc_model.parameters(), lr=0.001)

In [None]:
# Train the MFCC network for a fixed number of epochs, validating after each one.
#
# Fix: the epoch summary used to print `loss.item()` as "Training Loss", but by that point
# `loss` had been overwritten by the last *validation* batch. The training loss is now
# accumulated separately and reported as the mean over training batches. The validation
# accuracy, which was computed but never shown, is reported too.
num_epochs = 30
for epoch in range(num_epochs):
    # ---- training pass (dropout active) ----
    mfcc_model.train()
    train_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = mfcc_model(data)
        # Targets arrive as shape (batch,); the model emits (batch, 1).
        target = target.unsqueeze(1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch losses; max(..., 1) guards against an empty loader.
    train_loss /= max(len(train_loader), 1)

    # ---- validation pass (dropout disabled, no gradients) ----
    mfcc_model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in val_loader:
            output = mfcc_model(data)
            target = target.unsqueeze(1)
            val_loss += criterion(output, target).item()
            # Threshold the predicted probability at 0.5 to get a hard class decision.
            probs = torch.sigmoid(output)
            predicted = (probs > 0.5).float()
            total += target.size(0)
            correct += (predicted == target).sum().item()

    val_loss /= max(len(val_loader), 1)
    val_accuracy = correct / max(total, 1)

    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

Epoch 1/30, Training Loss: 0.6955179572105408, Validation Loss: 0.681639503030216
Epoch 2/30, Training Loss: 0.69195955991745, Validation Loss: 0.679287784239825
Epoch 3/30, Training Loss: 0.6948627829551697, Validation Loss: 0.6738835236605476
Epoch 4/30, Training Loss: 0.6752471923828125, Validation Loss: 0.6762296340044807
Epoch 5/30, Training Loss: 0.6669188141822815, Validation Loss: 0.6745116850909065
Epoch 6/30, Training Loss: 0.674771785736084, Validation Loss: 0.6690268369281993
Epoch 7/30, Training Loss: 0.6458699107170105, Validation Loss: 0.6624691395198598
Epoch 8/30, Training Loss: 0.6596409678459167, Validation Loss: 0.6599424800452064
Epoch 9/30, Training Loss: 0.626015305519104, Validation Loss: 0.6608275739585652
Epoch 10/30, Training Loss: 0.6195699572563171, Validation Loss: 0.6560220290632809
Epoch 11/30, Training Loss: 0.6939972639083862, Validation Loss: 0.6565064872012419
Epoch 12/30, Training Loss: 0.632562518119812, Validation Loss: 0.6524908472509945
Epoch 13

In [None]:
# Score every MFCC row (training and validation rows alike) with the trained network.
# The network ends in a single linear unit, so each "embedding" is a one-element logit.
mfcc_model.eval()
all_mfcc_inputs = torch.tensor(mfcc_features, dtype=torch.float32)
with torch.no_grad():
    mfcc_logits = mfcc_model(all_mfcc_inputs)
mfcc_embeddings = mfcc_logits.numpy()

In [None]:
# Persist only the learned weights (state dict); reloading requires re-creating MFCC_Network first.
torch.save(mfcc_model.state_dict(), './ffnn_model.pth')

In [None]:
# Extract one fixed-size sentence embedding per transcript from the fine-tuned DistilBERT.
#
# Fixes relative to the previous version:
#   * The model is switched to eval mode first. After training it can still be in train
#     mode, in which case dropout would add noise to the embeddings on every run.
#   * Mean pooling now ignores padding positions. Averaging over the padded length made a
#     sentence's embedding depend on how long its batch neighbours happened to be.
#   * Labels are read from the transcript table. The global `labels` was rebound to the
#     MFCC labels in an earlier cell, so slicing it here paired transcripts with labels
#     from a different dataset.
batch_size = 8

text_labels = asr_data['numeric_label'].to_numpy()

distilbert_model.eval()

all_embeddings = []
all_labels = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_labels = text_labels[i:i+batch_size]
    inputs = distilbert_tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = distilbert_model(**inputs, output_hidden_states=True)
        # Last entry of hidden_states = output of the final transformer layer.
        hidden_states = outputs.hidden_states[-1]
        # attention_mask is 1 on real tokens and 0 on padding; broadcast it over the hidden
        # dimension so padded positions contribute nothing to the average.
        token_mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
        token_sum = (hidden_states * token_mask).sum(dim=1)
        token_count = token_mask.sum(dim=1).clamp(min=1)
        batch_embeddings = (token_sum / token_count).cpu().numpy()

    all_embeddings.append(batch_embeddings)
    all_labels.extend(batch_labels)

# Stack the per-batch arrays into one (n_samples, hidden_size) matrix.
all_embeddings = np.vstack(all_embeddings)
all_labels = np.array(all_labels)

In [None]:
# Late fusion: place the MFCC network's output next to the DistilBERT sentence embedding
# for the same row. This relies on both feature matrices being in the same row order.
# NOTE(review): DistilBERT and the MFCC network were each fitted on their own splits of
# these rows, so some rows in the test split below were seen while training the feature
# extractors — the downstream scores are likely optimistic; confirm before reporting them.
combined_features = np.hstack((mfcc_embeddings, all_embeddings))

(
    X_train_combined,
    X_test_combined,
    y_train_combined,
    y_test_combined,
) = train_test_split(
    combined_features,
    all_labels,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Random forest on the fused features. random_state pins the bootstrap sampling and the
# feature sub-sampling, so the reported scores are reproducible (previously unseeded).
rf_model = RandomForestClassifier(max_depth=20, n_estimators=400, random_state=42)
rf_model.fit(X_train_combined, y_train_combined)
rf_predictions = rf_model.predict(X_test_combined)

In [None]:
# Report the four headline metrics for the random forest on the held-out split.
for metric_name, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f'{metric_name} RF : {metric_fn(y_test_combined, rf_predictions)}')

Accuracy RF : 0.8192904656319291
Precision RF : 0.8252901649358583
Recall RF : 0.8693693693693694
F1 Score RF : 0.8467565026637417


In [None]:
# Persistence is currently disabled; uncomment to write the fitted random forest to disk.
#joblib.dump(rf_model, "Random_Forest_Classifier.joblib")

['Random_Forest_Classifier.joblib']

In [None]:
# RBF-kernel support vector machine on the fused features; fit() returns the estimator itself.
# NOTE(review): the features are passed in unscaled — consider standardising them for an RBF kernel.
svm_model = SVC(kernel='rbf', C=2).fit(X_train_combined, y_train_combined)
svm_predictions = svm_model.predict(X_test_combined)

In [None]:
# Report the four headline metrics for the SVM on the held-out split.
for metric_name, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f'{metric_name} SVM: {metric_fn(y_test_combined, svm_predictions)}')

Accuracy SVM: 0.8187361419068736
Precision SVM: 0.8231540565177757
Recall SVM: 0.8716216216216216
F1 Score SVM: 0.8466947960618847


In [None]:
# Persistence is currently disabled; uncomment to write the fitted SVM to disk.
#joblib.dump(svm_model, "SVM_Classifier.joblib")

['SVM_Classifier.joblib']

In [None]:
# Single decision tree on the fused features. random_state fixes the tie-breaking between
# equally good splits, so the reported scores are reproducible (previously unseeded).
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_combined, y_train_combined)
dt_prediction = dt_model.predict(X_test_combined)

In [None]:
# Report the four headline metrics for the decision tree on the held-out split.
for metric_name, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f'{metric_name} DT: {metric_fn(y_test_combined, dt_prediction)}')

Accuracy DT: 0.7348484848484849
Precision DT: 0.7678514249119437
Recall DT: 0.7715572715572716
F1 Score DT: 0.7696998876584819


In [None]:
# Persistence is currently disabled; uncomment to write the fitted decision tree to disk.
#joblib.dump(dt_model, "Decision_Tree_Classifier.joblib")

['Decision_Tree_Classifier.joblib']

In [None]:
# Gaussian naive Bayes on the fused features; fit() returns the estimator itself.
nb_classifier = GaussianNB().fit(X_train_combined, y_train_combined)
nb_predictions = nb_classifier.predict(X_test_combined)

In [None]:
# Report the four headline metrics for naive Bayes on the held-out split.
for metric_name, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1-Score", f1_score),
):
    print(f"Naive Bayes {metric_name}:", metric_fn(y_test_combined, nb_predictions))

Naive Bayes Accuracy: 0.8137472283813747
Naive Bayes Precision: 0.8331218274111675
Naive Bayes Recall: 0.8449163449163449
Naive Bayes F1-Score: 0.8389776357827475


In [None]:
# Persistence is currently disabled; uncomment to write the fitted naive Bayes model to disk.
#joblib.dump(nb_classifier, "Naive_Bayes_Classifier.joblib")

['Naive_Bayes_Classifier.joblib']