In [8]:
from context import speechact
import speechact.classifier.embedding as emb
import speechact.classifier.base as b
import speechact.evaluation as evaluation
import speechact.corpus as corp
import speechact.annotate as anno
import matplotlib.pyplot as plt

In [9]:
# Label strings for every speech act listed in the embedding classifier module.
labels = [speech_act.value for speech_act in emb.SPEECH_ACTS]

# Auto-annotated corpus; this is the one passed to train_model further down.
train_corpus = corp.Corpus('../data/auto-annotated data/speech-acts.conllu.bz2')

# Upsampled annotated data.
# NOTE: the file names look swapped relative to the variable names (the dev
# corpus reads the "...-test-..." file and the test corpus reads the
# "...-train-..." file). This is intentional -- do not "fix" the paths.
dev_corpus = corp.Corpus('../data/annotated data/dev-set-sentiment-test-upsampled.conllu.bz2')
test_corpus = corp.Corpus('../data/annotated data/dev-set-sentiment-train-upsampled.conllu.bz2')


In [10]:
def plot_loss_histories(loss_history: list[float], dev_loss_history: list[float]):
    """Plot the training and dev loss curves on one set of axes.

    Each history is drawn against its own 1-based epoch range, so the two
    lists are allowed to differ in length (for example when a run stops after
    a training epoch but before the matching dev evaluation). Sharing a single
    x-range between both curves would make matplotlib raise a ``ValueError``
    for such input.

    Args:
        loss_history: Training loss values, one entry per plotted epoch.
        dev_loss_history: Dev loss values, one entry per plotted epoch.
    """
    _, ax = plt.subplots()

    curves = (
        (loss_history, 'Training Loss'),
        (dev_loss_history, 'Dev Loss'),
    )
    for history, curve_label in curves:
        # Build the x-values per curve so x and y always have equal length.
        epochs = range(1, len(history) + 1)
        ax.plot(epochs, history, label=curve_label)

    ax.set_title('Training and Dev Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()

In [11]:
def train_model(corpus: corp.Corpus, dev_corpus: corp.Corpus, model_name: str):
    """Train an embedding classifier, save it, and plot its loss curves.

    A fresh ``emb.EmbeddingClassifier`` is trained on ``corpus`` while
    ``dev_corpus`` is supplied as dev data. After training, the classifier is
    saved to ``model_name`` and the collected loss histories are plotted.

    If training or saving is aborted (including by ``KeyboardInterrupt``), the
    loss values of the epochs completed so far are still plotted before the
    exception is re-raised, so a long run that is stopped early does not lose
    its loss curves.

    Args:
        corpus: Corpus to train on.
        dev_corpus: Corpus passed to the trainer as dev data.
        model_name: Path handed to ``save_each_epoch`` and to the final
            ``classifier.save`` call.

    Raises:
        BaseException: Whatever ``classifier.train`` / ``classifier.save``
            raised is propagated unchanged after the partial plot.
    """
    print('Load classifier')
    classifier = emb.EmbeddingClassifier()

    print('Load dataset')
    dataset = emb.CorpusDataset(corpus)
    dev_dataset = emb.CorpusDataset(dev_corpus)

    # The trainer appends to these lists; they stay readable here even if
    # training stops part-way through.
    loss_history = []
    dev_loss_history = []

    print(f'Train classifier: {model_name}')
    try:
        # NOTE(review): 32 is presumably the batch size -- confirm against
        # EmbeddingClassifier.train.
        classifier.train(dataset, 32, loss_history=loss_history,
                         dev_loss_history=dev_loss_history,
                         dev_data=dev_dataset, save_each_epoch=model_name,
                         use_class_weights=True)
        classifier.save(model_name)
    except BaseException:
        # Only plot epochs for which BOTH values exist, so the two curves
        # have equal length regardless of where the run was cut off.
        completed = min(len(loss_history), len(dev_loss_history))
        if completed:
            plot_loss_histories(loss_history[:completed],
                                dev_loss_history[:completed])
        raise

    plot_loss_histories(loss_history, dev_loss_history)

In [12]:
def evaluate_model(corpus: corp.Corpus, model_name: str):
    """Load a saved embedding classifier and evaluate it on ``corpus``.

    Args:
        corpus: Corpus to evaluate on.
        model_name: Path passed to ``EmbeddingClassifier.load``.

    Returns:
        Whatever ``evaluation.evaluate`` returns for this corpus and
        classifier. The notebook-level ``labels`` list is passed along and
        confusion-matrix drawing is enabled.
    """
    print('Load classifier')
    model = emb.EmbeddingClassifier()
    model.load(model_name)

    print(f'Evaluate classifier: {model_name}')
    results = evaluation.evaluate(corpus, model, labels, draw_conf_matrix=True)
    return results

# Train the Model

In [13]:
# Long-running cell: in the recorded run each training epoch took roughly
# 12 hours, and the model file is re-saved after every epoch.
train_model(
    corpus=train_corpus,
    dev_corpus=dev_corpus,
    model_name='../models/neural/no-hidden/final-model.pth',
)

Load classifier


Load dataset
Train classifier: ../models/neural/no-hidden/final-model.pth


Training: epoch 1/10", unit="batch: 100%|██████████| 102932/102932 [12:19:30<00:00,  2.32it/s] 


Saving model to "../models/neural/no-hidden/final-model.pth"
Epoch 1/10, Loss: 0.7533875958772465


Eval on dev data: epoch 1/10", unit="batch: 100%|██████████| 34/34 [00:12<00:00,  2.77it/s]


Epoch 1/10, Dev loss: 0.6058076111709371


Training: epoch 2/10", unit="batch: 100%|██████████| 102932/102932 [12:13:11<00:00,  2.34it/s] 


Saving model to "../models/neural/no-hidden/final-model.pth"
Epoch 2/10, Loss: 0.7001822348486465


Eval on dev data: epoch 2/10", unit="batch: 100%|██████████| 34/34 [00:11<00:00,  3.00it/s]


Epoch 2/10, Dev loss: 0.5847504770054537


Training: epoch 3/10", unit="batch: 100%|██████████| 102932/102932 [12:13:55<00:00,  2.34it/s] 


Saving model to "../models/neural/no-hidden/final-model.pth"
Epoch 3/10, Loss: 0.6876029005967683


Eval on dev data: epoch 3/10", unit="batch: 100%|██████████| 34/34 [00:12<00:00,  2.75it/s]


Epoch 3/10, Dev loss: 0.5745385911534814


Training: epoch 4/10", unit="batch: 100%|██████████| 102932/102932 [12:21:14<00:00,  2.31it/s] 


Saving model to "../models/neural/no-hidden/final-model.pth"
Epoch 4/10, Loss: 0.6813968165765644


Eval on dev data: epoch 4/10", unit="batch: 100%|██████████| 34/34 [00:11<00:00,  3.08it/s]


Epoch 4/10, Dev loss: 0.5554561834124958


Training: epoch 5/10", unit="batch: 100%|██████████| 102932/102932 [12:23:05<00:00,  2.31it/s] 


Saving model to "../models/neural/no-hidden/final-model.pth"
Epoch 5/10, Loss: 0.6775944436474449


Eval on dev data: epoch 5/10", unit="batch: 100%|██████████| 34/34 [00:11<00:00,  2.99it/s]


Epoch 5/10, Dev loss: 0.5650510538150283


Training: epoch 6/10", unit="batch:  36%|███▌      | 36875/102932 [4:27:02<7:58:21,  2.30it/s] 


KeyboardInterrupt: 

# Evaluate the Model

In [None]:
# Evaluate the saved model file on test_corpus; the returned result is the
# cell's displayed output.
evaluate_model(
    test_corpus,
    model_name='../models/neural/no-hidden/final-model.pth',
)