In [1]:
from context import speechact
import speechact.classifier.embedding as emb
import speechact.classifier.base as b
import speechact.evaluation as evaluation
import speechact.corpus as corp
import speechact.annotate as anno
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The `.value` of every entry in emb.SPEECH_ACTS; passed to the evaluation as the label set.
labels = [speech_act.value for speech_act in emb.SPEECH_ACTS]

# Load the corpora (the two annotated sets are the upsampled versions).
# NOTE: test_corpus is read from the file named "...-train-upsampled" and
# dev_corpus from the file named "...-test-upsampled". According to the
# original author this is intentional: the file names are correct even
# though they look swapped.
train_corpus = corp.Corpus('../data/auto-annotated data/speech-acts.conllu.bz2')
dev_corpus = corp.Corpus('../data/annotated data/dev-set-sentiment-test-upsampled.conllu.bz2')
test_corpus = corp.Corpus('../data/annotated data/dev-set-sentiment-train-upsampled.conllu.bz2')


In [3]:
def plot_loss_histories(loss_history: list[float], dev_loss_history: list[float]) -> None:
    """Plot the training and dev loss curves on one set of axes and show the figure.

    Each history is plotted against its own epoch range (1..len(history)), so
    the two lists do not need to have the same length. Previously both were
    plotted against the range derived from `loss_history`, which makes
    matplotlib raise a ValueError when the dev history is shorter or longer.

    Args:
        loss_history: Training loss values, one per epoch.
        dev_loss_history: Dev loss values, one per epoch.
    """
    # Explicit Axes instead of the pyplot state machine: always a fresh figure.
    _, ax = plt.subplots()

    series = (
        (loss_history, 'Training Loss'),
        (dev_loss_history, 'Dev Loss'),
    )
    for history, label in series:
        # Epochs are 1-based on the x axis.
        ax.plot(range(1, len(history) + 1), history, label=label)

    ax.set_title('Training and Dev Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()

In [4]:
def train_model(corpus: corp.Corpus, dev_corpus: corp.Corpus, model_name: str,
                batch_size: int = 32) -> None:
    """Train a new EmbeddingClassifier, save it, and plot the loss histories.

    Args:
        corpus: Corpus wrapped in an `emb.CorpusDataset` and used as the
            training data.
        dev_corpus: Corpus wrapped in an `emb.CorpusDataset` and passed to
            training as `dev_data`.
        model_name: Passed to training as `save_each_epoch` and afterwards to
            `classifier.save`. The call site in this notebook uses a `.pth`
            file path.
        batch_size: Second positional argument of `classifier.train`.
            Defaults to 32, the value that was previously hard-coded.
            Assumed to be the mini-batch size; confirm against
            `EmbeddingClassifier.train`.
    """
    print('Load classifier')
    classifier = emb.EmbeddingClassifier()

    print('Load dataset')
    dataset = emb.CorpusDataset(corpus)
    dev_dataset = emb.CorpusDataset(dev_corpus)

    # These lists are handed to `train`, which is expected to fill them in
    # place; they are plotted once training has finished.
    loss_history: list[float] = []
    dev_loss_history: list[float] = []

    print(f'Train classifier: {model_name}')
    classifier.train(dataset, batch_size, loss_history=loss_history,
                     dev_loss_history=dev_loss_history,
                     dev_data=dev_dataset, save_each_epoch=model_name,
                     use_class_weights=True)
    classifier.save(model_name)

    plot_loss_histories(loss_history, dev_loss_history)

In [5]:
def evaluate_model(corpus: corp.Corpus, model_name: str):
    """Load a saved EmbeddingClassifier and evaluate it on `corpus`.

    The classifier is restored from `model_name` and evaluated against the
    notebook-level `labels` list, with confusion-matrix drawing enabled.

    Returns:
        Whatever `evaluation.evaluate` returns for this corpus and classifier.
    """
    print('Load classifier')
    model = emb.EmbeddingClassifier()
    model.load(model_name)

    print(f'Evaluate classifier: {model_name}')
    results = evaluation.evaluate(corpus, model, labels, draw_conf_matrix=True)
    return results

# Train the Model

In [6]:
# Train on the auto-annotated corpus; dev_corpus supplies the dev loss.
train_model(
    corpus=train_corpus,
    dev_corpus=dev_corpus,
    model_name='../models/neural/no-hidden/final-model.pth',
)

Load classifier
Load dataset
Train classifier: ../models/neural/no-hidden/final-model.pth


Training: epoch 1/10", unit="batch:   3%|▎         | 3032/102932 [18:02<9:54:27,  2.80it/s] 


KeyboardInterrupt: 

# Evaluate the Model

In [None]:
# Evaluate the model saved by the training cell on test_corpus.
evaluate_model(
    corpus=test_corpus,
    model_name='../models/neural/no-hidden/final-model.pth',
)