Environment: Google Colab with a T4 GPU runtime selected.

## Step 1: Train the model to classify Neutral vs. NotNeutral.

In [None]:
# import packages
import pandas as pd

# load google drive
from google.colab import drive
drive.mount('/content/gdrive')

! pip install flair  # install Flair framework

Mounted at /content/gdrive
Collecting flair
  Downloading flair-0.12.2-py3-none-any.whl (373 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.1/373.1 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting segtok>=1.5.7 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting mpld3==0.3 (from flair)
  Downloading mpld3-0.3.tar.gz (788 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m788.5/788.5 kB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sqlitedict>=1.6.0 (from flair)
  Downloading sqlitedict-2.1.0.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deprecated>=1.2.4 (from flair)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting boto3 (from flair)
  Downloading boto3-1.28.64-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m20.0 MB/s[0m et

In [None]:
# Read the labelled Enron e-mail file from the mounted Google Drive into a DataFrame.
train_csv_path = '/content/gdrive/MyDrive/Enron_email/4763.csv'
df = pd.read_csv(train_csv_path)

In [None]:
# Keep only the e-mail body and its label. The explicit .copy() yields an
# independent frame, so later column assignments do not write into a slice of
# the raw frame (the situation pandas reports as SettingWithCopyWarning).
df = df[['Content', 'Label']].copy()

# Rename to the column names used by the rest of the notebook.
df.columns = ['Text', 'Sentiment']

In [None]:
# Collapse the Positive and Negative labels into a single NotNeutral class:
# rows labelled 'Neutral' keep that label, every other value becomes 'NotNeutral'.
# A vectorised comparison + map replaces the row-by-row iterrows() loop, and
# assign() builds a new frame instead of writing into a possibly-sliced one.
is_neutral = df['Sentiment'].eq('Neutral')
df = df.assign(Sentiment=is_neutral.map({True: 'Neutral', False: 'NotNeutral'}))
df

Unnamed: 0,Text,Sentiment
0,"Ok, let me know if you got an extra ticket. Se...",Neutral
1,"Done. Sent: Tuesday, November 06, 2001 9:35 AM...",Neutral
2,"Hey, Phillip already made reservations and Jef...",Neutral
3,I probably won't be able to go. Could we send ...,NotNeutral
4,I got this from elizabeth. IF you really want ...,Neutral
...,...,...
4758,This is a summary of my position and trading g...,NotNeutral
4759,"not after today Sent: Tuesday, June 12, 2001 1...",Neutral
4760,Agreement on Power I assume you're going to le...,Neutral
4761,are you on msn messenger. i think i'm set up w...,NotNeutral


In [None]:
# Save the relabelled data as the CSV file used for training.
# to_csv() does not create missing folders, so make sure the target exists first.
import os

train_dir = '/content/gdrive/MyDrive/Enron_email/Flair/model1_train'
os.makedirs(train_dir, exist_ok=True)
df.to_csv(os.path.join(train_dir, 'train.csv'), index=False)


In [None]:
# load modules from Flair framework
from flair.data import Corpus
from flair.embeddings import TransformerDocumentEmbeddings
from flair.datasets import CSVClassificationCorpus
from flair.models import TextClassifier
from flair.trainers import ModelTrainer

# Folder that holds the training CSV.
data_path = '/content/gdrive/MyDrive/Enron_email/Flair/model1_train'

# Map CSV column positions to their roles: column 0 is the text, column 1 the label.
column_names = {0: 'text', 1: 'label'}

# Name under which the labels are registered in the corpus and the classifier.
label_type = 'Sentiment'

# Build the corpus from the CSV file, skipping its header row.
# Only a training file is passed. NOTE(review): the recorded log reports
# "Dev: None / Test: None" yet still prints DEV and test scores, so Flair
# presumably samples those splits from the training data — confirm.
corpus: Corpus = CSVClassificationCorpus(
    data_folder=data_path,
    train_file='/content/gdrive/MyDrive/Enron_email/Flair/model1_train/train.csv',
    column_name_map=column_names,  # reuse the mapping defined above instead of repeating the literal
    label_type=label_type,
    skip_header=True,
)

# Collect the label values present in the corpus.
label_dict = corpus.make_label_dictionary(label_type=label_type)

# DistilBERT document embeddings, with the transformer weights fine-tuned during training.
document_embeddings = TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=True)

# Text classifier on top of the document embeddings.
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type=label_type)

# Trainer that fits the classifier on the corpus.
trainer = ModelTrainer(classifier, corpus)

# First training pass; the model is saved under the model1 folder.
trainer.train(
    '/content/gdrive/MyDrive/Enron_email/Flair/model1',
    learning_rate=0.01,
    mini_batch_size=32,
    max_epochs=3,
)

2023-10-16 12:09:30,603 Reading data from /content/gdrive/MyDrive/Enron_email/Flair/model1_train
2023-10-16 12:09:30,607 Train: /content/gdrive/MyDrive/Enron_email/Flair/model1_train/train.csv
2023-10-16 12:09:30,609 Dev: None
2023-10-16 12:09:30,611 Test: None
2023-10-16 12:09:30,683 Computing label dictionary. Progress:


3858it [00:09, 396.84it/s]

2023-10-16 12:09:40,415 Dictionary created for label 'Sentiment' with 3 values: Neutral (seen 2471 times), NotNeutral (seen 1387 times)





Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

2023-10-16 12:09:46,136 ----------------------------------------------------------------------------------------------------
2023-10-16 12:09:46,139 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out

100%|██████████| 14/14 [00:08<00:00,  1.56it/s]

2023-10-16 12:12:58,157 Evaluating as a multi-label problem: False
2023-10-16 12:12:58,186 DEV : loss 0.6055618524551392 - f1-score (micro avg)  0.7249





2023-10-16 12:13:00,268 BAD EPOCHS (no improvement): 0
2023-10-16 12:13:00,277 saving best model
2023-10-16 12:13:01,361 ----------------------------------------------------------------------------------------------------
2023-10-16 12:13:19,617 epoch 2 - iter 12/121 - loss 0.52142234 - time (sec): 18.25 - samples/sec: 21.04 - lr: 0.010000
2023-10-16 12:13:38,187 epoch 2 - iter 24/121 - loss 0.52166884 - time (sec): 36.82 - samples/sec: 20.86 - lr: 0.010000
2023-10-16 12:13:55,692 epoch 2 - iter 36/121 - loss 0.52793849 - time (sec): 54.33 - samples/sec: 21.20 - lr: 0.010000
2023-10-16 12:14:14,169 epoch 2 - iter 48/121 - loss 0.52576120 - time (sec): 72.81 - samples/sec: 21.10 - lr: 0.010000
2023-10-16 12:14:32,277 epoch 2 - iter 60/121 - loss 0.53134478 - time (sec): 90.91 - samples/sec: 21.12 - lr: 0.010000
2023-10-16 12:14:50,768 epoch 2 - iter 72/121 - loss 0.53661696 - time (sec): 109.40 - samples/sec: 21.06 - lr: 0.010000
2023-10-16 12:15:09,231 epoch 2 - iter 84/121 - loss 0.52

100%|██████████| 14/14 [00:09<00:00,  1.47it/s]

2023-10-16 12:16:14,298 Evaluating as a multi-label problem: False
2023-10-16 12:16:14,314 DEV : loss 0.5088042616844177 - f1-score (micro avg)  0.7436





2023-10-16 12:16:15,528 BAD EPOCHS (no improvement): 0
2023-10-16 12:16:15,536 saving best model
2023-10-16 12:16:16,391 ----------------------------------------------------------------------------------------------------
2023-10-16 12:16:34,505 epoch 3 - iter 12/121 - loss 0.50662487 - time (sec): 18.11 - samples/sec: 21.20 - lr: 0.010000
2023-10-16 12:16:52,202 epoch 3 - iter 24/121 - loss 0.48219919 - time (sec): 35.81 - samples/sec: 21.45 - lr: 0.010000
2023-10-16 12:17:10,356 epoch 3 - iter 36/121 - loss 0.49078078 - time (sec): 53.96 - samples/sec: 21.35 - lr: 0.010000
2023-10-16 12:17:28,560 epoch 3 - iter 48/121 - loss 0.48906629 - time (sec): 72.17 - samples/sec: 21.28 - lr: 0.010000
2023-10-16 12:17:47,579 epoch 3 - iter 60/121 - loss 0.48891808 - time (sec): 91.19 - samples/sec: 21.06 - lr: 0.010000
2023-10-16 12:18:05,450 epoch 3 - iter 72/121 - loss 0.48645508 - time (sec): 109.06 - samples/sec: 21.13 - lr: 0.010000
2023-10-16 12:18:24,218 epoch 3 - iter 84/121 - loss 0.48

100%|██████████| 14/14 [00:09<00:00,  1.55it/s]

2023-10-16 12:19:28,531 Evaluating as a multi-label problem: False
2023-10-16 12:19:28,548 DEV : loss 0.5244603157043457 - f1-score (micro avg)  0.7739





2023-10-16 12:19:29,828 BAD EPOCHS (no improvement): 0
2023-10-16 12:19:29,834 saving best model
2023-10-16 12:19:31,539 ----------------------------------------------------------------------------------------------------


100%|██████████| 15/15 [00:11<00:00,  1.31it/s]

2023-10-16 12:19:44,620 Evaluating as a multi-label problem: False
2023-10-16 12:19:44,638 0.7437	0.7437	0.7437	0.7437
2023-10-16 12:19:44,640 
Results:
- F-score (micro) 0.7437
- F-score (macro) 0.7292
- Accuracy 0.7437

By class:
              precision    recall  f1-score   support

     Neutral     0.8406    0.7484    0.7918       310
  NotNeutral     0.6100    0.7349    0.6667       166

    accuracy                         0.7437       476
   macro avg     0.7253    0.7417    0.7292       476
weighted avg     0.7602    0.7437    0.7482       476

2023-10-16 12:19:44,642 ----------------------------------------------------------------------------------------------------





{'test_score': 0.7436974789915967,
 'dev_score_history': [0.7249417249417249,
  0.7435897435897436,
  0.7738927738927739],
 'train_loss_history': [0.6166844264159492,
  0.5267105292878416,
  0.4692852861711928],
 'dev_loss_history': [0.6055618524551392,
  0.5088042616844177,
  0.5244603157043457]}

Lower the learning rate to 0.005 and continue training the same model.

In [None]:
# Second training pass on the existing trainer, with the learning rate lowered
# from 0.01 to 0.005. Output goes to the same model1 folder as the first pass.
# (Earlier experiments saved to other folders: model_with_validation,
# bert_model_final, model_aug, model_semi; max_epochs=10 was also tried.)
model_dir = '/content/gdrive/MyDrive/Enron_email/Flair/model1'
trainer.train(
    model_dir,
    learning_rate=0.005,
    mini_batch_size=32,
    max_epochs=3,
)

2023-10-16 12:21:48,571 ----------------------------------------------------------------------------------------------------
2023-10-16 12:21:48,575 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out



2023-10-16 12:22:05,841 epoch 1 - iter 12/121 - loss 0.35090006 - time (sec): 17.23 - samples/sec: 22.29 - lr: 0.005000
2023-10-16 12:22:24,943 epoch 1 - iter 24/121 - loss 0.37926348 - time (sec): 36.33 - samples/sec: 21.14 - lr: 0.005000
2023-10-16 12:22:43,656 epoch 1 - iter 36/121 - loss 0.36943935 - time (sec): 55.04 - samples/sec: 20.93 - lr: 0.005000
2023-10-16 12:23:01,955 epoch 1 - iter 48/121 - loss 0.35183496 - time (sec): 73.34 - samples/sec: 20.94 - lr: 0.005000
2023-10-16 12:23:19,853 epoch 1 - iter 60/121 - loss 0.35480740 - time (sec): 91.24 - samples/sec: 21.04 - lr: 0.005000
2023-10-16 12:23:36,641 epoch 1 - iter 72/121 - loss 0.35327815 - time (sec): 108.03 - samples/sec: 21.33 - lr: 0.005000
2023-10-16 12:23:54,461 epoch 1 - iter 84/121 - loss 0.35412722 - time (sec): 125.85 - samples/sec: 21.36 - lr: 0.005000
2023-10-16 12:24:12,683 epoch 1 - iter 96/121 - loss 0.35258946 - time (sec): 144.07 - samples/sec: 21.32 - lr: 0.005000
2023-10-16 12:24:30,783 epoch 1 - ite

100%|██████████| 14/14 [00:09<00:00,  1.52it/s]

2023-10-16 12:25:00,193 Evaluating as a multi-label problem: False
2023-10-16 12:25:00,209 DEV : loss 0.4818975031375885 - f1-score (micro avg)  0.7692





2023-10-16 12:25:01,082 BAD EPOCHS (no improvement): 0
2023-10-16 12:25:01,089 saving best model
2023-10-16 12:25:01,949 ----------------------------------------------------------------------------------------------------
2023-10-16 12:25:19,570 epoch 2 - iter 12/121 - loss 0.41035410 - time (sec): 17.62 - samples/sec: 21.80 - lr: 0.005000
2023-10-16 12:25:37,433 epoch 2 - iter 24/121 - loss 0.40100795 - time (sec): 35.48 - samples/sec: 21.65 - lr: 0.005000
2023-10-16 12:25:56,179 epoch 2 - iter 36/121 - loss 0.38646683 - time (sec): 54.23 - samples/sec: 21.24 - lr: 0.005000
2023-10-16 12:26:14,291 epoch 2 - iter 48/121 - loss 0.38974343 - time (sec): 72.34 - samples/sec: 21.23 - lr: 0.005000
2023-10-16 12:26:31,547 epoch 2 - iter 60/121 - loss 0.39227762 - time (sec): 89.59 - samples/sec: 21.43 - lr: 0.005000
2023-10-16 12:26:50,318 epoch 2 - iter 72/121 - loss 0.38923779 - time (sec): 108.37 - samples/sec: 21.26 - lr: 0.005000
2023-10-16 12:27:07,567 epoch 2 - iter 84/121 - loss 0.38

100%|██████████| 14/14 [00:08<00:00,  1.56it/s]

2023-10-16 12:28:12,194 Evaluating as a multi-label problem: False
2023-10-16 12:28:12,210 DEV : loss 0.515893816947937 - f1-score (micro avg)  0.7716





2023-10-16 12:28:13,445 BAD EPOCHS (no improvement): 0
2023-10-16 12:28:13,451 saving best model
2023-10-16 12:28:18,516 ----------------------------------------------------------------------------------------------------
2023-10-16 12:28:36,193 epoch 3 - iter 12/121 - loss 0.37638290 - time (sec): 17.67 - samples/sec: 21.73 - lr: 0.005000
2023-10-16 12:28:55,665 epoch 3 - iter 24/121 - loss 0.34028962 - time (sec): 37.15 - samples/sec: 20.67 - lr: 0.005000
2023-10-16 12:29:13,164 epoch 3 - iter 36/121 - loss 0.32505084 - time (sec): 54.65 - samples/sec: 21.08 - lr: 0.005000
2023-10-16 12:29:31,929 epoch 3 - iter 48/121 - loss 0.32335332 - time (sec): 73.41 - samples/sec: 20.92 - lr: 0.005000
2023-10-16 12:29:50,204 epoch 3 - iter 60/121 - loss 0.32767281 - time (sec): 91.69 - samples/sec: 20.94 - lr: 0.005000
2023-10-16 12:30:08,451 epoch 3 - iter 72/121 - loss 0.33418430 - time (sec): 109.93 - samples/sec: 20.96 - lr: 0.005000
2023-10-16 12:30:26,739 epoch 3 - iter 84/121 - loss 0.34

100%|██████████| 14/14 [00:09<00:00,  1.48it/s]

2023-10-16 12:31:31,322 Evaluating as a multi-label problem: False
2023-10-16 12:31:31,348 DEV : loss 0.5153331160545349 - f1-score (micro avg)  0.7692





2023-10-16 12:31:32,975 BAD EPOCHS (no improvement): 1
2023-10-16 12:31:33,889 ----------------------------------------------------------------------------------------------------


100%|██████████| 15/15 [00:10<00:00,  1.45it/s]

2023-10-16 12:31:45,867 Evaluating as a multi-label problem: False
2023-10-16 12:31:45,889 0.75	0.75	0.75	0.75
2023-10-16 12:31:45,893 
Results:
- F-score (micro) 0.75
- F-score (macro) 0.702
- Accuracy 0.75

By class:
              precision    recall  f1-score   support

     Neutral     0.7675    0.8839    0.8216       310
  NotNeutral     0.6975    0.5000    0.5825       166

    accuracy                         0.7500       476
   macro avg     0.7325    0.6919    0.7020       476
weighted avg     0.7431    0.7500    0.7382       476

2023-10-16 12:31:45,897 ----------------------------------------------------------------------------------------------------





{'test_score': 0.75,
 'dev_score_history': [0.7692307692307693,
  0.7715617715617715,
  0.7692307692307693],
 'train_loss_history': [0.398372258962035,
  0.3795696081059655,
  0.34550870113758425],
 'dev_loss_history': [0.4818975031375885,
  0.515893816947937,
  0.5153331160545349]}

Lower the learning rate again (to 0.0025) and continue training.

In [None]:
# Third training pass on the existing trainer, with the learning rate lowered
# to 0.0025. Output goes to the same model1 folder as the previous passes.
# (Earlier experiments saved to other folders: model_with_validation,
# bert_model_final, model_aug, model_semi; max_epochs=10 was also tried.)
model_dir = '/content/gdrive/MyDrive/Enron_email/Flair/model1'
trainer.train(
    model_dir,
    learning_rate=0.0025,
    mini_batch_size=32,
    max_epochs=3,
)

2023-10-17 04:07:13,035 Reading data from /content/gdrive/MyDrive/Enron_email/Flair/model1_train
2023-10-17 04:07:13,036 Train: /content/gdrive/MyDrive/Enron_email/Flair/model1_train/train.csv
2023-10-17 04:07:13,039 Dev: None
2023-10-17 04:07:13,042 Test: None




2023-10-17 04:07:14,753 ----------------------------------------------------------------------------------------------------
2023-10-17 04:07:14,758 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out

100%|██████████| 14/14 [00:09<00:00,  1.54it/s]


2023-10-17 04:10:22,005 Evaluating as a multi-label problem: False
2023-10-17 04:10:22,034 DEV : loss 0.3692004382610321 - f1-score (micro avg)  0.8625
2023-10-17 04:10:23,017 BAD EPOCHS (no improvement): 0
2023-10-17 04:10:23,023 saving best model
2023-10-17 04:10:23,854 ----------------------------------------------------------------------------------------------------
2023-10-17 04:10:41,897 epoch 2 - iter 12/121 - loss 0.38377795 - time (sec): 18.04 - samples/sec: 21.29 - lr: 0.002500
2023-10-17 04:11:00,766 epoch 2 - iter 24/121 - loss 0.38446408 - time (sec): 36.91 - samples/sec: 20.81 - lr: 0.002500
2023-10-17 04:11:18,929 epoch 2 - iter 36/121 - loss 0.35389708 - time (sec): 55.07 - samples/sec: 20.92 - lr: 0.002500
2023-10-17 04:11:35,904 epoch 2 - iter 48/121 - loss 0.34686225 - time (sec): 72.05 - samples/sec: 21.32 - lr: 0.002500
2023-10-17 04:11:52,857 epoch 2 - iter 60/121 - loss 0.34767360 - time (sec): 89.00 - samples/sec: 21.57 - lr: 0.002500
2023-10-17 04:12:08,625 ep

100%|██████████| 14/14 [00:07<00:00,  1.79it/s]

2023-10-17 04:13:27,221 Evaluating as a multi-label problem: False
2023-10-17 04:13:27,239 DEV : loss 0.32578739523887634 - f1-score (micro avg)  0.8741





2023-10-17 04:13:28,413 BAD EPOCHS (no improvement): 0
2023-10-17 04:13:28,420 saving best model
2023-10-17 04:13:33,518 ----------------------------------------------------------------------------------------------------
2023-10-17 04:13:50,898 epoch 3 - iter 12/121 - loss 0.33426361 - time (sec): 17.38 - samples/sec: 22.10 - lr: 0.002500
2023-10-17 04:14:08,349 epoch 3 - iter 24/121 - loss 0.31271642 - time (sec): 34.83 - samples/sec: 22.05 - lr: 0.002500
2023-10-17 04:14:26,463 epoch 3 - iter 36/121 - loss 0.32018725 - time (sec): 52.94 - samples/sec: 21.76 - lr: 0.002500
2023-10-17 04:14:43,950 epoch 3 - iter 48/121 - loss 0.32486224 - time (sec): 70.43 - samples/sec: 21.81 - lr: 0.002500
2023-10-17 04:15:01,791 epoch 3 - iter 60/121 - loss 0.32156162 - time (sec): 88.27 - samples/sec: 21.75 - lr: 0.002500
2023-10-17 04:15:18,484 epoch 3 - iter 72/121 - loss 0.32757861 - time (sec): 104.96 - samples/sec: 21.95 - lr: 0.002500
2023-10-17 04:15:35,701 epoch 3 - iter 84/121 - loss 0.33

100%|██████████| 14/14 [00:08<00:00,  1.71it/s]

2023-10-17 04:16:38,771 Evaluating as a multi-label problem: False
2023-10-17 04:16:38,789 DEV : loss 0.33177655935287476 - f1-score (micro avg)  0.8741





2023-10-17 04:16:39,613 BAD EPOCHS (no improvement): 1
2023-10-17 04:16:47,891 ----------------------------------------------------------------------------------------------------


100%|██████████| 15/15 [00:08<00:00,  1.70it/s]


2023-10-17 04:16:59,816 Evaluating as a multi-label problem: False
2023-10-17 04:16:59,849 0.8109	0.8109	0.8109	0.8109
2023-10-17 04:16:59,851 
Results:
- F-score (micro) 0.8109
- F-score (macro) 0.7952
- Accuracy 0.8109

By class:
              precision    recall  f1-score   support

     Neutral     0.8145    0.8931    0.8520       290
  NotNeutral     0.8038    0.6828    0.7384       186

    accuracy                         0.8109       476
   macro avg     0.8091    0.7879    0.7952       476
weighted avg     0.8103    0.8109    0.8076       476

2023-10-17 04:16:59,854 ----------------------------------------------------------------------------------------------------


{'test_score': 0.8109243697478992,
 'dev_score_history': [0.8624708624708625,
  0.8741258741258742,
  0.8741258741258742],
 'train_loss_history': [0.36040138246606457,
  0.34930577928257334,
  0.33109594343115223],
 'dev_loss_history': [0.3692004382610321,
  0.32578739523887634,
  0.33177655935287476]}

## Step 2: Delete the runtime (otherwise the first model takes up too much memory and the notebook is unable to run), then fine-tune a pre-trained model to classify Positive vs. Negative.

In [1]:
# import package again
import pandas as pd

# load google drive
from google.colab import drive
drive.mount('/content/gdrive')

# install Flair framework
! pip install flair

Mounted at /content/gdrive
Collecting flair
  Downloading flair-0.12.2-py3-none-any.whl (373 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.1/373.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting segtok>=1.5.7 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting mpld3==0.3 (from flair)
  Downloading mpld3-0.3.tar.gz (788 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m788.5/788.5 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sqlitedict>=1.6.0 (from flair)
  Downloading sqlitedict-2.1.0.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deprecated>=1.2.4 (from flair)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting boto3 (from flair)
  Downloading boto3-1.28.65-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m17.9 MB/s[0m et

In [7]:
# load packages from Flair
from flair.models import TextClassifier
from flair.data import Sentence
from flair.data import Corpus
from flair.datasets import CSVClassificationCorpus
from flair.trainers import ModelTrainer

# Start from the pre-trained 'sentiment-fast' classifier; it is fine-tuned further below.
pretrained_name = 'sentiment-fast'
classifier = TextClassifier.load(pretrained_name)


In [8]:
# Load the labelled e-mails.
# NOTE(review): this reads the same 4763.csv file as Step 1, although the
# previous comment on this line said an oversampled dataset is used here —
# confirm which file is intended.
df = pd.read_csv('/content/gdrive/MyDrive/Enron_email/4763.csv')

df = df[['Content', 'Label']]  # subset the columns
df.columns = ['Text', 'Sentiment']   # change the column names

# Keep only the Positive and Negative rows. The explicit .copy() yields an
# independent frame, so later column assignments do not write into a slice
# (the situation pandas reports as SettingWithCopyWarning).
df = df[df['Sentiment'] != 'Neutral'].copy()
df

Unnamed: 0,Text,Sentiment
3,I probably won't be able to go. Could we send ...,Negative
5,"Stevens, Joe put a call into Destin today, but...",Positive
6,We moved the meeting to 12:00 for tomorrow and...,Positive
8,thank ;you very much Kam Keiser 05/08/2001 01:...,Positive
10,Don't hate me for sending a note. What do you ...,Negative
...,...,...
4748,I don't know if I gave this to you yet. Sent: ...,Positive
4755,you're right about the email address. thanks. ...,Positive
4757,"see you there Sent: Thursday, September 27, 20...",Positive
4758,This is a summary of my position and trading g...,Positive


In [9]:
# Convert the labels to upper case because the pre-trained model outputs
# upper-case labels: 'Positive' becomes 'POSITIVE', every other value 'NEGATIVE'.
# A vectorised comparison + map replaces the row-by-row iterrows() loop, and
# assign() builds a new frame instead of writing into a possibly-sliced one.
is_positive = df['Sentiment'].eq('Positive')
df = df.assign(Sentiment=is_positive.map({True: 'POSITIVE', False: 'NEGATIVE'}))
df

Unnamed: 0,Text,Sentiment
3,I probably won't be able to go. Could we send ...,NEGATIVE
5,"Stevens, Joe put a call into Destin today, but...",POSITIVE
6,We moved the meeting to 12:00 for tomorrow and...,POSITIVE
8,thank ;you very much Kam Keiser 05/08/2001 01:...,POSITIVE
10,Don't hate me for sending a note. What do you ...,NEGATIVE
...,...,...
4748,I don't know if I gave this to you yet. Sent: ...,POSITIVE
4755,you're right about the email address. thanks. ...,POSITIVE
4757,"see you there Sent: Thursday, September 27, 20...",POSITIVE
4758,This is a summary of my position and trading g...,POSITIVE


In [10]:
# Save the Positive/Negative rows as the CSV file used for fine-tuning.
# to_csv() does not create missing folders, so make sure the target exists first.
import os

train_dir = '/content/gdrive/MyDrive/Enron_email/Flair/model2_train'
os.makedirs(train_dir, exist_ok=True)
df.to_csv(os.path.join(train_dir, 'train.csv'), index=False)

In [11]:
# Folder that holds the fine-tuning CSV.
data_path = '/content/gdrive/MyDrive/Enron_email/Flair/model2_train'

# Map CSV column positions to their roles: column 0 is the text, column 1 the label.
column_names = {0: 'text', 1: 'label'}

# Build the corpus from the CSV file, skipping its header row.
corpus: Corpus = CSVClassificationCorpus(
    data_folder=data_path,
    train_file='/content/gdrive/MyDrive/Enron_email/Flair/model2_train/train.csv',
    column_name_map=column_names,  # reuse the mapping defined above instead of repeating the literal
    label_type='Sentiment',
    skip_header=True,
)

# Trainer that continues training the already-loaded pre-trained classifier.
trainer = ModelTrainer(classifier, corpus)

# Fine-tune the classifier.
# The first argument is the folder the model is saved to. For this second model
# the notebook does not load the model back from this folder; it loads a saved
# state_dict instead.
# NOTE(review): in the recorded log the dev loss rises from 0.69 after epoch 1
# to 0.88-0.93 in epochs 2-3, and the dev F1 in the visible epochs never returns
# to the epoch-1 value of 0.9415; learning_rate=0.01 may be too high — confirm.
trainer.fine_tune(
    '/content/gdrive/MyDrive/Enron_email/Flair/model2',
    learning_rate=0.01,
    mini_batch_size=32,
    max_epochs=20,
)

2023-10-18 02:46:44,222 Reading data from /content/gdrive/MyDrive/Enron_email/Flair/model2_train
2023-10-18 02:46:44,224 Train: /content/gdrive/MyDrive/Enron_email/Flair/model2_train/train.csv
2023-10-18 02:46:44,226 Dev: None
2023-10-18 02:46:44,229 Test: None
2023-10-18 02:46:44,281 ----------------------------------------------------------------------------------------------------
2023-10-18 02:46:44,282 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('en-crawl')
    )
    (word_reprojection_map): Linear(in_features=300, out_features=300, bias=True)
    (rnn): LSTM(300, 256, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): BCEWithLogitsLoss()
  (weights): None
  (weight_ten

100%|██████████| 5/5 [00:01<00:00,  3.02it/s]

2023-10-18 02:46:58,721 Evaluating as a multi-label problem: True





2023-10-18 02:46:58,802 DEV : loss 0.6910243630409241 - f1-score (micro avg)  0.9415
2023-10-18 02:46:59,339 ----------------------------------------------------------------------------------------------------
2023-10-18 02:47:00,494 epoch 2 - iter 4/43 - loss 0.90489863 - time (sec): 1.15 - samples/sec: 111.49 - lr: 0.005402
2023-10-18 02:47:01,510 epoch 2 - iter 8/43 - loss 0.90634547 - time (sec): 2.16 - samples/sec: 118.30 - lr: 0.005862
2023-10-18 02:47:02,160 epoch 2 - iter 12/43 - loss 0.89567632 - time (sec): 2.81 - samples/sec: 136.46 - lr: 0.006322
2023-10-18 02:47:03,381 epoch 2 - iter 16/43 - loss 0.91058319 - time (sec): 4.04 - samples/sec: 126.88 - lr: 0.006782
2023-10-18 02:47:04,128 epoch 2 - iter 20/43 - loss 0.88724230 - time (sec): 4.78 - samples/sec: 133.82 - lr: 0.007241
2023-10-18 02:47:05,008 epoch 2 - iter 24/43 - loss 0.88022889 - time (sec): 5.66 - samples/sec: 135.63 - lr: 0.007701
2023-10-18 02:47:05,657 epoch 2 - iter 28/43 - loss 0.90796150 - time (sec): 6

100%|██████████| 5/5 [00:00<00:00,  8.54it/s]

2023-10-18 02:47:09,037 Evaluating as a multi-label problem: True
2023-10-18 02:47:09,059 DEV : loss 0.884061336517334 - f1-score (micro avg)  0.8974





2023-10-18 02:47:09,357 ----------------------------------------------------------------------------------------------------
2023-10-18 02:47:10,504 epoch 3 - iter 4/43 - loss 0.69090975 - time (sec): 1.13 - samples/sec: 112.81 - lr: 0.009962
2023-10-18 02:47:11,206 epoch 3 - iter 8/43 - loss 0.91984423 - time (sec): 1.84 - samples/sec: 139.34 - lr: 0.009912
2023-10-18 02:47:11,958 epoch 3 - iter 12/43 - loss 1.04501007 - time (sec): 2.59 - samples/sec: 148.35 - lr: 0.009861
2023-10-18 02:47:13,076 epoch 3 - iter 16/43 - loss 1.08214151 - time (sec): 3.71 - samples/sec: 138.12 - lr: 0.009810
2023-10-18 02:47:13,911 epoch 3 - iter 20/43 - loss 1.06440180 - time (sec): 4.54 - samples/sec: 140.90 - lr: 0.009760
2023-10-18 02:47:15,652 epoch 3 - iter 24/43 - loss 1.06792070 - time (sec): 6.28 - samples/sec: 122.25 - lr: 0.009709
2023-10-18 02:47:16,958 epoch 3 - iter 28/43 - loss 1.06998667 - time (sec): 7.59 - samples/sec: 118.06 - lr: 0.009659
2023-10-18 02:47:18,013 epoch 3 - iter 32/43

100%|██████████| 5/5 [00:00<00:00,  8.30it/s]

2023-10-18 02:47:21,622 Evaluating as a multi-label problem: True
2023-10-18 02:47:21,643 DEV : loss 0.9275895953178406 - f1-score (micro avg)  0.9217





2023-10-18 02:47:21,957 ----------------------------------------------------------------------------------------------------
2023-10-18 02:47:22,607 epoch 4 - iter 4/43 - loss 0.95399591 - time (sec): 0.65 - samples/sec: 198.12 - lr: 0.009418
2023-10-18 02:47:23,315 epoch 4 - iter 8/43 - loss 0.99630953 - time (sec): 1.35 - samples/sec: 189.02 - lr: 0.009368
2023-10-18 02:47:24,496 epoch 4 - iter 12/43 - loss 1.08481103 - time (sec): 2.53 - samples/sec: 151.49 - lr: 0.009317
2023-10-18 02:47:25,159 epoch 4 - iter 16/43 - loss 1.06539428 - time (sec): 3.20 - samples/sec: 160.06 - lr: 0.009267
2023-10-18 02:47:25,918 epoch 4 - iter 20/43 - loss 1.04713275 - time (sec): 3.96 - samples/sec: 161.73 - lr: 0.009216
2023-10-18 02:47:26,870 epoch 4 - iter 24/43 - loss 1.04942860 - time (sec): 4.91 - samples/sec: 156.45 - lr: 0.009166
2023-10-18 02:47:27,459 epoch 4 - iter 28/43 - loss 1.03391854 - time (sec): 5.50 - samples/sec: 162.95 - lr: 0.009115
2023-10-18 02:47:28,651 epoch 4 - iter 32/43

100%|██████████| 5/5 [00:01<00:00,  3.73it/s]

2023-10-18 02:47:33,254 Evaluating as a multi-label problem: True
2023-10-18 02:47:33,293 DEV : loss 0.7564862966537476 - f1-score (micro avg)  0.9162





2023-10-18 02:47:34,063 ----------------------------------------------------------------------------------------------------
2023-10-18 02:47:35,963 epoch 5 - iter 4/43 - loss 0.89906542 - time (sec): 1.90 - samples/sec: 67.49 - lr: 0.008875
2023-10-18 02:47:37,211 epoch 5 - iter 8/43 - loss 0.92183768 - time (sec): 3.15 - samples/sec: 81.39 - lr: 0.008824
2023-10-18 02:47:38,136 epoch 5 - iter 12/43 - loss 0.89716754 - time (sec): 4.07 - samples/sec: 94.34 - lr: 0.008774
2023-10-18 02:47:39,299 epoch 5 - iter 16/43 - loss 0.88821124 - time (sec): 5.23 - samples/sec: 97.84 - lr: 0.008723
2023-10-18 02:47:40,810 epoch 5 - iter 20/43 - loss 0.90313703 - time (sec): 6.74 - samples/sec: 94.90 - lr: 0.008673
2023-10-18 02:47:42,560 epoch 5 - iter 24/43 - loss 0.94337039 - time (sec): 8.49 - samples/sec: 90.42 - lr: 0.008622
2023-10-18 02:47:43,444 epoch 5 - iter 28/43 - loss 0.94289855 - time (sec): 9.38 - samples/sec: 95.54 - lr: 0.008571
2023-10-18 02:47:44,271 epoch 5 - iter 32/43 - loss

100%|██████████| 5/5 [00:00<00:00,  7.91it/s]

2023-10-18 02:47:47,187 Evaluating as a multi-label problem: True
2023-10-18 02:47:47,216 DEV : loss 0.8017870783805847 - f1-score (micro avg)  0.9245





2023-10-18 02:47:47,554 ----------------------------------------------------------------------------------------------------
2023-10-18 02:47:48,330 epoch 6 - iter 4/43 - loss 0.98754506 - time (sec): 0.77 - samples/sec: 166.86 - lr: 0.008331
2023-10-18 02:47:49,019 epoch 6 - iter 8/43 - loss 0.94664606 - time (sec): 1.46 - samples/sec: 175.81 - lr: 0.008281
2023-10-18 02:47:50,463 epoch 6 - iter 12/43 - loss 0.97644989 - time (sec): 2.90 - samples/sec: 132.37 - lr: 0.008230
2023-10-18 02:47:51,092 epoch 6 - iter 16/43 - loss 0.96295768 - time (sec): 3.53 - samples/sec: 145.08 - lr: 0.008180
2023-10-18 02:47:51,682 epoch 6 - iter 20/43 - loss 0.96688035 - time (sec): 4.12 - samples/sec: 155.35 - lr: 0.008129
2023-10-18 02:47:52,434 epoch 6 - iter 24/43 - loss 0.98980596 - time (sec): 4.87 - samples/sec: 157.66 - lr: 0.008078
2023-10-18 02:47:53,585 epoch 6 - iter 28/43 - loss 0.98219385 - time (sec): 6.02 - samples/sec: 148.77 - lr: 0.008028
2023-10-18 02:47:54,548 epoch 6 - iter 32/43

100%|██████████| 5/5 [00:01<00:00,  2.94it/s]

2023-10-18 02:47:58,980 Evaluating as a multi-label problem: True
2023-10-18 02:47:59,017 DEV : loss 0.7840322256088257 - f1-score (micro avg)  0.9189





2023-10-18 02:47:59,564 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:00,618 epoch 7 - iter 4/43 - loss 0.69147760 - time (sec): 1.05 - samples/sec: 121.89 - lr: 0.007788
2023-10-18 02:48:01,508 epoch 7 - iter 8/43 - loss 0.80454598 - time (sec): 1.94 - samples/sec: 131.94 - lr: 0.007737
2023-10-18 02:48:02,208 epoch 7 - iter 12/43 - loss 0.89770515 - time (sec): 2.64 - samples/sec: 145.45 - lr: 0.007686
2023-10-18 02:48:02,851 epoch 7 - iter 16/43 - loss 0.92940257 - time (sec): 3.28 - samples/sec: 155.97 - lr: 0.007636
2023-10-18 02:48:04,125 epoch 7 - iter 20/43 - loss 0.97258186 - time (sec): 4.56 - samples/sec: 140.44 - lr: 0.007585
2023-10-18 02:48:04,778 epoch 7 - iter 24/43 - loss 0.98897778 - time (sec): 5.21 - samples/sec: 147.41 - lr: 0.007535
2023-10-18 02:48:05,614 epoch 7 - iter 28/43 - loss 0.99991937 - time (sec): 6.05 - samples/sec: 148.20 - lr: 0.007484
2023-10-18 02:48:06,323 epoch 7 - iter 32/43

100%|██████████| 5/5 [00:00<00:00,  8.47it/s]

2023-10-18 02:48:09,447 Evaluating as a multi-label problem: True
2023-10-18 02:48:09,469 DEV : loss 0.7620786428451538 - f1-score (micro avg)  0.9162





2023-10-18 02:48:09,783 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:10,388 epoch 8 - iter 4/43 - loss 0.85255668 - time (sec): 0.60 - samples/sec: 213.13 - lr: 0.007244
2023-10-18 02:48:11,491 epoch 8 - iter 8/43 - loss 0.84094487 - time (sec): 1.70 - samples/sec: 150.23 - lr: 0.007193
2023-10-18 02:48:12,553 epoch 8 - iter 12/43 - loss 0.84667771 - time (sec): 2.77 - samples/sec: 138.83 - lr: 0.007143
2023-10-18 02:48:13,715 epoch 8 - iter 16/43 - loss 0.83601658 - time (sec): 3.93 - samples/sec: 130.35 - lr: 0.007092
2023-10-18 02:48:14,871 epoch 8 - iter 20/43 - loss 0.84952004 - time (sec): 5.08 - samples/sec: 125.89 - lr: 0.007042
2023-10-18 02:48:16,563 epoch 8 - iter 24/43 - loss 0.85828148 - time (sec): 6.78 - samples/sec: 113.35 - lr: 0.006991
2023-10-18 02:48:17,870 epoch 8 - iter 28/43 - loss 0.83944645 - time (sec): 8.08 - samples/sec: 110.85 - lr: 0.006941
2023-10-18 02:48:18,917 epoch 8 - iter 32/43

100%|██████████| 5/5 [00:01<00:00,  4.49it/s]

2023-10-18 02:48:21,902 Evaluating as a multi-label problem: True
2023-10-18 02:48:21,924 DEV : loss 0.7573550939559937 - f1-score (micro avg)  0.9301





2023-10-18 02:48:22,250 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:23,233 epoch 9 - iter 4/43 - loss 0.82976510 - time (sec): 0.98 - samples/sec: 130.74 - lr: 0.006700
2023-10-18 02:48:23,878 epoch 9 - iter 8/43 - loss 0.82458211 - time (sec): 1.62 - samples/sec: 157.65 - lr: 0.006650
2023-10-18 02:48:24,533 epoch 9 - iter 12/43 - loss 0.79814103 - time (sec): 2.28 - samples/sec: 168.53 - lr: 0.006599
2023-10-18 02:48:25,705 epoch 9 - iter 16/43 - loss 0.80173765 - time (sec): 3.45 - samples/sec: 148.39 - lr: 0.006549
2023-10-18 02:48:26,268 epoch 9 - iter 20/43 - loss 0.78175652 - time (sec): 4.01 - samples/sec: 159.47 - lr: 0.006498
2023-10-18 02:48:26,970 epoch 9 - iter 24/43 - loss 0.79478298 - time (sec): 4.72 - samples/sec: 162.88 - lr: 0.006448
2023-10-18 02:48:27,580 epoch 9 - iter 28/43 - loss 0.83286207 - time (sec): 5.33 - samples/sec: 168.23 - lr: 0.006397
2023-10-18 02:48:28,919 epoch 9 - iter 32/43

100%|██████████| 5/5 [00:01<00:00,  4.76it/s]

2023-10-18 02:48:32,458 Evaluating as a multi-label problem: True





2023-10-18 02:48:32,492 DEV : loss 0.7299948930740356 - f1-score (micro avg)  0.9301
2023-10-18 02:48:33,078 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:34,693 epoch 10 - iter 4/43 - loss 0.79255138 - time (sec): 1.61 - samples/sec: 79.57 - lr: 0.006157
2023-10-18 02:48:35,803 epoch 10 - iter 8/43 - loss 0.78652608 - time (sec): 2.72 - samples/sec: 94.17 - lr: 0.006106
2023-10-18 02:48:37,110 epoch 10 - iter 12/43 - loss 0.86024653 - time (sec): 4.03 - samples/sec: 95.40 - lr: 0.006056
2023-10-18 02:48:37,900 epoch 10 - iter 16/43 - loss 0.88001252 - time (sec): 4.82 - samples/sec: 106.33 - lr: 0.006005
2023-10-18 02:48:38,471 epoch 10 - iter 20/43 - loss 0.86408675 - time (sec): 5.39 - samples/sec: 118.82 - lr: 0.005954
2023-10-18 02:48:39,610 epoch 10 - iter 24/43 - loss 0.87834163 - time (sec): 6.53 - samples/sec: 117.70 - lr: 0.005904
2023-10-18 02:48:40,502 epoch 10 - iter 28/43 - loss 0.86173681 - time (sec

100%|██████████| 5/5 [00:00<00:00,  8.26it/s]

2023-10-18 02:48:44,187 Evaluating as a multi-label problem: True
2023-10-18 02:48:44,214 DEV : loss 0.7100428342819214 - f1-score (micro avg)  0.9301





2023-10-18 02:48:44,517 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:45,247 epoch 11 - iter 4/43 - loss 0.67583301 - time (sec): 0.72 - samples/sec: 177.08 - lr: 0.005613
2023-10-18 02:48:46,415 epoch 11 - iter 8/43 - loss 0.74341238 - time (sec): 1.89 - samples/sec: 135.37 - lr: 0.005563
2023-10-18 02:48:47,051 epoch 11 - iter 12/43 - loss 0.77574591 - time (sec): 2.53 - samples/sec: 151.98 - lr: 0.005512
2023-10-18 02:48:47,917 epoch 11 - iter 16/43 - loss 0.81846144 - time (sec): 3.39 - samples/sec: 150.90 - lr: 0.005461
2023-10-18 02:48:48,961 epoch 11 - iter 20/43 - loss 0.84009929 - time (sec): 4.44 - samples/sec: 144.25 - lr: 0.005411
2023-10-18 02:48:49,963 epoch 11 - iter 24/43 - loss 0.85121232 - time (sec): 5.44 - samples/sec: 141.19 - lr: 0.005360
2023-10-18 02:48:51,612 epoch 11 - iter 28/43 - loss 0.84349442 - time (sec): 7.09 - samples/sec: 126.41 - lr: 0.005310
2023-10-18 02:48:53,073 epoch 11 - it

100%|██████████| 5/5 [00:01<00:00,  4.51it/s]

2023-10-18 02:48:56,727 Evaluating as a multi-label problem: True
2023-10-18 02:48:56,752 DEV : loss 0.7730908989906311 - f1-score (micro avg)  0.908





2023-10-18 02:48:57,054 ----------------------------------------------------------------------------------------------------
2023-10-18 02:48:57,993 epoch 12 - iter 4/43 - loss 0.88989694 - time (sec): 0.93 - samples/sec: 137.36 - lr: 0.005070
2023-10-18 02:48:58,645 epoch 12 - iter 8/43 - loss 0.88336451 - time (sec): 1.58 - samples/sec: 161.61 - lr: 0.005019
2023-10-18 02:48:59,243 epoch 12 - iter 12/43 - loss 0.89321876 - time (sec): 2.18 - samples/sec: 175.97 - lr: 0.004968
2023-10-18 02:49:00,497 epoch 12 - iter 16/43 - loss 0.92968196 - time (sec): 3.44 - samples/sec: 149.02 - lr: 0.004918
2023-10-18 02:49:01,177 epoch 12 - iter 20/43 - loss 0.91075967 - time (sec): 4.12 - samples/sec: 155.50 - lr: 0.004867
2023-10-18 02:49:01,800 epoch 12 - iter 24/43 - loss 0.94019208 - time (sec): 4.74 - samples/sec: 162.05 - lr: 0.004817
2023-10-18 02:49:02,708 epoch 12 - iter 28/43 - loss 0.92139536 - time (sec): 5.65 - samples/sec: 158.67 - lr: 0.004766
2023-10-18 02:49:03,289 epoch 12 - it

100%|██████████| 5/5 [00:01<00:00,  4.71it/s]

2023-10-18 02:49:06,874 Evaluating as a multi-label problem: True





2023-10-18 02:49:06,915 DEV : loss 0.8170658946037292 - f1-score (micro avg)  0.8974
2023-10-18 02:49:07,447 ----------------------------------------------------------------------------------------------------
2023-10-18 02:49:08,562 epoch 13 - iter 4/43 - loss 0.82348540 - time (sec): 1.11 - samples/sec: 115.11 - lr: 0.004526
2023-10-18 02:49:10,206 epoch 13 - iter 8/43 - loss 0.79100858 - time (sec): 2.76 - samples/sec: 92.89 - lr: 0.004475
2023-10-18 02:49:11,734 epoch 13 - iter 12/43 - loss 0.72624234 - time (sec): 4.28 - samples/sec: 89.65 - lr: 0.004425
2023-10-18 02:49:12,738 epoch 13 - iter 16/43 - loss 0.76169648 - time (sec): 5.29 - samples/sec: 96.82 - lr: 0.004374
2023-10-18 02:49:13,642 epoch 13 - iter 20/43 - loss 0.79278745 - time (sec): 6.19 - samples/sec: 103.35 - lr: 0.004324
2023-10-18 02:49:14,746 epoch 13 - iter 24/43 - loss 0.78457028 - time (sec): 7.30 - samples/sec: 105.25 - lr: 0.004273
2023-10-18 02:49:15,272 epoch 13 - iter 28/43 - loss 0.78028186 - time (sec

100%|██████████| 5/5 [00:00<00:00,  8.13it/s]

2023-10-18 02:49:18,715 Evaluating as a multi-label problem: True
2023-10-18 02:49:18,737 DEV : loss 0.720430850982666 - f1-score (micro avg)  0.9245





2023-10-18 02:49:19,055 ----------------------------------------------------------------------------------------------------
2023-10-18 02:49:19,738 epoch 14 - iter 4/43 - loss 0.64567868 - time (sec): 0.68 - samples/sec: 188.85 - lr: 0.003982
2023-10-18 02:49:20,475 epoch 14 - iter 8/43 - loss 0.70266919 - time (sec): 1.41 - samples/sec: 181.06 - lr: 0.003932
2023-10-18 02:49:21,673 epoch 14 - iter 12/43 - loss 0.75341159 - time (sec): 2.61 - samples/sec: 146.99 - lr: 0.003881
2023-10-18 02:49:22,217 epoch 14 - iter 16/43 - loss 0.76970857 - time (sec): 3.16 - samples/sec: 162.19 - lr: 0.003831
2023-10-18 02:49:22,914 epoch 14 - iter 20/43 - loss 0.75094119 - time (sec): 3.85 - samples/sec: 166.10 - lr: 0.003780
2023-10-18 02:49:24,064 epoch 14 - iter 24/43 - loss 0.69970602 - time (sec): 5.00 - samples/sec: 153.49 - lr: 0.003729
2023-10-18 02:49:25,014 epoch 14 - iter 28/43 - loss 0.70684113 - time (sec): 5.95 - samples/sec: 150.49 - lr: 0.003679
2023-10-18 02:49:26,873 epoch 14 - it

100%|██████████| 5/5 [00:01<00:00,  4.98it/s]

2023-10-18 02:49:30,759 Evaluating as a multi-label problem: True
2023-10-18 02:49:30,781 DEV : loss 0.7925640344619751 - f1-score (micro avg)  0.9217





2023-10-18 02:49:31,084 ----------------------------------------------------------------------------------------------------
2023-10-18 02:49:32,320 epoch 15 - iter 4/43 - loss 0.58084518 - time (sec): 1.23 - samples/sec: 104.10 - lr: 0.003439
2023-10-18 02:49:33,112 epoch 15 - iter 8/43 - loss 0.59595024 - time (sec): 2.02 - samples/sec: 126.61 - lr: 0.003388
2023-10-18 02:49:33,612 epoch 15 - iter 12/43 - loss 0.72767221 - time (sec): 2.52 - samples/sec: 152.31 - lr: 0.003338
2023-10-18 02:49:34,187 epoch 15 - iter 16/43 - loss 0.73586914 - time (sec): 3.10 - samples/sec: 165.35 - lr: 0.003287
2023-10-18 02:49:35,594 epoch 15 - iter 20/43 - loss 0.73475988 - time (sec): 4.50 - samples/sec: 142.11 - lr: 0.003236
2023-10-18 02:49:36,168 epoch 15 - iter 24/43 - loss 0.75468955 - time (sec): 5.08 - samples/sec: 151.24 - lr: 0.003186
2023-10-18 02:49:36,790 epoch 15 - iter 28/43 - loss 0.77191209 - time (sec): 5.70 - samples/sec: 157.22 - lr: 0.003135
2023-10-18 02:49:37,423 epoch 15 - it

100%|██████████| 5/5 [00:00<00:00,  8.16it/s]

2023-10-18 02:49:40,607 Evaluating as a multi-label problem: True
2023-10-18 02:49:40,631 DEV : loss 0.7247409224510193 - f1-score (micro avg)  0.9329





2023-10-18 02:49:41,118 ----------------------------------------------------------------------------------------------------
2023-10-18 02:49:42,225 epoch 16 - iter 4/43 - loss 0.59549575 - time (sec): 1.10 - samples/sec: 116.33 - lr: 0.002895
2023-10-18 02:49:44,050 epoch 16 - iter 8/43 - loss 0.74248869 - time (sec): 2.93 - samples/sec: 87.52 - lr: 0.002845
2023-10-18 02:49:44,919 epoch 16 - iter 12/43 - loss 0.74003669 - time (sec): 3.79 - samples/sec: 101.22 - lr: 0.002794
2023-10-18 02:49:45,869 epoch 16 - iter 16/43 - loss 0.76679634 - time (sec): 4.74 - samples/sec: 107.92 - lr: 0.002743
2023-10-18 02:49:46,890 epoch 16 - iter 20/43 - loss 0.77978253 - time (sec): 5.77 - samples/sec: 111.00 - lr: 0.002693
2023-10-18 02:49:48,114 epoch 16 - iter 24/43 - loss 0.74669798 - time (sec): 6.99 - samples/sec: 109.89 - lr: 0.002642
2023-10-18 02:49:49,430 epoch 16 - iter 28/43 - loss 0.73722661 - time (sec): 8.31 - samples/sec: 107.88 - lr: 0.002592
2023-10-18 02:49:50,365 epoch 16 - ite

100%|██████████| 5/5 [00:00<00:00,  8.12it/s]

2023-10-18 02:49:52,702 Evaluating as a multi-label problem: True
2023-10-18 02:49:52,726 DEV : loss 0.7193549871444702 - f1-score (micro avg)  0.9415





2023-10-18 02:49:53,553 ----------------------------------------------------------------------------------------------------
2023-10-18 02:49:54,139 epoch 17 - iter 4/43 - loss 0.95501663 - time (sec): 0.58 - samples/sec: 220.38 - lr: 0.002351
2023-10-18 02:49:54,719 epoch 17 - iter 8/43 - loss 0.80338716 - time (sec): 1.16 - samples/sec: 220.43 - lr: 0.002301
2023-10-18 02:49:55,511 epoch 17 - iter 12/43 - loss 0.78967255 - time (sec): 1.95 - samples/sec: 196.59 - lr: 0.002250
2023-10-18 02:49:56,161 epoch 17 - iter 16/43 - loss 0.79492727 - time (sec): 2.60 - samples/sec: 196.67 - lr: 0.002200
2023-10-18 02:49:57,293 epoch 17 - iter 20/43 - loss 0.77949751 - time (sec): 3.74 - samples/sec: 171.34 - lr: 0.002149
2023-10-18 02:49:58,011 epoch 17 - iter 24/43 - loss 0.76348118 - time (sec): 4.45 - samples/sec: 172.47 - lr: 0.002099
2023-10-18 02:49:58,914 epoch 17 - iter 28/43 - loss 0.76323827 - time (sec): 5.36 - samples/sec: 167.29 - lr: 0.002048
2023-10-18 02:50:00,119 epoch 17 - it

100%|██████████| 5/5 [00:01<00:00,  4.79it/s]

2023-10-18 02:50:05,067 Evaluating as a multi-label problem: True





2023-10-18 02:50:05,102 DEV : loss 0.7301152944564819 - f1-score (micro avg)  0.9329
2023-10-18 02:50:05,632 ----------------------------------------------------------------------------------------------------
2023-10-18 02:50:06,710 epoch 18 - iter 4/43 - loss 0.80669850 - time (sec): 1.07 - samples/sec: 119.17 - lr: 0.001808
2023-10-18 02:50:07,903 epoch 18 - iter 8/43 - loss 0.71876007 - time (sec): 2.27 - samples/sec: 112.92 - lr: 0.001757
2023-10-18 02:50:08,843 epoch 18 - iter 12/43 - loss 0.84072594 - time (sec): 3.21 - samples/sec: 119.75 - lr: 0.001707
2023-10-18 02:50:09,576 epoch 18 - iter 16/43 - loss 0.78955471 - time (sec): 3.94 - samples/sec: 129.96 - lr: 0.001656
2023-10-18 02:50:10,303 epoch 18 - iter 20/43 - loss 0.74396789 - time (sec): 4.67 - samples/sec: 137.13 - lr: 0.001606
2023-10-18 02:50:11,352 epoch 18 - iter 24/43 - loss 0.71940326 - time (sec): 5.72 - samples/sec: 134.37 - lr: 0.001555
2023-10-18 02:50:11,932 epoch 18 - iter 28/43 - loss 0.70968875 - time (

100%|██████████| 5/5 [00:01<00:00,  4.57it/s]

2023-10-18 02:50:15,330 Evaluating as a multi-label problem: True
2023-10-18 02:50:15,353 DEV : loss 0.783759355545044 - f1-score (micro avg)  0.9134





2023-10-18 02:50:15,662 ----------------------------------------------------------------------------------------------------
2023-10-18 02:50:16,397 epoch 19 - iter 4/43 - loss 0.70854752 - time (sec): 0.73 - samples/sec: 175.71 - lr: 0.001264
2023-10-18 02:50:17,501 epoch 19 - iter 8/43 - loss 0.80264892 - time (sec): 1.83 - samples/sec: 139.70 - lr: 0.001214
2023-10-18 02:50:19,052 epoch 19 - iter 12/43 - loss 0.72443959 - time (sec): 3.38 - samples/sec: 113.47 - lr: 0.001163
2023-10-18 02:50:20,523 epoch 19 - iter 16/43 - loss 0.67538220 - time (sec): 4.85 - samples/sec: 105.47 - lr: 0.001113
2023-10-18 02:50:21,641 epoch 19 - iter 20/43 - loss 0.68278947 - time (sec): 5.97 - samples/sec: 107.15 - lr: 0.001062
2023-10-18 02:50:22,671 epoch 19 - iter 24/43 - loss 0.67056493 - time (sec): 7.00 - samples/sec: 109.67 - lr: 0.001011
2023-10-18 02:50:23,582 epoch 19 - iter 28/43 - loss 0.70445135 - time (sec): 7.91 - samples/sec: 113.23 - lr: 0.000961
2023-10-18 02:50:24,877 epoch 19 - it

100%|██████████| 5/5 [00:00<00:00,  8.03it/s]

2023-10-18 02:50:27,411 Evaluating as a multi-label problem: True
2023-10-18 02:50:27,440 DEV : loss 0.7166587710380554 - f1-score (micro avg)  0.9387





2023-10-18 02:50:27,751 ----------------------------------------------------------------------------------------------------
2023-10-18 02:50:29,213 epoch 20 - iter 4/43 - loss 0.48885839 - time (sec): 1.46 - samples/sec: 87.85 - lr: 0.000721
2023-10-18 02:50:30,063 epoch 20 - iter 8/43 - loss 0.52971163 - time (sec): 2.31 - samples/sec: 110.98 - lr: 0.000670
2023-10-18 02:50:31,200 epoch 20 - iter 12/43 - loss 0.52380271 - time (sec): 3.44 - samples/sec: 111.50 - lr: 0.000619
2023-10-18 02:50:32,315 epoch 20 - iter 16/43 - loss 0.57145733 - time (sec): 4.56 - samples/sec: 112.31 - lr: 0.000569
2023-10-18 02:50:33,146 epoch 20 - iter 20/43 - loss 0.57077797 - time (sec): 5.39 - samples/sec: 118.74 - lr: 0.000518
2023-10-18 02:50:34,966 epoch 20 - iter 24/43 - loss 0.58417288 - time (sec): 7.21 - samples/sec: 106.51 - lr: 0.000468
2023-10-18 02:50:36,098 epoch 20 - iter 28/43 - loss 0.56728963 - time (sec): 8.34 - samples/sec: 107.40 - lr: 0.000417
2023-10-18 02:50:37,692 epoch 20 - ite

100%|██████████| 5/5 [00:01<00:00,  4.84it/s]

2023-10-18 02:50:42,865 Evaluating as a multi-label problem: True





2023-10-18 02:50:42,908 DEV : loss 0.7614547610282898 - f1-score (micro avg)  0.9273
2023-10-18 02:50:44,653 ----------------------------------------------------------------------------------------------------
2023-10-18 02:50:44,674 Testing using last state of model ...


100%|██████████| 6/6 [00:00<00:00,  6.60it/s]

2023-10-18 02:50:45,600 Evaluating as a multi-label problem: True
2023-10-18 02:50:45,622 1.0	0.8293	0.9067	0.7941
2023-10-18 02:50:45,625 
Results:
- F-score (micro) 0.9067
- F-score (macro) 0.8828
- Accuracy 0.7941

By class:
              precision    recall  f1-score   support

    POSITIVE     1.0000    0.8865    0.9398       141
    NEGATIVE     1.0000    0.7031    0.8257        64

   micro avg     1.0000    0.8293    0.9067       205
   macro avg     1.0000    0.7948    0.8828       205
weighted avg     1.0000    0.8293    0.9042       205
 samples avg     1.0000    0.8971    0.9314       205

2023-10-18 02:50:45,628 ----------------------------------------------------------------------------------------------------





{'test_score': 0.9066666666666667,
 'dev_score_history': [0.9415384615384615,
  0.8973607038123167,
  0.9216867469879518,
  0.9161676646706587,
  0.9244712990936556,
  0.9189189189189189,
  0.9161676646706587,
  0.9300911854103344,
  0.9300911854103344,
  0.9300911854103344,
  0.9080118694362018,
  0.8973607038123167,
  0.9244712990936556,
  0.9216867469879518,
  0.9329268292682927,
  0.9415384615384615,
  0.9329268292682927,
  0.9134328358208955,
  0.9386503067484663,
  0.9272727272727274],
 'train_loss_history': [1.1403793678283691,
  0.9266684126420455,
  1.0422619941017846,
  1.0238450414484197,
  0.935342639576305,
  0.938274150501598,
  0.96972036188299,
  0.8372741394042968,
  0.8466119634454901,
  0.8616214710582386,
  0.8271330608021129,
  0.8514165399724787,
  0.7922810960249468,
  0.7517330800836737,
  0.7329854625355113,
  0.7305406077991833,
  0.7255179748535157,
  0.6846445645419034,
  0.7000734017112038,
  0.618255593733354],
 'dev_loss_history': [0.6910243630409241,
  0

In [12]:
import torch

# Persist only the fine-tuned weights (the state_dict), not the whole classifier object,
# to Google Drive so they can be reloaded after the runtime is deleted.
fine_tuned_weights = classifier.state_dict()
torch.save(fine_tuned_weights, "/content/gdrive/MyDrive/Enron_email/Flair/model2/saved_model")

## Step 3: Delete the runtime, then test

In [1]:
# import packages
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Mount Google Drive so the saved models and the test CSV are reachable under /content/gdrive
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [3]:
# install Flair framework
! pip install flair

Collecting flair
  Downloading flair-0.12.2-py3-none-any.whl (373 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/373.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.7/373.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m368.6/373.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.1/373.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting segtok>=1.5.7 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting mpld3==0.3 (from flair)
  Downloading mpld3-0.3.tar.gz (788 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m788.5/788.5 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sqlitedict>=1.6.0 (from flair)
  Downloading sqlitedict-2.1.0.ta

In [4]:
# load packages from Flair
from flair.models import TextClassifier
from flair.data import Sentence

In [5]:
# Stage-1 model: predicts whether an email is Neutral or NotNeutral.
# Loaded from the best-model.pt checkpoint stored under model1 on Drive.
MODEL1_PATH = '/content/gdrive/MyDrive/Enron_email/Flair/model1/best-model.pt'
classifier1 = TextClassifier.load(MODEL1_PATH)

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [6]:
# import package
import torch

# Stage-2 model: predicts Positive vs. Negative.
# Start from Flair's pre-trained 'sentiment-fast' classifier to get the model structure ...
classifier2 = TextClassifier.load('sentiment-fast')

# ... then overwrite its weights with the fine-tuned state_dict written by torch.save() earlier.
# map_location="cpu" lets the checkpoint deserialise even when this fresh runtime has no GPU
# (tensors saved from a CUDA device otherwise fail to load on a CPU-only machine);
# load_state_dict() then copies the values into the model's existing parameters.
classifier2.load_state_dict(
    torch.load(
        "/content/gdrive/MyDrive/Enron_email/Flair/model2/saved_model",
        map_location="cpu",
    )
)

# Switch to inference mode; as the last expression this also displays the model summary.
classifier2.eval()

2023-10-18 03:02:37,638 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-fasttext-rnn/sentiment-en-mix-ft-rnn_v8.pt not found in cache, downloading to /tmp/tmpkl2q3vvy


100%|██████████| 1.16G/1.16G [02:49<00:00, 7.32MB/s]

2023-10-18 03:05:28,004 copying /tmp/tmpkl2q3vvy to cache at /root/.flair/models/sentiment-en-mix-ft-rnn_v8.pt





2023-10-18 03:05:32,518 removing temp file /tmp/tmpkl2q3vvy


TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings('en-crawl')
    )
    (word_reprojection_map): Linear(in_features=300, out_features=300, bias=True)
    (rnn): LSTM(300, 256, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): BCEWithLogitsLoss()
)

In [7]:
# Read the test set from Google Drive into a DataFrame
TEST_CSV_PATH = '/content/gdrive/MyDrive/Enron_email/1191.csv'
df = pd.read_csv(TEST_CSV_PATH)

In [8]:
# Keep only the email text and its gold label, under the names used by the rest of the notebook.
# Renaming BEFORE selecting makes the cell safe to re-run: rename() silently skips names that
# are no longer present, so a second execution does not raise KeyError on 'Content'/'Label'.
# NOTE(review): assumes the raw CSV has no pre-existing 'Text' or 'Sentiment' column - confirm.
df = df.rename(columns={'Content': 'Text', 'Label': 'Sentiment'})[['Text', 'Sentiment']]

In [9]:
# load torch
import torch

# Report whether a CUDA GPU is available in this runtime.
# `device` is only printed here; the cells below in this section do not reference it.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# define a function for prediction
def get_sentiment(text, fallback='Neutral'):
    """Classify one email with the two-stage pipeline.

    ``classifier1`` is asked first; if its first label is 'Neutral', that is
    the answer. Otherwise the text is passed to ``classifier2`` and that
    model's first label is returned in title case (e.g. 'POSITIVE' ->
    'Positive'), because the second model emits upper-case label names.

    Args:
        text: Email body to classify.
        fallback: Label returned when no prediction is available, i.e. when
            ``text`` is not a string (pandas yields NaN for empty CSV cells),
            is blank, or a model attaches no label to the sentence.
            Defaults to 'Neutral'.

    Returns:
        'Neutral', the title-cased label predicted by ``classifier2``,
        or ``fallback``.
    """
    # Nothing to tokenise: NaN / empty / whitespace-only cells would otherwise crash below.
    if not isinstance(text, str) or not text.strip():
        return fallback

    # Stage 1: Neutral vs. NotNeutral
    stage1_sentence = Sentence(text)
    with torch.no_grad():
        classifier1.predict(stage1_sentence)

    # A sentence can come back without any label; indexing labels[0] would raise IndexError.
    if not stage1_sentence.labels:
        return fallback
    if stage1_sentence.labels[0].value == 'Neutral':
        return 'Neutral'

    # Stage 2: Positive vs. Negative.
    # A fresh Sentence is used because the first one already carries the stage-1 prediction.
    stage2_sentence = Sentence(text)
    with torch.no_grad():
        classifier2.predict(stage2_sentence)

    # The training log shows classifier2 being evaluated as a multi-label model, so it may
    # presumably attach no label at all when neither class is confident enough.
    if not stage2_sentence.labels:
        return fallback

    # Title-case the prediction, because the second model only outputs capital letters.
    return stage2_sentence.labels[0].value.title()


cuda


In [10]:
# Run the two-stage classifier on every email and store its label in a new 'result' column
df['result'] = df['Text'].map(get_sentiment)

In [11]:
# Gold labels vs. the labels produced by the two-stage pipeline
actual_labels, predicted_labels = df['Sentiment'], df['result']

# Overall accuracy across all three classes
accuracy = accuracy_score(actual_labels, predicted_labels)
print(f'Overall Accuracy: {accuracy:.2f}')

# Per-class precision, recall and f1-score
class_report = classification_report(actual_labels, predicted_labels)
print(class_report)

Overall Accuracy: 0.76
              precision    recall  f1-score   support

    Negative       0.47      0.44      0.46       104
     Neutral       0.81      0.88      0.84       764
    Positive       0.71      0.58      0.64       323

    accuracy                           0.76      1191
   macro avg       0.66      0.63      0.65      1191
weighted avg       0.75      0.76      0.75      1191

