In [2]:
import os

import torch
from helpers import ClassifierModel, loadTrainTweets, normalizeTweets, tokenizeTweets, train

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
# Run mode; forwarded unchanged to every tokenizeTweets call further down.
mode = "train"

# Load BERTweet architecture

In [3]:
# Checkpoint identifier handed to both ClassifierModel and tokenizeTweets.
checkpoint = 'vinai/bertweet-base'

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Build the two-label classifier and move it to the selected device.
# NOTE(review): the first training cell rebinds model_classfier to a new
# ClassifierModel before this instance is used anywhere visible in the
# notebook -- confirm whether this instantiation is still needed.
model_classfier = ClassifierModel(num_labels=2, checkpoint=checkpoint)
model_classfier = model_classfier.to(device)

# Load and preprocess Tweets

In [4]:
# Paths to the positive and negative training tweet files.
positive_file_path = 'data/train_pos_full.txt'
negative_file_path = 'data/train_neg_full.txt'

# Read the tweets from both files.
df = loadTrainTweets(positive_file_path, negative_file_path)

# Replace the loaded tweets with their normalized form.
df = normalizeTweets(df)

# Best seeds; tokenizeTweets is called once per seed, in this order.
seeds = [4, 99, 1]

# Tokenize once per seed. Each call returns a (train, eval, test) triple,
# which is unpacked below into the numbered dataloader names used by the
# training cells.
splits_per_seed = [tokenizeTweets(checkpoint, df, mode, seed) for seed in seeds]
(
    (train_dataloader1, eval_dataloader1, test_dataloader1),
    (train_dataloader2, eval_dataloader2, test_dataloader2),
    (train_dataloader3, eval_dataloader3, test_dataloader3),
) = splits_per_seed

100%|██████████| 200002/200002 [00:51<00:00, 3876.52it/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 1723.41 examples/s]
Map: 100%|██████████| 99901/99901 [00:29<00:00, 3438.13 examples/s]
Map: 100%|██████████| 99901/99901 [00:21<00:00, 4555.15 examples/s]


# Train

## Train with first seed

In [3]:
# First seed: build a new ClassifierModel on `device`, then train it on the
# first train/eval dataloader pair. Whatever `train` returns is kept as
# best_weights1 for the save cell.
model_classfier = ClassifierModel(num_labels=2, checkpoint=checkpoint)
model_classfier = model_classfier.to(device)
best_weights1 = train(
    model_classfier,
    train_dataloader1,
    eval_dataloader1,
    device,
)

## Train with second seed

In [2]:
# Second seed: build a new ClassifierModel on `device`, then train it on the
# second train/eval dataloader pair. Whatever `train` returns is kept as
# best_weights2 for the save cell.
model_classfier = ClassifierModel(num_labels=2, checkpoint=checkpoint)
model_classfier = model_classfier.to(device)
best_weights2 = train(
    model_classfier,
    train_dataloader2,
    eval_dataloader2,
    device,
)

## Train with third seed

In [1]:
# Third seed: build a new ClassifierModel on `device`, then train it on the
# third train/eval dataloader pair. Whatever `train` returns is kept as
# best_weights3 for the save cell.
model_classfier = ClassifierModel(num_labels=2, checkpoint=checkpoint)
model_classfier = model_classfier.to(device)
best_weights3 = train(
    model_classfier,
    train_dataloader3,
    eval_dataloader3,
    device,
)

# Save Best Weights

In [None]:
# Persist the object returned by each of the three training runs.
# torch.save does not create missing parent directories, so make sure
# 'weights/' exists first; otherwise this cell would fail only after all
# three training runs have already completed.
os.makedirs('weights', exist_ok=True)

torch.save(best_weights1, 'weights/best_model_weights1_new.pt')
torch.save(best_weights2, 'weights/best_model_weights2_new.pt')
torch.save(best_weights3, 'weights/best_model_weights3_new.pt')