# DESCARGA DE DATOS

In [2]:
#Create DatasetDict
from datasets import DatasetDict, Dataset, load_dataset
# Identifier of the dataset repository to load.
dataset_path = 'RikoteMaster/isear_augmented'

# Load every split and drop the 'Augmented' column in the same expression.
dataset_dict = load_dataset(dataset_path).remove_columns('Augmented')

# Show the resulting splits and their features.
dataset_dict


Found cached dataset parquet (/root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    validation: Dataset({
        features: ['Emotion', 'Text_processed'],
        num_rows: 752
    })
    test: Dataset({
        features: ['Emotion', 'Text_processed'],
        num_rows: 752
    })
    train: Dataset({
        features: ['Emotion', 'Text_processed'],
        num_rows: 10751
    })
})

In [3]:
# Load the training split into a DataFrame to discover the emotion classes.
df = dataset_dict['train'].to_pandas()

# Build both lookup tables from a single pass over the classes (order of first
# appearance in the training split) so the two mappings cannot disagree.
emotions = df['Emotion'].unique()
id2label = {i: label for i, label in enumerate(emotions)}
label2id = {label: i for i, label in id2label.items()}

# Replace the string 'Emotion' column by an integer 'labels' column in every split.
# A label that never occurs in the training split raises KeyError here.
dataset_dict = dataset_dict.map(lambda example: {'labels': label2id[example['Emotion']]}, remove_columns=['Emotion'])



Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-67b0a7624c7a5d2f.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-161de27b49f6fdea.arrow


Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-03e305454b1bb339.arrow


In [4]:
# Display the splits to confirm that 'Emotion' was replaced by 'labels'.
dataset_dict

DatasetDict({
    validation: Dataset({
        features: ['Text_processed', 'labels'],
        num_rows: 752
    })
    test: Dataset({
        features: ['Text_processed', 'labels'],
        num_rows: 752
    })
    train: Dataset({
        features: ['Text_processed', 'labels'],
        num_rows: 10751
    })
})

### Carga del tokenizador

In [5]:
from transformers import AutoTokenizer
# Checkpoint identifier; reused further down when the classification model is loaded.
model_ckpt = "bhadresh-savani/bert-base-go-emotion"
# Load the tokenizer published with that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

In [6]:
def tokenize_text(examples):
    """Tokenize the 'Text_processed' field of a batch of examples.

    Args:
        examples: mapping with a "Text_processed" entry (a batch of texts when
            used with ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for the batch, padded to the maximum length.
    """
    # truncation=True keeps every sequence within the model's maximum length;
    # with padding alone, an over-long text would produce an over-long sequence.
    return tokenizer(examples["Text_processed"], padding="max_length", truncation=True)

In [7]:
# Tokenize every split in batches; the tokenizer outputs are added as new columns.
dataset_dict = dataset_dict.map(tokenize_text, batched=True)
# Display the splits to check the added columns.
dataset_dict

Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-de0f2d38f00f62e0.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-d12b0dfd191548b2.arrow


Loading cached processed dataset at /root/.cache/huggingface/datasets/RikoteMaster___parquet/RikoteMaster--isear_augmented-b6d7bc560c3e10d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-c3255ff59bfc3ef6.arrow


DatasetDict({
    validation: Dataset({
        features: ['Text_processed', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 752
    })
    test: Dataset({
        features: ['Text_processed', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 752
    })
    train: Dataset({
        features: ['Text_processed', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 10751
    })
})

In [8]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Score a prediction output with accuracy and weighted F1.

    Args:
        pred: object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; argmax over the last axis
            gives the predicted class).

    Returns:
        dict with the keys 'eval_accuracy' and 'f1'.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        'eval_accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='weighted'),
    }

def optuna_hp_space(trial):
    """Sample the hyperparameters explored by the search.

    Both values are drawn on a logarithmic scale:
    learning rate in [1e-6, 1e-4] and weight decay in [1e-6, 1e-1].

    Args:
        trial: object providing ``suggest_float(name, low, high, log=...)``.

    Returns:
        dict with the sampled "learning_rate" and "weight_decay".
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-1, log=True)
    return {"learning_rate": learning_rate, "weight_decay": weight_decay}

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

def model_init(trial):
    """Build a fresh classifier with a frozen backbone for one search trial.

    The previous ``if 'model' in locals()`` cleanup was removed: ``model`` is
    not yet bound when the function starts, so that branch could never run.

    Args:
        trial: accepted for the ``model_init`` callback signature; not used.

    Returns:
        The model placed on ``device``, with every parameter of
        ``model.base_model`` excluded from gradient updates.
    """
    # The head is re-created with len(id2label) outputs; ignore_mismatched_sizes
    # lets loading proceed although the checkpoint head has a different shape.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_ckpt,
        num_labels=len(id2label),
        ignore_mismatched_sizes=True,
    ).to(device)

    # Freeze the backbone; parameters outside base_model stay trainable.
    for param in model.base_model.parameters():
        param.requires_grad = False

    return model


def compute_objective(metrics):
    """Return the search objective: accuracy plus F1.

    Evaluation metrics reported by the Trainer carry an ``eval_`` prefix, so
    the F1 score arrives as 'eval_f1'; the bare 'f1' key is still accepted for
    dictionaries that were not prefixed.

    Note: this function is not passed to ``hyperparameter_search`` in this
    notebook, so it only takes effect if supplied as ``compute_objective``.

    Args:
        metrics: dict containing 'eval_accuracy' and either 'eval_f1' or 'f1'.

    Returns:
        The sum of the accuracy and F1 values.

    Raises:
        KeyError: if the accuracy or both F1 keys are missing.
    """
    f1 = metrics['eval_f1'] if 'eval_f1' in metrics else metrics['f1']
    return metrics['eval_accuracy'] + f1


batch_size = 16
epochs = 5

output_dir = './results_searching_hyperparameters'
# Number of full batches in the training split, used as the logging interval.
logging_steps = len(dataset_dict['train']) // batch_size

args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy='epoch',
    logging_steps=logging_steps,
    # Mixed precision needs a CUDA device; enable it only when one is present
    # so the cell also runs on the CPU fallback.
    fp16=torch.cuda.is_available(),
    push_to_hub=False,
    # Search trials are throw-away models: never write checkpoints.
    save_strategy="no",
)

# model=None: each trial obtains a fresh model from model_init.
trainer = Trainer(
    model=None,
    args=args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['validation'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    model_init=model_init,
)

# No compute_objective is supplied, so the library default objective is used;
# in the logged trials the reported value equals accuracy + F1 of the last epoch.
best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=20
)

# Keep only the plain fields of the best run so they can be serialized.
best_trial_dict = {
    'run_id': best_trial.run_id,
    'objective': best_trial.objective,
    'hyperparameters': best_trial.hyperparameters
}

# Persist the best trial as pretty-printed JSON.
import json
with open('best_trial_frozen.txt', 'w') as file:
    json.dump(best_trial_dict, file, indent=4)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2023-07-14 08:14:04,550] A new study created in memory with name: no-name-f8cf4139-9976-42ab-be1b-b5d865ebc956
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shap

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.921,1.829193,0.30984,0.284362
2,1.8,1.739646,0.384309,0.361191
3,1.7482,1.698882,0.390957,0.372862
4,1.7209,1.677238,0.404255,0.385012
5,1.7132,1.67127,0.398936,0.383083


[I 2023-07-14 08:19:49,235] Trial 0 finished with value: 0.782019553079611 and parameters: {'learning_rate': 1.4488231669565197e-05, 'weight_decay': 0.002913414501542372}. Best is trial 0 with value: 0.782019553079611.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.9837,1.93383,0.198138,0.18169
2,1.9072,1.879359,0.256649,0.240071
3,1.871,1.846732,0.293883,0.274941
4,1.8491,1.828356,0.327128,0.307326
5,1.841,1.822428,0.337766,0.318337


[I 2023-07-14 08:25:33,009] Trial 1 finished with value: 0.6561028938473347 and parameters: {'learning_rate': 5.369775710518277e-06, 'weight_decay': 0.004674647270538714}. Best is trial 0 with value: 0.782019553079611.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.9997,1.957781,0.175532,0.163728
2,1.9328,1.913639,0.216755,0.200376
3,1.903,1.887137,0.243351,0.226082
4,1.885,1.872018,0.260638,0.240916
5,1.8785,1.86706,0.267287,0.249602


[I 2023-07-14 08:31:19,263] Trial 2 finished with value: 0.5168896401200112 and parameters: {'learning_rate': 3.8227902332466176e-06, 'weight_decay': 0.03686212554180017}. Best is trial 0 with value: 0.782019553079611.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.772,1.621789,0.412234,0.394918
2,1.6383,1.569651,0.418883,0.406422
3,1.617,1.559034,0.428191,0.416158
4,1.6044,1.545832,0.433511,0.423901
5,1.6057,1.545552,0.43484,0.423758


[I 2023-07-14 08:37:04,635] Trial 3 finished with value: 0.8585981176195774 and parameters: {'learning_rate': 6.231785865276698e-05, 'weight_decay': 0.00034236789789547216}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.9582,1.892848,0.24867,0.232788
2,1.8635,1.820407,0.327128,0.306167
3,1.8177,1.780712,0.367021,0.345145
4,1.7913,1.759026,0.381649,0.357863
5,1.7821,1.752378,0.384309,0.363007


[I 2023-07-14 08:42:48,681] Trial 4 finished with value: 0.7473150417104302 and parameters: {'learning_rate': 8.520797749449048e-06, 'weight_decay': 2.322381520221782e-06}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.0063,1.967315,0.164894,0.148404


[I 2023-07-14 08:43:57,900] Trial 5 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.0418,2.021058,0.131649,0.080148


[I 2023-07-14 08:45:07,038] Trial 6 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7395,1.592843,0.413564,0.39944
2,1.6206,1.552311,0.422872,0.412029
3,1.6055,1.546369,0.428191,0.416487
4,1.5945,1.533852,0.4375,0.428244
5,1.5962,1.534394,0.426862,0.416331


[I 2023-07-14 08:50:52,392] Trial 7 finished with value: 0.8431929086834407 and parameters: {'learning_rate': 8.534112670172364e-05, 'weight_decay': 1.1016485928577635e-05}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.0293,2.001453,0.144947,0.106967


[I 2023-07-14 08:52:02,701] Trial 8 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,2.0429,2.02286,0.131649,0.077524


[I 2023-07-14 08:53:12,362] Trial 9 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7286,1.58458,0.418883,0.406392
2,1.6156,1.547017,0.424202,0.41264
3,1.6022,1.542526,0.426862,0.414293
4,1.5915,1.530237,0.43617,0.427173
5,1.5932,1.531047,0.425532,0.415167


[I 2023-07-14 09:02:14,720] Trial 10 finished with value: 0.8406990752889849 and parameters: {'learning_rate': 9.569776250115391e-05, 'weight_decay': 8.511482690085097e-05}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7636,1.61372,0.409574,0.392401
2,1.6333,1.564846,0.420213,0.408036
3,1.6138,1.55553,0.426862,0.414966
4,1.6017,1.542497,0.43484,0.425401
5,1.6031,1.542503,0.430851,0.419903


[I 2023-07-14 09:07:58,605] Trial 11 finished with value: 0.8507540329749596 and parameters: {'learning_rate': 6.739810030507733e-05, 'weight_decay': 4.4614218717108616e-05}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8219,1.678377,0.388298,0.370143
2,1.6758,1.6052,0.397606,0.381205


[I 2023-07-14 09:10:16,709] Trial 12 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8335,1.693428,0.388298,0.370154
2,1.6866,1.615685,0.392287,0.374161


[I 2023-07-14 09:13:25,585] Trial 13 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8272,1.685121,0.385638,0.367973
2,1.6806,1.609816,0.396277,0.379121


[I 2023-07-14 09:18:06,222] Trial 14 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8908,1.778583,0.347074,0.325813


[I 2023-07-14 09:19:15,583] Trial 15 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7559,1.606664,0.416223,0.400334
2,1.6289,1.560642,0.417553,0.405449
3,1.611,1.552478,0.428191,0.416667
4,1.5993,1.539631,0.43617,0.426816
5,1.6008,1.539803,0.432181,0.421576


[I 2023-07-14 09:24:57,999] Trial 16 finished with value: 0.8537565820359238 and parameters: {'learning_rate': 7.254174683232566e-05, 'weight_decay': 1.277550591162229e-06}. Best is trial 3 with value: 0.8585981176195774.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7782,1.627958,0.408245,0.389075


[I 2023-07-14 09:26:07,166] Trial 17 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.8754,1.754099,0.364362,0.344188


[I 2023-07-14 09:27:16,100] Trial 18 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.7854,1.635501,0.401596,0.381708


[I 2023-07-14 09:28:25,001] Trial 19 pruned. 


In [8]:
# Shell escape: print the nvidia-smi report (GPU memory and utilisation).
!nvidia-smi

Thu Jul 13 11:29:28 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN V      Off  | 00000000:01:00.0 Off |                  N/A |
| 34%   45C    P8    26W / 250W |  10004MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [12]:
# Load the checkpoint with a classification head sized for our classes.
# Passing id2label/label2id stores the emotion names in the model config, so
# the saved/pushed model reports them instead of generic label names.
model = AutoModelForSequenceClassification.from_pretrained(
    model_ckpt,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
# Freeze the whole backbone (model.base_model); parameters outside it, such as
# the newly initialised classifier, remain trainable.
for param in model.base_model.parameters():
    param.requires_grad = False

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/bert-base-go-emotion and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([28, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([28]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from transformers import EarlyStoppingCallback
batch_size = 16
epochs = 30

output_dir = './results_freezed'
# Number of full batches in the training split, used as the logging interval.
logging_steps = len(dataset_dict['train']) // batch_size
args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=epochs,
    # NOTE(review): learning_rate and weight_decay below do not match the best
    # trial logged by the search above (lr ~6.23e-05, wd ~3.42e-04), and this
    # learning rate lies outside the searched range [1e-6, 1e-4] -- presumably
    # taken from a different run; confirm.
    learning_rate=3.562121201880562e-04,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.0007924379520012866,
    evaluation_strategy='epoch',
    # Checkpoint every epoch, matching the evaluation schedule, so the best
    # model can be restored at the end.
    save_strategy='epoch',
    logging_steps=logging_steps,
    # Mixed precision needs a CUDA device; enable it only when one is present.
    fp16=torch.cuda.is_available(),
    push_to_hub=False,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
)

In [21]:
# Trainer for the final run; the early-stopping patience is 20% of the
# planned number of epochs.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['validation'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=int(0.2 * epochs))],
)

In [22]:
# Run the training loop configured above; the returned TrainOutput is displayed.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.5951,1.531633,0.442819,0.430315
2,1.5875,1.514691,0.432181,0.420767
3,1.5811,1.524175,0.445479,0.441161
4,1.5743,1.530792,0.416223,0.416816
5,1.5738,1.51581,0.445479,0.433519
6,1.5655,1.509822,0.448138,0.438635
7,1.5631,1.502079,0.465426,0.452725
8,1.558,1.511683,0.464096,0.454242
9,1.5462,1.491038,0.462766,0.455032
10,1.5519,1.495883,0.458777,0.445807


TrainOutput(global_step=12096, training_loss=1.5571194822510714, metrics={'train_runtime': 1307.1586, 'train_samples_per_second': 246.741, 'train_steps_per_second': 15.423, 'total_flos': 5.09190110148096e+16, 'train_loss': 1.5571194822510714, 'epoch': 18.0})

In [23]:
# Upload the trained model and its training artefacts to the Hub repository.
trainer.push_to_hub()

/home/mriciba/Projects/dipsy/BERTS/code/BERT/./results_freezed is already a clone of https://huggingface.co/RikoteMaster/results_freezed. Make sure you pull the latest changes with `repo.git_pull()`.


Upload file pytorch_model.bin:   0%|          | 1.00/418M [00:00<?, ?B/s]

Upload file runs/Jul13_12-41-57_2bfb4451df90/events.out.tfevents.1689252118.2bfb4451df90.283443.21:   0%|     …

Upload file training_args.bin:   0%|          | 1.00/3.87k [00:00<?, ?B/s]

To https://huggingface.co/RikoteMaster/results_freezed
   f0a7fe0..3799876  main -> main

To https://huggingface.co/RikoteMaster/results_freezed
   3799876..2563c18  main -> main



'https://huggingface.co/RikoteMaster/results_freezed/commit/37998761a70e46bb59eb1487e7155648e5274438'

In [None]:
# Predict on the held-out test split.
test_output = trainer.predict(dataset_dict['test'])
preds = test_output.predictions.argmax(-1)

# Score against the integer labels returned with the predictions: the string
# 'Emotion' column was removed from the dataset, and `test_df` was never defined.
acc = accuracy_score(test_output.label_ids, preds)

print(acc)

In [None]:
# Second stage: make the last two encoder layers trainable again.
# The whole backbone was frozen earlier through model.base_model, so setting
# requires_grad to False here had no effect.
# NOTE(review): the existing `trainer` may still hold the optimizer created for
# the frozen model -- consider building a new Trainer before training again; confirm.
for param in model.bert.encoder.layer[-2:].parameters():
    param.requires_grad = True


In [None]:
# Train again with the same `trainer` object after the layer change above.
trainer.train()

In [None]:
# Upload the result of the second training run to the Hub repository.
trainer.push_to_hub()