# Extract features from the penultimate layer of the Emotion English DistilRoBERTa-base model

In [None]:
# install the transformers library
!pip install transformers

# import required packages
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# load tokenizer and model
# the same checkpoint identifier is passed to both from_pretrained calls below,
# so the tokenizer and the model come from the same checkpoint
model_name = "j-hartmann/emotion-english-distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# move the model to `device`; the extraction loop moves each tokenized input there as well
model.to(device)

## Preparation of dataset

In [37]:
# example sentences to run through the model, one forward pass per entry
# NOTE(review): the last sentence contains an acute accent (U+00B4) rather than an
# apostrophe — presumably a typo; confirm whether it is intentional
pred_texts = [
    'I like that',
    'That is annoying',
    'This is great!',
    'Wouldn´t recommend it.',
]

In [38]:
def get_features(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable takes the three positional arguments of a forward
    hook (module, inputs, output), detaches the output and writes it to the
    module-level ``features`` dict, replacing any value already stored under
    the same key. It returns ``None``.
    """

    def _capture(module, inputs, output):
        # keep a detached copy so the stored value is cut off from the autograd graph
        detached = output.detach()
        features[name] = detached

    return _capture

In [39]:
# attach the capture hook to `model.classifier.dense`; each time the hook fires it stores
# that submodule's detached output in the `features` dict under the key 'feats'
# NOTE(review): the returned handle is not kept, so the hook cannot be removed later and
# re-running this cell presumably registers an additional hook — confirm
model.classifier.dense.register_forward_hook(get_features('feats'))

<torch.utils.hooks.RemovableHandle at 0x7f6b70424190>

## Extract features from penultimate layer

In [40]:
# placeholders: one array per input text, concatenated after the loop
PREDICTIONS = []
FEATS = []

# placeholder for batch features; the forward hook overwrites features['feats']
# on every forward pass, so it always holds the activations of the latest text
features = {}

# inference only: disable autograd so no computation graph is built or kept alive
with torch.no_grad():
    for text in pred_texts:
        # tokenize a single text and move the resulting tensors to the model's device
        encoded = tokenizer(text, return_tensors="pt").to(device)

        # the forward pass also triggers the hook, which refreshes features['feats']
        logits = model(**encoded).logits

        # copy both results to host memory as NumPy arrays
        PREDICTIONS.append(logits.cpu().numpy())
        FEATS.append(features['feats'].cpu().numpy())
       

In [None]:
# Inspect features

# join the per-text arrays along the first axis into one array each
PREDICTIONS = np.concatenate(PREDICTIONS, axis=0)
FEATS = np.concatenate(FEATS, axis=0)

# report the shapes of the combined arrays
print('preds shape:', np.shape(PREDICTIONS))
print('feats shape:', np.shape(FEATS))