In [3]:
!pip install -q seqeval==1.2.2

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone


In [4]:
!pip install -q datasets==3.2.0

In [5]:
!pip install -q evaluate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

## Aspect Term Extraction

In [3]:
from seqeval.metrics import accuracy_score
from datasets import load_dataset
from transformers import (AutoTokenizer,
                          DataCollatorForTokenClassification,
                          AutoModelForTokenClassification)


In [4]:
ds = load_dataset("thainq107/abte-restaurants")

README.md:   0%|          | 0.00/454 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/183k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/61.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3602 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1119 [00:00<?, ? examples/s]

In [78]:
" ".join(ds['train'][1]["Tokens"])

'To be completely fair , the only redeeming factor was the food , which was above average , but could "nt" make up for all the other deficiencies of Teodora .'

In [6]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

def tokenize_and_align_labels(examples):
    tokenized_inputs = []
    labels = []
    for tokens, tags in zip(examples['Tokens'], examples['Tags']):
        # tokens = tokens.replace("'", "").strip("][").split(', ')
        # tags = tags.strip('][').split(', ')

        bert_tokens = []
        bert_tags = []
        for i in range(len(tokens)):
            t = tokenizer.tokenize(tokens[i])
            bert_tokens += t
            bert_tags += [int(tags[i])]*len(t)

        bert_ids = tokenizer.convert_tokens_to_ids(bert_tokens)

        tokenized_inputs.append(bert_ids)
        labels.append(bert_tags)

    return {
        'input_ids': tokenized_inputs ,
        'labels': labels
    }

preprocessed_ds = ds.map(tokenize_and_align_labels, batched = True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/3602 [00:00<?, ? examples/s]

Map:   0%|          | 0/1119 [00:00<?, ? examples/s]

In [7]:
preprocessed_ds['train'][0]

{'Tokens': ['But', 'the', 'staff', 'was', 'so', 'horrible', 'to', 'us', '.'],
 'Tags': ['0', '0', '1', '0', '0', '0', '0', '0', '0'],
 'Polarities': ['-1', '-1', '0', '-1', '-1', '-1', '-1', '-1', '-1'],
 'input_ids': [2021, 1996, 3095, 2001, 2061, 9202, 2000, 2149, 1012],
 'labels': [0, 0, 1, 0, 0, 0, 0, 0, 0]}

In [8]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [9]:
import numpy as np
from seqeval.metrics import accuracy_score, f1_score, classification_report

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens) and convert to labels
    true_predictions = [
        [id2label[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [id2label[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    # Calculate F1 score (macro-averaged)
    results = f1_score(true_labels, true_predictions, average="macro")  # Add average="macro"

    # Optional: Get a full classification report (precision, recall, F1, support)
    report = classification_report(true_labels, true_predictions)
    print(report)  # Print the report for detailed analysis

    return {"f1": results}

In [10]:
id2label = {
    0: "O",
    1: "B-Term",
    2: "I-Term"
}
label2id = {
    "O": 0,
    "B-Term": 1,
    "I-Term": 2
}


## Model

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import TrainingArguments, Trainer, AutoConfig, PreTrainedModel
from transformers.modeling_outputs import TokenClassifierOutput

In [69]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding='same'):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride, padding)
        self.bn2 = nn.BatchNorm1d(out_channels)

        self.shortcut = nn.Sequential()
        if in_channels != out_channels: # Projection if dimensions change
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1, padding='same'),  # 1x1 conv for projection
                nn.BatchNorm1d(out_channels)
            )
    def forward(self, x):
        residual = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(residual)  # Add shortcut connection
        out = F.relu(out)
        return out


class Conv1dATEModel(PreTrainedModel):
    config_class = AutoConfig  # Use AutoConfig for compatibility

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        self.in_channels = config.hidden_size
        
         # --- Residual Blocks ---
        self.res_block1 = ResidualBlock(self.in_channels, config.hidden_size, kernel_size=3)
        self.res_block2 = ResidualBlock(config.hidden_size, config.hidden_size, kernel_size=3)
        self.res_block3 = ResidualBlock(config.hidden_size, config.hidden_size, kernel_size=3)
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Weight initialization (important!)
        self.init_weights()  # Initialize weights as HuggingFace does


    def forward(self, input_ids=None, attention_mask=None, labels=None):
        # Embedding layer
        embeddings = self.embedding(input_ids)

        # Conv1d expects input (batch_size, channels, sequence_length)
        #   - embeddings is (batch_size, sequence_length, hidden_size)
        #   - Transpose to (batch_size, hidden_size, sequence_length)
        embeddings = embeddings.transpose(1, 2)

        # Convolutional layers
        x = self.res_block1(embeddings)
        x = self.res_block2(x)
        x = self.res_block3(x)

        # Transpose back to (batch_size, sequence_length, hidden_size)
        x = x.transpose(1, 2)

        # Dropout and classification
        x = self.dropout(x)
        logits = self.classifier(x)

        # Calculate loss (if labels are provided)
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Only keep active parts of the loss
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.config.num_labels)
                active_labels = torch.where(
                    active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
                )
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return TokenClassifierOutput(
            loss=loss,
            logits=logits
        )

    def init_weights(self):
        # Use the same initialization as Hugging Face models
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize the weights like the original models"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.Conv1d):
             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
             if module.bias is not None:
                module.bias.data.zero_()

In [70]:
config = AutoConfig.from_pretrained(
    "distilbert/distilbert-base-uncased",  # Base config on a known model
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
    initializer_range=0.02,  # Standard initialization range
)

In [71]:
model_conv1d = Conv1dATEModel(config=config)

In [15]:

model_pretrained = AutoModelForTokenClassification.from_pretrained(
    "distilbert/distilbert-base-uncased",
    num_labels =3, id2label=id2label , label2id=label2id
)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
import os
os.environ['WANDB_DISABLED'] = 'true'
from transformers import TrainingArguments, Trainer

In [17]:
training_args = TrainingArguments(
    output_dir="distilbert-base-uncased",
    learning_rate =2e-5,
    per_device_train_batch_size =16,
    per_device_eval_batch_size =16,
    num_train_epochs =5,
    weight_decay =0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model_pretrained ,
    args=training_args ,
    train_dataset=preprocessed_ds["train"],
    eval_dataset=preprocessed_ds["test"],
    processing_class=tokenizer ,
    data_collator=data_collator ,
    compute_metrics=compute_metrics ,
)

trainer.train()
trainer.save_model("abte-restaurants-distilbert-base-uncased")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,F1
1,No log,0.210162,0.693302
2,No log,0.186067,0.80589
3,No log,0.196794,0.820221
4,No log,0.206575,0.81557
5,0.119100,0.213183,0.821367


              precision    recall  f1-score   support

        Term       0.65      0.74      0.69      4022

   micro avg       0.65      0.74      0.69      4022
   macro avg       0.65      0.74      0.69      4022
weighted avg       0.65      0.74      0.69      4022





              precision    recall  f1-score   support

        Term       0.79      0.82      0.81      4022

   micro avg       0.79      0.82      0.81      4022
   macro avg       0.79      0.82      0.81      4022
weighted avg       0.79      0.82      0.81      4022





              precision    recall  f1-score   support

        Term       0.82      0.82      0.82      4022

   micro avg       0.82      0.82      0.82      4022
   macro avg       0.82      0.82      0.82      4022
weighted avg       0.82      0.82      0.82      4022





              precision    recall  f1-score   support

        Term       0.81      0.83      0.82      4022

   micro avg       0.81      0.83      0.82      4022
   macro avg       0.81      0.83      0.82      4022
weighted avg       0.81      0.83      0.82      4022





              precision    recall  f1-score   support

        Term       0.82      0.83      0.82      4022

   micro avg       0.82      0.83      0.82      4022
   macro avg       0.82      0.83      0.82      4022
weighted avg       0.82      0.83      0.82      4022



In [72]:
training_args = TrainingArguments(
    output_dir="conv1d",
    learning_rate =1e-4,
    per_device_train_batch_size =16,
    per_device_eval_batch_size =16,
    num_train_epochs =10,
    weight_decay =0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model_conv1d ,
    args=training_args ,
    train_dataset=preprocessed_ds["train"],
    eval_dataset=preprocessed_ds["test"],
    processing_class=tokenizer ,
    data_collator=data_collator ,
    compute_metrics=compute_metrics ,
)

trainer.train()
trainer.save_model("abte-restaurants-conv1d")

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,F1
1,No log,0.465517,0.58178
2,No log,0.39209,0.604575
3,No log,0.374261,0.600605
4,No log,0.490846,0.626244
5,0.086400,0.453762,0.614678
6,0.086400,0.426093,0.639668
7,0.086400,0.416792,0.642613
8,0.086400,0.405667,0.649864
9,0.008300,0.425405,0.643846
10,0.008300,0.446236,0.64263


              precision    recall  f1-score   support

        Term       0.61      0.55      0.58      4022

   micro avg       0.61      0.55      0.58      4022
   macro avg       0.61      0.55      0.58      4022
weighted avg       0.61      0.55      0.58      4022





              precision    recall  f1-score   support

        Term       0.67      0.55      0.60      4022

   micro avg       0.67      0.55      0.60      4022
   macro avg       0.67      0.55      0.60      4022
weighted avg       0.67      0.55      0.60      4022





              precision    recall  f1-score   support

        Term       0.64      0.57      0.60      4022

   micro avg       0.64      0.57      0.60      4022
   macro avg       0.64      0.57      0.60      4022
weighted avg       0.64      0.57      0.60      4022





              precision    recall  f1-score   support

        Term       0.68      0.58      0.63      4022

   micro avg       0.68      0.58      0.63      4022
   macro avg       0.68      0.58      0.63      4022
weighted avg       0.68      0.58      0.63      4022





              precision    recall  f1-score   support

        Term       0.67      0.57      0.61      4022

   micro avg       0.67      0.57      0.61      4022
   macro avg       0.67      0.57      0.61      4022
weighted avg       0.67      0.57      0.61      4022





              precision    recall  f1-score   support

        Term       0.67      0.61      0.64      4022

   micro avg       0.67      0.61      0.64      4022
   macro avg       0.67      0.61      0.64      4022
weighted avg       0.67      0.61      0.64      4022





              precision    recall  f1-score   support

        Term       0.65      0.63      0.64      4022

   micro avg       0.65      0.63      0.64      4022
   macro avg       0.65      0.63      0.64      4022
weighted avg       0.65      0.63      0.64      4022





              precision    recall  f1-score   support

        Term       0.68      0.63      0.65      4022

   micro avg       0.68      0.63      0.65      4022
   macro avg       0.68      0.63      0.65      4022
weighted avg       0.68      0.63      0.65      4022





              precision    recall  f1-score   support

        Term       0.66      0.62      0.64      4022

   micro avg       0.66      0.62      0.64      4022
   macro avg       0.66      0.62      0.64      4022
weighted avg       0.66      0.62      0.64      4022





              precision    recall  f1-score   support

        Term       0.66      0.63      0.64      4022

   micro avg       0.66      0.63      0.64      4022
   macro avg       0.66      0.63      0.64      4022
weighted avg       0.66      0.63      0.64      4022



In [20]:
from transformers import pipeline

token_classifier = pipeline(
    model="thainq107/abte-restaurants-distilbert-base-uncased",
    aggregation_strategy="simple"
)

test_sentence = 'The bread is top notch as well'
results = token_classifier(test_sentence)
results

config.json:   0%|          | 0.00/712 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


[{'entity_group': 'Term',
  'score': 0.90669304,
  'word': 'bread',
  'start': 4,
  'end': 9}]

In [21]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [73]:
!huggingface-cli repo create abte-restaurants-conv1d --organization Epsilon123 --type model -y

[90mgit version 2.34.1[0m
[90mgit-lfs/3.0.2 (GitHub; linux amd64; go 1.18.1)[0m

You are about to create [1mEpsilon123/abte-restaurants-conv1d[0m

Your repo now lives at:
  [1mhttps://huggingface.co/Epsilon123/abte-restaurants-conv1d[0m

You can clone it locally with the command below, and commit/push as usual.

  git clone https://huggingface.co/Epsilon123/abte-restaurants-conv1d



In [74]:
!huggingface-cli upload Epsilon123/abte-restaurants-conv1d abte-restaurants-conv1d

Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
Start hashing 7 files.
Finished hashing 7 files.
training_args.bin:   0%|                            | 0.00/5.30k [00:00<?, ?B/s]
model.safetensors:   0%|                             | 0.00/136M [00:00<?, ?B/s][A

Upload 2 LFS files:   0%|                                 | 0/2 [00:00<?, ?it/s][A[A
model.safetensors:   1%|▏                    | 918k/136M [00:00<00:16, 8.28MB/s][A
training_args.bin: 100%|███████████████████| 5.30k/5.30k [00:00<00:00, 16.1kB/s][A

model.safetensors:   8%|█▋                  | 11.3M/136M [00:00<00:05, 21.2MB/s][A
model.safetensors:  11%|██▏                 | 15.2M/136M [00:00<00:04, 25.5MB/s][A
model.safetensors:  13%|██▋                 | 18.0M/136M [00:01<00:08, 14.7MB/s][A
model.safetensors:  20%|███▉                | 26.6M/136M [00:01<00:06, 15.8MB/s][A
model.safetensors:  22%

In [21]:
from transformers import pipeline

token_classifier = pipeline(
    "token-classification",  # Specify the task
    model="Epsilon123/abte-restaurants-distilbert-base-uncased",  # Your model's repository ID
    aggregation_strategy="simple" # Or your preferred strategy
)

test_sentence = """To be completely fair , the only redeeming factor was the food , which was above average , but could "nt" make up for all the other deficiencies of Teodora ."""
results = token_classifier(test_sentence)
print(results)

Device set to use cuda:0


[{'entity_group': 'Term', 'score': 0.84177595, 'word': 'food', 'start': 58, 'end': 62}]


## Aspect Term Sentiment Classification

In [6]:
from seqeval.metrics import accuracy_score
from datasets import load_dataset
from transformers import (AutoTokenizer,
                          DataCollatorForTokenClassification,
                          AutoModelForSequenceClassification)


In [7]:
ds = load_dataset("thainq107/abte-restaurants")

README.md:   0%|          | 0.00/454 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/183k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/61.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3602 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1119 [00:00<?, ? examples/s]

In [42]:
len(ds['train'])

3602

In [9]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

def tokenize_and_align_labels(examples):
    sentences, sentence_tags = [], []
    labels = []
    for tokens , pols in zip(examples['Tokens'], examples['Polarities']):
        # tokens = tokens.replace("'", "").strip("][").split(', ')
        # pols = pols.strip('][').split(', ')

        bert_tokens = []
        bert_att = []
        pols_label = 0
        for i in range(len(tokens)):
            t = tokenizer.tokenize(tokens[i])
            bert_tokens += t
            if int(pols[i]) != -1:
                bert_att += t
                pols_label = int(pols[i])

        sentences.append(" ".join(bert_tokens))
        sentence_tags.append(" ".join(bert_att))
        labels.append(pols_label)

    tokenized_inputs = tokenizer(sentences , sentence_tags , padding=True , truncation=True , return_tensors="pt")
    tokenized_inputs['labels'] = labels
    return tokenized_inputs

preprocessed_ds = ds.map(tokenize_and_align_labels, batched = True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/3602 [00:00<?, ? examples/s]

Map:   0%|          | 0/1119 [00:00<?, ? examples/s]

In [10]:
import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions , axis =1)
    return accuracy.compute(predictions=predictions , references=labels)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [11]:
id2label = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
label2id = {'Negative': 0, 'Neutral': 1, 'Positive': 2}

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", num_labels =3, id2label=id2label , label2id=label2id
)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
import os
from transformers import TrainingArguments , Trainer

os.environ['WANDB_DISABLED'] = 'true'

training_args = TrainingArguments(
    output_dir="absa-distilbert-base-uncased",
    learning_rate =2e-5,
    per_device_train_batch_size =16,
    per_device_eval_batch_size =16,
    num_train_epochs =5,
    weight_decay =0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model ,
    args=training_args ,
    train_dataset=preprocessed_ds["train"],
    eval_dataset=preprocessed_ds["test"],
    processing_class=tokenizer ,
    compute_metrics=compute_metrics ,
)
trainer.train()
trainer.save_model('bsa-restaurants-distilbert-base-uncased')

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.623762,0.763181
2,No log,0.529678,0.779267
3,No log,0.515828,0.806077
4,No log,0.494574,0.820375
5,0.516500,0.509793,0.81412




In [13]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
!huggingface-cli repo create absa-restaurants-distilbert-base-uncased --organization Epsilon123 --type model -y

[90mgit version 2.34.1[0m
[90mgit-lfs/3.0.2 (GitHub; linux amd64; go 1.18.1)[0m

You are about to create [1mEpsilon123/absa-restaurants-distilbert-base-uncased[0m

Your repo now lives at:
  [1mhttps://huggingface.co/Epsilon123/absa-restaurants-distilbert-base-uncased[0m

You can clone it locally with the command below, and commit/push as usual.

  git clone https://huggingface.co/Epsilon123/absa-restaurants-distilbert-base-uncased



In [15]:
!huggingface-cli upload Epsilon123/absa-restaurants-distilbert-base-uncased bsa-restaurants-distilbert-base-uncased

Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
Start hashing 7 files.
Finished hashing 7 files.
model.safetensors:   0%|                             | 0.00/268M [00:00<?, ?B/s]
training_args.bin:   0%|                            | 0.00/5.37k [00:00<?, ?B/s][A

training_args.bin: 100%|███████████████████| 5.37k/5.37k [00:00<00:00, 15.9kB/s][A[A
model.safetensors: 100%|█████████████████████| 268M/268M [00:09<00:00, 29.0MB/s]


Upload 2 LFS files: 100%|█████████████████████████| 2/2 [00:09<00:00,  4.73s/it][A[A
https://huggingface.co/Epsilon123/absa-restaurants-distilbert-base-uncased/tree/main/.


In [71]:
from transformers import pipeline

token_classifier = pipeline(
    "token-classification",  # Specify the task
    model="Epsilon123/abte-restaurants-distilbert-base-uncased",
    aggregation_strategy="simple"
)

classifier = pipeline(
    "text-classification",
    model="Epsilon123/absa-restaurants-distilbert-base-uncased",

)

test_sentence = 'The food is terrible'
results = token_classifier(test_sentence)
sentence_tags = " ".join([ result['word'] for result in results ])

pred_label = classifier(f'{test_sentence} [SEP] {sentence_tags}')
sentence_tags, pred_label

Device set to use cuda:0
Device set to use cuda:0


('food', [{'label': 'Negative', 'score': 0.8919645547866821}])

In [46]:
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

model_name = "Epsilon123/absa-restaurants-distilbert-base-uncased"

model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Now you can use the model and tokenizer directly:
inputs = tokenizer("The food was amazing, but the service was slow. [SEP] food service", return_tensors="pt")
outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=-1)

# Process the predictions as needed...

array([[ 101, 1996, 2833, 2001, 6429, 1010, 2021, 1996, 2326, 2001, 4030,
        1012,  102, 2833, 2326,  102]])

In [57]:
tokenizer.decode(inputs['input_ids'][0].cpu().numpy()), predictions

('[CLS] the food was amazing, but the service was slow. [SEP] food service [SEP]',
 tensor([[1, 2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 2, 1, 1, 2]]))