<a href="https://colab.research.google.com/github/TehJimmmyy/SentimentalAnalysis/blob/main/Irony_Speech_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!gdown 1sh1yidC5_bDWiW7wEE_Ehk-U7VPOXW5m #model1
#!gdown --id 1DooDLNEIwX0yqg_qDOcjylIlQXH8JkuC
!pip install pytorch-lightning==1.2.8 --quiet
!pip install torchtext==0.6 --quiet
!pip install transformers --quiet

import numpy as np
import torch
import torch.nn as nn
from transformers import BertTokenizerFast as BertTokenizer, BertModel
import pytorch_lightning as pl

# Label names, one entry per classifier output. This must be a list: the
# notebook takes len() of it to size the classification head and zips it with
# the per-label predictions, and a bare string would be treated as a sequence
# of characters in both places.
LABEL_COLUMNS = ["Irony_Speech"]
BERT_MODEL_NAME = 'bert-base-cased'
MAX_TOKEN_COUNT = 90
N_EPOCHS = 10
BATCH_SIZE = 4
# Tokenizer matching the pretrained BERT encoder named above.
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)

Downloading...
From (original): https://drive.google.com/uc?id=1sh1yidC5_bDWiW7wEE_Ehk-U7VPOXW5m
From (redirected): https://drive.google.com/uc?id=1sh1yidC5_bDWiW7wEE_Ehk-U7VPOXW5m&confirm=t&uuid=2716c32b-375f-4a4d-9afb-b40da4fd95ae
To: /content/IronySpeechModel.ckpt
100% 1.30G/1.30G [00:18<00:00, 69.4MB/s]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
class IronySpeechTagger(pl.LightningModule):
  """Pretrained BERT encoder followed by a linear layer and a sigmoid.

  Args:
    n_classes: Number of output units of the linear classification head.
    n_training_steps: Total number of optimizer steps; forwarded to the
      linear warm-up schedule in ``configure_optimizers``.
    n_warmup_steps: Number of warm-up steps; forwarded to the same schedule.
  """

  def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
    super().__init__()
    self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
    self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
    self.n_training_steps = n_training_steps
    self.n_warmup_steps = n_warmup_steps
    # BCELoss expects probabilities, which is why forward() applies a sigmoid.
    self.criterion = nn.BCELoss()

  def forward(self, input_ids, attention_mask, labels=None):
    """Run the encoder and the classification head.

    Returns:
      A ``(loss, output)`` tuple. ``output`` holds the sigmoid activations of
      the head; ``loss`` is the BCE loss against ``labels`` or the integer 0
      when no labels are given.
    """
    output = self.bert(input_ids, attention_mask=attention_mask)
    output = self.classifier(output.pooler_output)
    output = torch.sigmoid(output)
    loss = 0
    if labels is not None:
      loss = self.criterion(output, labels)
    return loss, output

  def _shared_step(self, batch, stage):
    """Forward one batch and log its loss under ``"<stage>_loss"``."""
    labels = batch["labels"]
    loss, outputs = self(batch["input_ids"], batch["attention_mask"], labels)
    self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
    return loss, outputs, labels

  @staticmethod
  def _gather_epoch_outputs(outputs):
    """Concatenate the per-step labels and predictions of one epoch.

    Returns:
      ``(predictions, labels)`` as CPU tensors; labels are cast to int.
    """
    labels = torch.cat([step["labels"].detach().cpu() for step in outputs]).int()
    predictions = torch.cat([step["predictions"].detach().cpu() for step in outputs])
    return predictions, labels

  def training_step(self, batch, batch_idx):
    """Compute and log the training loss; keep predictions for epoch-end metrics."""
    loss, outputs, labels = self._shared_step(batch, "train")
    # Only the loss needs its graph for backprop; detaching the predictions
    # avoids holding every step's graph until training_epoch_end.
    return {"loss": loss, "predictions": outputs.detach(), "labels": labels}

  def validation_step(self, batch, batch_idx):
    """Compute and log the validation loss; keep predictions for epoch-end metrics."""
    loss, outputs, labels = self._shared_step(batch, "val")
    return {"loss": loss, "predictions": outputs.detach(), "labels": labels}

  def validation_epoch_end(self, outputs):
    """Log the accuracy over all validation steps of the epoch."""
    # Imported here because the notebook's import cell does not provide it.
    from pytorch_lightning.metrics.functional import accuracy

    predictions, labels = self._gather_epoch_outputs(outputs)
    validation_acc = accuracy(predictions, labels)
    self.logger.experiment.add_scalar("Validation Accuracy", validation_acc, self.current_epoch)

  def test_step(self, batch, batch_idx):
    """Compute and log the test loss."""
    loss, _, _ = self._shared_step(batch, "test")
    return loss

  def training_epoch_end(self, outputs):
    """Log AUROC and accuracy over all training steps of the epoch."""
    # Imported here because the notebook's import cell does not provide them.
    from pytorch_lightning.metrics.functional import accuracy, auroc

    predictions, labels = self._gather_epoch_outputs(outputs)

    class_roc_auc = auroc(predictions, labels)
    self.logger.experiment.add_scalar("Irony Speech ROC Curve/Train", class_roc_auc, self.current_epoch)

    training_acc = accuracy(predictions, labels)
    self.logger.experiment.add_scalar("Training Accuracy", training_acc, self.current_epoch)

  def configure_optimizers(self):
    """Build AdamW and, when step counts were supplied, a linear warm-up schedule.

    Returns:
      The optimizer alone when ``n_warmup_steps`` or ``n_training_steps`` is
      ``None``; otherwise a dict with the optimizer and a per-step scheduler.
    """
    # `AdamW` was never imported in this notebook; use the PyTorch optimizer.
    # NOTE(review): torch.optim.AdamW uses its own default weight decay/eps,
    # and lr=5e-8 is unusually small - confirm both against the training run.
    optimizer = torch.optim.AdamW(self.parameters(), lr=5e-8)

    if self.n_warmup_steps is None or self.n_training_steps is None:
      # The schedule cannot be built from missing step counts.
      return optimizer

    from transformers import get_linear_schedule_with_warmup

    scheduler = get_linear_schedule_with_warmup(
      optimizer,
      num_warmup_steps=self.n_warmup_steps,
      num_training_steps=self.n_training_steps
    )

    return dict(
      optimizer=optimizer,
      lr_scheduler=dict(
        scheduler=scheduler,
        interval='step'
      )
    )

# NOTE(review): 29752 was hard-coded in the step computation; presumably it is
# the number of training examples - confirm against the training notebook.
N_TRAIN_EXAMPLES = 29752
# The checkpoint is loaded with a single-output head, so the freshly built
# model uses the same size (len() of a label string would give 12 instead).
N_CLASSES = 1
CHECKPOINT_PATH = '/content/IronySpeechModel.ckpt'

# Schedule sizing: total optimizer steps, with the first tenth used as warm-up.
steps_per_epoch = N_TRAIN_EXAMPLES // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS
warmup_steps = total_training_steps // 10

# Untrained model configured with the schedule sizes above.
model = IronySpeechTagger(
  n_classes=N_CLASSES,
  n_warmup_steps=warmup_steps,
  n_training_steps=total_training_steps
)

# Fine-tuned weights for inference; strict=False tolerates checkpoint keys
# that do not match the module exactly.
trained_model = IronySpeechTagger.load_from_checkpoint(
  CHECKPOINT_PATH,
  n_classes=N_CLASSES,
  strict=False
)
# Inference only: switch to eval mode and freeze the parameters.
trained_model.eval()
trained_model.freeze()

In [6]:
#Enter comment to tag as irony speech and click run

test_comment = "A man who needs medical assistance is run over by the ambulance sent to help him."

THRESHOLD = 55  # percent; scores below this are reported as "Not irony speech"
encoding = tokenizer.encode_plus(
  test_comment,
  add_special_tokens=True,
  max_length=256,
  truncation=True,  # cut comments longer than max_length instead of overflowing it
  return_token_type_ids=False,
  padding="max_length",
  return_attention_mask=True,
  return_tensors='pt',
)

# Inference only: no gradients are needed for the forward pass.
with torch.no_grad():
  _loss, test_prediction = trained_model(encoding["input_ids"], encoding["attention_mask"])
test_prediction = test_prediction.flatten().numpy()

# Accept either a single label name or a list of names, so that a bare string
# is not iterated character by character.
label_names = [LABEL_COLUMNS] if isinstance(LABEL_COLUMNS, str) else list(LABEL_COLUMNS)

for label, prediction in zip(label_names, test_prediction):
  prediction = float(prediction) * 100  # model output scaled to percent
  if prediction < THRESHOLD:
    print(f"Not irony speech with a probability of {prediction:.2f} %")
    continue
  print(f"Irony Speech with a probability of {prediction:.2f} %")

Irony Speech with a probability of 66.15 %
