In [None]:
 !pip install tensorflow_hub tensorflow_text pytorch_lightning datasets -q

In [None]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from torch.utils.data import DataLoader
from datasets import Dataset, load_dataset, load_metric
import numpy as np
import dataloading
import modeling
from typing import List, Dict

In [None]:
# Seed the run through Lightning; the call returns the seed it applied,
# which is what the cell displays.
SEED = 445326
pl.seed_everything(SEED, workers=True)

INFO:lightning_fabric.utilities.seed:Seed set to 445326


445326

In [None]:
# NOTE(review): disabled leftover that would load the Universal Sentence Encoder from TF Hub.
# Embeddings in this notebook are obtained through dataloading.embed_text, so this is
# presumably obsolete; confirm before deleting the cell.
# model_URL = 'https://tfhub.dev/google/universal-sentence-encoder-large/5'
# encoder = hub.load(model_URL)

In [None]:
# Instantiate the project's Yelp loader and run its prepare_data() step.
data = dataloading.YelpDataLoader()
data.prepare_data()

In [None]:
# Build the splits, then report their sizes in the order train, val, test.
data.setup()
for split in (data.train, data.val, data.test):
    print(len(split))

A `LightningModule` organises the model code into the following sections:

* Computations (`__init__`)

* Train Loop (training_step)

* Validation Loop (validation_step)

* Test Loop (test_step)

* Prediction Loop (predict_step)

* Optimizers and LR Schedulers (configure_optimizers)

## Train

In [None]:
# Instantiate the project-defined model with its default constructor arguments.
model = modeling.Model()

In [None]:
# Upper bound on the number of training epochs.
epochs = 15

# Keep the three checkpoints with the lowest monitored "val_loss" in the "model" directory.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="val_loss",
    dirpath="model",
    filename="yelp-sentiment-multilingual-{epoch:02d}-{val_loss:.3f}",
    save_top_k=3,
    mode="min",
)

# pl.Trainer has no `epochs` keyword (passing it raises TypeError);
# the epoch limit is configured through `max_epochs`.
trainer = pl.Trainer(max_epochs=epochs, callbacks=[checkpoint_callback])

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# Fit the model, feeding the train loader for optimisation and the val loader for validation.
trainer.fit(
    model,
    train_dataloaders=data.train_dataloader(),
    val_dataloaders=data.val_dataloader(),
)

In [None]:
# Evaluate on the test split.
# NOTE(review): no model is passed, so the choice of weights is left to Lightning's
# default for test() after fit() — confirm this is the intended checkpoint.
test_loader = data.test_dataloader()
trainer.test(dataloaders=test_loader)

In [None]:
# Reload the checkpoint that the ModelCheckpoint callback recorded as best.
best_ckpt_path = checkpoint_callback.best_model_path
best_model = modeling.Model.load_from_checkpoint(best_ckpt_path)

In [None]:
# NOTE(review): notebook convention is to keep imports in the imports cell at the top.
from pprint import PrettyPrinter
# Shared pretty-printer used by the prediction cells.
pp = PrettyPrinter()

In [None]:
def predict(text: List[str]):
    """
    Predict the sentiment of each sentence in ``text`` using ``best_model``.

    The sentences are embedded with ``dataloading.embed_text``, moved to the
    device the model lives on, and classified in a single forward pass.
    Calling this function puts ``best_model`` into evaluation mode.

    Args:
        text: Sentences to classify.

    Returns:
        One dict per input sentence, in input order, with the keys
        ``"text"`` (the input sentence), ``"label"`` (``"positive"`` when the
        arg-max class index is 1, otherwise ``"negative"``) and ``"score"``
        (softmax probability of the predicted class). An empty input
        returns an empty list without touching the encoder or the model.
    """
    if not text:
        # Nothing to embed or classify.
        return []

    # Follow the model's own device instead of assuming CUDA is available.
    embeddings = torch.Tensor(dataloading.embed_text(text)).to(best_model.device)

    # Inference only: evaluation mode for layers such as dropout, and no
    # autograd bookkeeping for the forward pass.
    best_model.eval()
    with torch.no_grad():
        logits = best_model(embeddings)

    preds = torch.argmax(logits, dim=1).cpu().numpy()
    scores = torch.softmax(logits, dim=1).cpu().numpy()

    return [
        {
            "text": sentence,
            "label": "positive" if best_index == 1 else "negative",
            "score": score_pair[best_index],
        }
        for sentence, best_index, score_pair in zip(text, preds, scores)
    ]

Compare the predictions for the same review written in English and in German.

In [None]:
# The same review in English and in German, classified in one batch.
english_text = "Like any Barnes & Noble, it has a nice comfy cafe, and a large selection of books. The staff is very friendly and helpful. They stock a decent selection, and the prices are pretty reasonable."
german_translation = "Wie jedes Barnes & Noble hat es ein nettes, gemütliches Café und eine große Auswahl an Büchern. Das Personal ist sehr freundlich und hilfsbereit. Sie haben eine anständige Auswahl und die Preise sind ziemlich vernünftig."
review_pair = [english_text, german_translation]
pp.pprint(predict(review_pair))

[   {   'label': 'positive',
        'score': 0.99977034,
        'text': 'Like any Barnes & Noble, it has a nice comfy cafe, and a '
                'large selection of books. The staff is very friendly and '
                'helpful. They stock a decent selection, and the prices are '
                'pretty reasonable.'},
    {   'label': 'positive',
        'score': 0.9281242,
        'text': 'Wie jedes Barnes & Noble hat es ein nettes, gemütliches Café '
                'und eine große Auswahl an Büchern. Das Personal ist sehr '
                'freundlich und hilfsbereit. Sie haben eine anständige Auswahl '
                'und die Preise sind ziemlich vernünftig.'}]


In [None]:
# This cell used to read `italian_translation` before any cell had assigned it
# (NameError on Restart & Run All) and duplicated the body of predict().
# The Italian rendering of the current `english_text` is defined here instead,
# and the shared predict() helper does the work.
# NOTE(review): the Italian wording was added during review — adjust if a
# reference translation exists.
italian_translation = "Come ogni Barnes & Noble, ha una caffetteria carina e accogliente e un'ampia selezione di libri. Il personale è molto cordiale e disponibile. Hanno un buon assortimento e i prezzi sono abbastanza ragionevoli."
results = predict([english_text, italian_translation])
pp.pprint(results)

In [None]:
# One review in English, Italian and Finnish, classified in one batch.
english_text = "The inside of the Restaurant was not clean at all. And we also did not like their lighting arrangement. Too dark."
italian_translation = "L'interno del Ristorante non era affatto pulito. E non ci piaceva nemmeno la loro disposizione delle luci. Troppo scuro."
finnish_translation = "Ravintolan sisäpuoli ei ollut ollenkaan puhdas. Ja emme myöskään pitäneet heidän valaistusjärjestelystä. Liian pimeä."
review_triple = [english_text, italian_translation, finnish_translation]
pp.pprint(predict(review_triple))

[   {   'label': 'negative',
        'score': 0.99893755,
        'text': 'The inside of the Restaurant was not clean at all. And we '
                'also did not like their lighting arrangement. Too dark.'},
    {   'label': 'negative',
        'score': 0.5849371,
        'text': "L'interno del Ristorante non era affatto pulito. E non ci "
                'piaceva nemmeno la loro disposizione delle luci. Troppo '
                'scuro.'},
    {   'label': 'negative',
        'score': 0.8327144,
        'text': 'Ravintolan sisäpuoli ei ollut ollenkaan puhdas. Ja emme '
                'myöskään pitäneet heidän valaistusjärjestelystä. Liian '
                'pimeä.'}]


In [None]:
# A non-review sentence pair (English / Italian) to probe text outside the Yelp domain.
english_text = "Just had the best day ever! Everything went perfectly, and I feel so happy!"
italian_translation = "Ho appena trascorso la giornata migliore di sempre! Tutto è andato perfettamente e mi sento così felice!"
sentence_pair = [english_text, italian_translation]
pp.pprint(predict(sentence_pair))

[   {   'label': 'positive',
        'score': 0.98286957,
        'text': 'Just had the best day ever! Everything went perfectly, and I '
                'feel so happy!'},
    {   'label': 'positive',
        'score': 0.9439262,
        'text': 'Ho appena trascorso la giornata migliore di sempre! Tutto è '
                'andato perfettamente e mi sento così felice!'}]


In [None]:
# English / Italian pair with the same negative statement.
# In the recorded output the Italian sentence is labelled "positive"
# (score about 0.70), i.e. the two languages disagree on this example.
english_text = "Today was a complete disaster. Nothing went right, and I feel so frustrated"
italian_translation = "Oggi è stato un completo disastro. Niente è andato bene e mi sento così frustrato."
sentence_pair = [english_text, italian_translation]
pp.pprint(predict(sentence_pair))

[   {   'label': 'negative',
        'score': 0.9995815,
        'text': 'Today was a complete disaster. Nothing went right, and I feel '
                'so frustrated'},
    {   'label': 'positive',
        'score': 0.69991624,
        'text': 'Oggi è stato un completo disastro. Niente è andato bene e mi '
                'sento così frustrato.'}]
