In [None]:
# get needed libraries
!pip install clearml
!pip install gdown
!pip install pytorch-lightning

In [None]:
# download dataset
!gdown https://drive.google.com/uc?id=1hK-3iiRPlbePb99Fe-34LJNZ5yB-nduq
!tar -xvzf imdb_dataset.gz

In [1]:
# Enable autoreload in mode 2 so that edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Task definition
Implement an LSTM sentiment tagger for the IMDb reviews dataset.

1. (5pt) Fill in the missing code below
    * 1pt implement vectorization
    * 2pt implement \_\_init\_\_ and forward methods of models
    * 2pt implement collate function
2. (4pt) Implement the training loop and choose a proper loss function; use ClearML for max points.
    * 2pts is a baseline for well-written, working code
    * 2pts if ClearML is used properly
3. (3pt) Train the models (find proper hyperparameters). Make sure you are neither overfitting nor underfitting. Visualize the training of your best model (plot training and test loss/accuracy over time). Your model should reach at least 87% accuracy. For max points it should exceed 89%.
    * 1pt for accuracy above 89%
    * 1pt for accuracy above 87%
    * 1pt for visualizations

Remarks:
* Use embeddings of size 50
* Use 0.5 threshold when computing accuracy.
* Use supplied dataset for training and evaluation.
* You do not have to use validation set.
* You should monitor overfitting during training.
* For max points use ClearML to store and manage logs from your experiments.
* We encourage using the PyTorch Lightning library (an additional point is awarded for using it; however, the total must not exceed 12 points).

[ClearML documentation](https://clear.ml/docs/latest/docs/)

[Clear ML notebook exercise from bootcamp](https://colab.research.google.com/drive/1wtLb4gg8beLS7smcyJlOZppn6_rQvSxL?usp=sharing)

In [2]:
import torch
from pytorch_lightning import LightningModule, Trainer

# Set up ClearML

In [3]:
import os

from clearml import Task

# Project and run identifiers shown in the ClearML web UI.
PROJECT_NAME = 'pt_pl'
TASK_NAME = 0

# NOTE(review): not referenced anywhere else in this notebook; presumably the
# padding index used by the data pipeline -- confirm before relying on it.
PADDING_VALUE = 0

# ClearML server endpoints and API credentials.
# The key/secret pair must never be committed with the notebook, so it is read
# from the environment (a missing variable fails fast with a KeyError naming it).
# NOTE(review): the key pair that used to be hard-coded here should be treated
# as leaked and revoked in the ClearML workspace settings.
params_clearml = {
    'web_host': 'https://app.community.clear.ml',
    'api_host': 'https://api.community.clear.ml',
    'files_host': 'https://files.community.clear.ml',
    'key': os.environ['CLEARML_API_ACCESS_KEY'],
    'secret': os.environ['CLEARML_API_SECRET_KEY'],
}
Task.set_credentials(**params_clearml)

# Register this run; `task` is used later to attach hyperparameters and to
# query the logged metrics.
task = Task.init(project_name=PROJECT_NAME, task_name=f'run_{TASK_NAME}')

ClearML Task: created new task id=206357c5dd1445e789ff296f1ee1984a
2022-01-15 22:46:39,573 - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page: https://app.community.clear.ml/projects/4305c327a98d4ab5b524dc97d13a87a9/experiments/206357c5dd1445e789ff296f1ee1984a/output/log


# Set up TensorBoard

In [1]:
from pytorch_lightning.loggers import TensorBoardLogger

# Optional explicit logger, left disabled: the Trainer in this notebook is
# created without a `logger` argument, and its run output reports
# `lightning_logs` as the log folder.
# logger = TensorBoardLogger("tb_logs", name="my_model")

# Start TensorBoard inline, pointed at the `lightning_logs/` directory.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

# Get Data

In [4]:
from data import get_loaders

# Number of GPUs to train on: one if CUDA is available, otherwise zero (CPU).
# This must be `min`, not `max`: `max(1, ...)` is always >= 1, which made the
# CPU batch size below unreachable and requested a GPU on CPU-only machines.
AVAIL_GPUS = min(1, torch.cuda.device_count())
# Larger batches when a GPU is available, smaller ones on CPU.
BATCH_SIZE = 128 if AVAIL_GPUS else 64
PATH_DATASET = 'imdb_dataset.csv'

# `loaders` is indexed by 'train' and 'test' in the training cell;
# `vocab_size` is passed to the model constructor.
loaders, vocab_size = get_loaders(PATH_DATASET, batch_size=BATCH_SIZE)

# Get Model

In [5]:
from model import LitLSTMSentimentTagger

# Model hyperparameters. The embedding size of 50 is required by the task
# description; `vocab_size` comes from the data-loading cell.
params_model = dict(
    embedding_dim=50,
    hidden_dim=128,
    vocab_size=vocab_size,
    classes=1,
)

# Attach the hyperparameters to the ClearML task before the model is built.
task.connect(params_model)

model = LitLSTMSentimentTagger(**params_model)

# Train

In [6]:
# Upper bound on the number of training epochs.
MAX_EPOCHS = 15

# The test split is passed as the validation loader, so it is evaluated
# during training (the task does not require a separate validation set).
train_loader, test_loader = loaders['train'], loaders['test']

trainer = Trainer(gpus=AVAIL_GPUS, max_epochs=MAX_EPOCHS)
trainer.fit(model, train_loader, test_loader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Missing logger folder: /home/bartekkrzepkowski/Pulpit/Zimowy/GSN/hw/hw3/hw3/hw3_pl_bk332269/lightning_logs

  | Name            | Type              | Params
------------------------------------------------------
0 | word_embeddings | Embedding         | 4.4 M 
1 | lstm            | LSTM              | 92.2 K
2 | hidden2tag      | Linear            | 129   
3 | criterion       | BCEWithLogitsLoss | 0     
4 | accuracy        | Accuracy          | 0     
------------------------------------------------------
4.4 M     Trainable params
0         Non-trainable params
4.4 M     Total params
17.775    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [10]:
# Fetch the most recent scalar metrics (last/min/max per series) reported to the ClearML task.
task.get_last_scalar_metrics()

{'Loss': {'Train': {'last': 0.23891522, 'min': 0.19881018, 'max': 0.69051886},
  'Val': {'last': 0.26201195, 'min': 0.26201195, 'max': 0.69303733}},
 'Acc': {'Train': {'last': 0.9140625, 'min': 0.5546875, 'max': 0.9375},
  'Val': {'last': 0.8989, 'min': 0.60748, 'max': 0.8989}},
 'epoch': {'epoch': {'last': 9.0, 'min': 0.0, 'max': 9.0}},
 ':monitor:machine': {'network_rx_mbs': {'last': 0.026,
   'min': 0.01,
   'max': 0.109},
  'io_read_mbs': {'last': 0.008, 'min': 0.0, 'max': 0.119},
  'io_write_mbs': {'last': 0.265, 'min': 0.028, 'max': 2.0},
  'memory_free_gb': {'last': 7.046, 'min': 7.035, 'max': 7.848},
  'cpu_usage': {'last': 12.203, 'min': 8.958, 'max': 33.271},
  'network_tx_mbs': {'last': 0.008, 'min': 0.005, 'max': 0.039},
  'memory_used_gb': {'last': 2.755, 'min': 1.232, 'max': 2.755},
  'cpu_temperature': {'last': 60.523, 'min': 60.523, 'max': 92.407},
  'disk_free_percent': {'last': 23.7, 'min': 23.7, 'max': 23.8}},
 ':monitor:gpu': {'gpu_0_temperature': {'last': 55.139,
 

In [7]:
# Display the ClearML task object created in the setup cell.
task