# Prepare

In [1]:
cd ../../../..

/home/majed_alshaibani/Experiments/DotlessArabic


In [2]:
import os
import random
import shutil
from pathlib import Path
from collections import defaultdict

import wandb


import torch
import torchmetrics
from torch import nn
from tqdm.auto import tqdm
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader

from pytorch_lightning import seed_everything
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import LightningModule,Trainer
from pytorch_lightning.callbacks import EarlyStopping,LearningRateMonitor,ModelCheckpoint


from sklearn.model_selection import train_test_split

from dotless_arabic.processing import undot,process
from dotless_arabic.tokenizers import CharacterTokenizer
from dotless_arabic.experiments.nlms.src import constants
from dotless_arabic.datasets.poems.collect import collect_dataset_for_meter_classification

In [3]:
random.seed(42)
# os.environ['WANDB_MODE']='disabled'
os.environ['CUDA_LAUNCH_BLOCKING']='1' # to see CUDA errors
torch.cuda.empty_cache() # to free gpu memory
# other options: https://stackoverflow.com/questions/15197286/how-can-i-flush-gpu-memory-using-cuda-physical-reset-is-unavailable
seed_everything(42,workers=True)

Global seed set to 42


42

# Dataset Preparation

In [4]:
dataset = collect_dataset_for_meter_classification()

Found cached dataset parquet (/home/majed_alshaibani/.cache/huggingface/datasets/arbml___parquet/MagedSaeed--ashaar-719bb58a76ea0092/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /home/majed_alshaibani/.cache/huggingface/datasets/arbml___parquet/MagedSaeed--ashaar-719bb58a76ea0092/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-ac36c4a5e19cc65c.arrow


####################################################################################################
Number datasets samples:
254630
####################################################################################################
####################################################################################################
Number datasets samples after filtering non accepted meters:
144186
####################################################################################################


0it [00:00, ?it/s]

####################################################################################################
Sample of datasets samples:
أَصبَحَ المُلك لِلَّذي فَطر الخَل
قَ بِتَقديرٍ للعَزيز العَليمِ
غافر الذَنب للمسيءِ بِعَفوٍ
قابل التَوب ذي العَطاء العَميمِ
مُرسل المُصطَفى البَشير إِلَينا
رَحمة مِنهُ بِالكَلام القَديمِ
رَبَنا رَبّنا إِلَيكَ أَنينا
فَأَجرنا مِن حَر نار الجَحيمِ
وَاكفِنا شَرّ ما نَخاف بِلُطفٍ
يا عَظيماً يَرجى لِكُل عَظيمِ
####################################################################################################
####################################################################################################
Number of Baits:
1,794,858
####################################################################################################
####################################################################################################
Map meter names to classes:
####################################################################################################


  0%|          | 0/1794858 [00:00<?, ?it/s]

In [5]:
set(dataset.values())

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}

In [6]:
number_of_classes = len(set(dataset.values()))
number_of_classes

16

## Train-Test Split

In [7]:
dataset = {
    bait: meter_class
    for bait, meter_class in tqdm(dataset.items())
    if process(bait[0]).strip() and process(bait[1]).strip()
}
len(dataset)

  0%|          | 0/1794858 [00:00<?, ?it/s]

1786467

In [8]:
def cap_baits_dataset(dataset,threshold=5_000):
    capped_dataset = {}
    meters_counts = defaultdict(int)
    for bait,meter_class in tqdm(dataset.items()):
        if meters_counts[meter_class] >= threshold:
            continue
        capped_dataset[bait] = meter_class
        meters_counts[meter_class]+=1
    return capped_dataset

In [9]:
# dataset = cap_baits_dataset(dataset)
len(dataset)

1786467

In [10]:
baits = list(
    map(
        lambda bait: f"{process(bait[0])} # {process(bait[1])}",
        tqdm(dataset.keys()),
    )
)
meters = list(dataset.values())
len(baits), len(meters)

  0%|          | 0/1786467 [00:00<?, ?it/s]

(1786467, 1786467)

Shuffle the dataset

In [11]:
zipped_dataset = list(zip(baits,meters))
random.shuffle(zipped_dataset)
baits,meters = list(map(list,zip(*zipped_dataset)))
baits[:10],meters[:10]

(['وبعدها ان اردتم سوء منقلب # فشاغبوا او فقولوا لا اذا امرا',
  'شهرت لسانا في ودادك جرحه # اذا حز في جلد النفاق رغيب',
  'فكان نهاية استتار ظهوره # وهذا من اعظم الحكم البالغة',
  'تذكرت بالشري ايامنا # وايامنا بكثيب الامر',
  'حاشا ابن محمود الولي المرتضى # اهل الوفاء ومعدن الالاء',
  'وليس سر السرور الا # ضرة اخلاقك الحسان',
  'اني سبرت الخلق طرا اصبحوا # فوضى بلا كيل ولا مقياس',
  'وانت سطوة عز عند حضرتها # هامات اعيان كبار الورى خضعت',
  'صاحبه مفلولس # قليل ذي الحيلل',
  'ان اخصبت او ايسرت فباهلها # وبنيلها تثري البلاد وتخصب'],
 [2, 0, 0, 14, 2, 2, 4, 2, 6, 4])

In [12]:
max_bait_length = len(max(baits,key=len))
# max_bait_length = 78
max_bait_length

110

In [13]:
x_train,x_test,y_train,y_test = train_test_split(baits,meters, test_size=0.05,random_state=42,shuffle=True,stratify=meters)
len(x_train),len(y_train),len(x_test),len(y_test)

(1697143, 1697143, 89324, 89324)

In [14]:
x_train,x_val,y_train,y_val = train_test_split(x_train,y_train, test_size=0.05,random_state=42,shuffle=True,stratify=y_train)
len(x_train),len(y_train),len(x_val),len(y_val)

(1612285, 1612285, 84858, 84858)

In [15]:
class BaitsDataset(Dataset):
    def __init__(
        self,
        X,
        y,
        tokenizer,
        use_tqdm=True,
        undot_text=False,
    ):
        super().__init__()
        max_length = max_bait_length
        X = tqdm(X) if use_tqdm else dataset
        self.encoded_dataset = []
        self.X = X
        self.y = y
        for bait in X:
            if not bait:
                raise
            if undot_text:
                bait = undot(bait)
            tokenized_bait = tokenizer.tokenize_from_splits(bait)
            encoded_bait = []
            for token in tokenized_bait:
                encoded_bait.append(tokenizer.token_to_id(token))
            tokenizer.pad(encoded_bait,length=max_length)
            encoded_bait = encoded_bait[:max_length]
            self.encoded_dataset.append(encoded_bait)

    def __getitem__(self, index):
        inputs = torch.LongTensor(self.encoded_dataset[index])
        outputs = torch.LongTensor([self.y[index]])
        return inputs, outputs

    def __len__(self):
        assert len(self.X) == len(self.y)
        return len(self.encoded_dataset)

# Meter Classification Model

In [16]:
class LitMeterClassificationModel(LightningModule):
    def __init__(
        self,
        vocab_size,
        num_layers=5,
        gru_hiddens=256,
        gru_dropout=0.25,
        dropout_prob=0.333,
        learning_rate=0.001,
        embedding_size=256,
        number_of_classes=number_of_classes,
    ):
        super().__init__()
        self.save_hyperparameters()

        self.vocab_size = vocab_size
        self.num_layers = num_layers
        self.gru_hiddens = gru_hiddens
        self.dropout_prob = dropout_prob
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.number_of_classes = number_of_classes

        self.train_accuracy = torchmetrics.Accuracy(
            task="multiclass",
            num_classes=number_of_classes,
        )
        self.val_accuracy = torchmetrics.Accuracy(
            task="multiclass",
            num_classes=number_of_classes,
        )
        self.test_accuracy = torchmetrics.Accuracy(
            task="multiclass",
            num_classes=number_of_classes,
        )

        self.embedding_layer = nn.Embedding(
            num_embeddings=self.vocab_size,
            embedding_dim=self.embedding_size,
        )
        self.gru_layer = nn.GRU(
            input_size=self.embedding_size,
            hidden_size=self.gru_hiddens,
            num_layers=self.num_layers,
            dropout=gru_dropout,
            batch_first=True,
            bidirectional=True,
        )
        self.first_dense_layer = nn.Linear(
            in_features=self.gru_hiddens,
            out_features=128,
        )
        self.dropout_layer = nn.Dropout(p=self.dropout_prob)
        self.relu = nn.ReLU()
        self.second_dense_layer = nn.Linear(
            in_features=128,
            out_features=self.number_of_classes,
        )

    def forward(self, x, hiddens=None):
        outputs = self.embedding_layer(x)
        outputs, hiddens = self.gru_layer(outputs)
        # https://stackoverflow.com/a/50914946/4412324
        outputs = (
            outputs[:, :, : self.gru_hiddens] + outputs[:, :, self.gru_hiddens :]
        )  # GRUs are bidirectional
        outputs = self.first_dense_layer(outputs)
        outputs = self.dropout_layer(outputs)
        outputs = self.relu(outputs)
        outputs = self.second_dense_layer(outputs)
        return outputs

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        labels = labels.squeeze()  # drop unnecessary dimention
        outputs = outputs[:, -1, :]  # take the results at the last time-step
        loss = F.cross_entropy(outputs, labels)
        train_accuracy = self.train_accuracy(outputs, labels)
        self.log(
            "loss",
            loss,
            on_step=True,
            on_epoch=False,
        )
        self.log(
            "train_acc",
            train_accuracy,
            on_step=True,
            on_epoch=False,
            prog_bar=True,
            logger=True,
        )
        return loss

    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        labels = labels.squeeze()  # drop unnecessary dimention
        outputs = outputs[:, -1, :]  # take the results at the last time-step
        loss = F.cross_entropy(outputs, labels)
        val_accuracy = self.val_accuracy(outputs, labels)
        self.log("val_loss", loss, prog_bar=True)
        self.log(
            "val_acc",
            val_accuracy,
            on_step=True,
            on_epoch=False,
            prog_bar=True,
            logger=True,
        )
        return {"val_loss": loss}

    def test_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self(inputs)
        labels = labels.squeeze()  # drop unnecessary dimention
        outputs = outputs[:, -1, :]  # take the results at the last time-step
        loss = F.cross_entropy(outputs, labels)
        test_accuracy = self.test_accuracy(outputs, labels)
        metrics = {"test_acc": test_accuracy, "test_loss": loss}
        self.log_dict(metrics, prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.learning_rate,
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            factor=0.1,
            patience=2,
            
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": scheduler,
            "monitor": "val_loss",
        }

In [17]:
lr_monitor = LearningRateMonitor(
    logging_interval="step",
    log_momentum=True,
)

In [18]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmagedsaeed[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [19]:
def get_best_checkpoint(text_type, tokenizer_class=CharacterTokenizer, checkpoints_base_path="MeterClassification"):
    checkpoints_path = (
        f"{checkpoints_base_path}/{text_type}/{tokenizer_class.__name__}/all/checkpoints"
    )
    for file_name in os.listdir(checkpoints_path):
        if file_name.startswith("epoch"):
            return f"{checkpoints_path}/{file_name}"

In [20]:
batch_size = 1024
epochs = 100

# Dotted Experiment

In [21]:
with open("tmp_dataset2.txt", "w") as f:
    f.write("\n".join(item for item in x_train))
dotted_tokenizer = CharacterTokenizer(special_tokens=['#','<##>'])
dotted_tokenizer.train('tmp_dataset2.txt')

Training CharacterTokenizer ...


In [22]:
train_dataset = BaitsDataset(X=x_train,y=y_train,tokenizer=dotted_tokenizer)

  0%|          | 0/1612285 [00:00<?, ?it/s]

In [23]:
val_dataset = BaitsDataset(X=x_val,y=y_val,tokenizer=dotted_tokenizer)

  0%|          | 0/84858 [00:00<?, ?it/s]

In [24]:
test_dataset = BaitsDataset(X=x_test,y=y_test,tokenizer=dotted_tokenizer)

  0%|          | 0/89324 [00:00<?, ?it/s]

In [25]:
dotted_tokenizer.vocab_size

35

In [26]:
train_dataloader = DataLoader(
    shuffle=True,
    dataset=train_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [27]:
val_dataloader = DataLoader(
    shuffle=False,
    dataset=val_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [28]:
test_dataloader = DataLoader(
    shuffle=False,
    dataset=test_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [29]:
model = LitMeterClassificationModel(vocab_size=dotted_tokenizer.vocab_size)
model

LitMeterClassificationModel(
  (train_accuracy): MulticlassAccuracy()
  (val_accuracy): MulticlassAccuracy()
  (test_accuracy): MulticlassAccuracy()
  (embedding_layer): Embedding(35, 256)
  (gru_layer): GRU(256, 256, num_layers=5, batch_first=True, dropout=0.25, bidirectional=True)
  (first_dense_layer): Linear(in_features=256, out_features=128, bias=True)
  (dropout_layer): Dropout(p=0.333, inplace=False)
  (relu): ReLU()
  (second_dense_layer): Linear(in_features=128, out_features=16, bias=True)
)

In [30]:
shutil.rmtree(
    Path(
        f"MeterClassification/dotted/{dotted_tokenizer.__class__.__name__}/all/checkpoints"
    ),
    ignore_errors=True,
)
checkpoint_callback = ModelCheckpoint(
    mode="min",
    save_top_k=1,
    verbose=False,
    save_last=False,
    monitor="val_loss",
    save_weights_only=False,
    auto_insert_metric_name=True,
    save_on_train_epoch_end=False,
    filename="{epoch}-{val_loss:.3f}-{step}",
    dirpath=f"MeterClassification/dotted/{dotted_tokenizer.__class__.__name__}/all/checkpoints",
)

In [31]:
wandb_logger = WandbLogger(
    project="MC",
    name=f"dotted_{dotted_tokenizer.__class__.__name__}-all",
    reinit=True,
)
wandb_logger.watch(model, log="all")

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670670368087788, max=1.0…

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [32]:
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.05,
    patience=10,
    check_finite=True,
)

In [33]:
trainer = Trainer(
    max_epochs=epochs,
    devices=[0],
    deterministic=True,
    logger=wandb_logger,
    val_check_interval=0.5,
    accelerator="cuda",
    log_every_n_steps=max(len(train_dataloader) // 25, 1),
    # default_root_dir=f"LMsModels/{previous_hiddens}",
    callbacks=[
        checkpoint_callback,
        early_stopping_callback,
        lr_monitor,
    ],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [34]:
trainer.validate(
    model=model,
    dataloaders=val_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation: 0it [00:00, ?it/s]

[{'val_loss': 2.7426841259002686}]

In [35]:
trainer.fit(model,train_dataloader,val_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name               | Type               | Params
----------------------------------------------------------
0 | train_accuracy     | MulticlassAccuracy | 0     
1 | val_accuracy       | MulticlassAccuracy | 0     
2 | test_accuracy      | MulticlassAccuracy | 0     
3 | embedding_layer    | Embedding          | 9.0 K 
4 | gru_layer          | GRU                | 5.5 M 
5 | first_dense_layer  | Linear             | 32.9 K
6 | dropout_layer      | Dropout            | 0     
7 | relu               | ReLU               | 0     
8 | second_dense_layer | Linear             | 2.1 K 
----------------------------------------------------------
5.6 M     Trainable params
0         Non-trainable params
5.6 M     Total params
22.257    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [36]:
model = LitMeterClassificationModel.load_from_checkpoint(
    get_best_checkpoint(text_type="dotted")
)
model

LitMeterClassificationModel(
  (train_accuracy): MulticlassAccuracy()
  (val_accuracy): MulticlassAccuracy()
  (test_accuracy): MulticlassAccuracy()
  (embedding_layer): Embedding(35, 256)
  (gru_layer): GRU(256, 256, num_layers=5, batch_first=True, dropout=0.25, bidirectional=True)
  (first_dense_layer): Linear(in_features=256, out_features=128, bias=True)
  (dropout_layer): Dropout(p=0.333, inplace=False)
  (relu): ReLU()
  (second_dense_layer): Linear(in_features=128, out_features=16, bias=True)
)

In [37]:
trainer.test(model,test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

[{'test_acc': 0.9568628668785095, 'test_loss': 0.20164844393730164}]

In [38]:
wandb.finish()

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▆▃▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-Adam-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
test_loss,▁
train_acc,▁▃▆▇████████████████████████████████████
trainer/global_step,▁▁▁▁▁▁▁▂▁▁▃▁▃▁▁▁▁▄▁▁▅▁▅▁▁▁▁▆▂▂▇▂▇▂▂▂▂█▂▂
val_acc,▁▁▁▇▇███████████████████████████████████
val_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,8.0
loss,0.21332
lr-Adam,0.001
lr-Adam-momentum,0.9
test_acc,0.95686
test_loss,0.20165
train_acc,0.95605
trainer/global_step,11805.0
val_acc,0.94531
val_loss,0.20149


# Dotless Experiment

In [39]:
with open("tmp_dataset2.txt", "w") as f:
    f.write("\n".join(undot(item) for item in tqdm(x_train)))
dotless_tokenizer = CharacterTokenizer(special_tokens=['#','<##>'])
dotless_tokenizer.train('tmp_dataset2.txt')

  0%|          | 0/1612285 [00:00<?, ?it/s]

Training CharacterTokenizer ...


In [40]:
train_dataset = BaitsDataset(
    X=x_train,
    y=y_train,
    tokenizer=dotless_tokenizer,
    undot_text=True,
)

  0%|          | 0/1612285 [00:00<?, ?it/s]

In [41]:
val_dataset = BaitsDataset(
    X=x_val,
    y=y_val,
    tokenizer=dotless_tokenizer,
    undot_text=True,
)

  0%|          | 0/84858 [00:00<?, ?it/s]

In [42]:
test_dataset = BaitsDataset(
    X=x_test,
    y=y_test,
    tokenizer=dotless_tokenizer,
    undot_text=True,
)

  0%|          | 0/89324 [00:00<?, ?it/s]

In [43]:
dotless_tokenizer.vocab_size

23

In [44]:
train_dataloader = DataLoader(
    shuffle=True,
    dataset=train_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [45]:
val_dataloader = DataLoader(
    shuffle=False,
    dataset=val_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [46]:
test_dataloader = DataLoader(
    shuffle=False,
    dataset=test_dataset,
    num_workers=32,
    drop_last=True,
    batch_size=batch_size,
)

In [47]:
shutil.rmtree(
    Path(
        f"MeterClassification/dotless/{dotless_tokenizer.__class__.__name__}/all/checkpoints"
    ),
    ignore_errors=True,
)
checkpoint_callback = ModelCheckpoint(
    mode="min",
    save_top_k=1,
    verbose=False,
    save_last=False,
    monitor="val_loss",
    save_weights_only=False,
    auto_insert_metric_name=True,
    save_on_train_epoch_end=False,
    filename="{epoch}-{val_loss:.3f}-{step}",
    dirpath=f"MeterClassification/dotless/{dotless_tokenizer.__class__.__name__}/all/checkpoints",
)

In [48]:
model = LitMeterClassificationModel(
    # num_layers=3,
    # gru_hiddens=128,
    # embedding_size=128,
    # dropout_prob=0.45,
    # gru_dropout=0.25,
    vocab_size=dotless_tokenizer.vocab_size,
)
model

LitMeterClassificationModel(
  (train_accuracy): MulticlassAccuracy()
  (val_accuracy): MulticlassAccuracy()
  (test_accuracy): MulticlassAccuracy()
  (embedding_layer): Embedding(23, 256)
  (gru_layer): GRU(256, 256, num_layers=5, batch_first=True, dropout=0.25, bidirectional=True)
  (first_dense_layer): Linear(in_features=256, out_features=128, bias=True)
  (dropout_layer): Dropout(p=0.333, inplace=False)
  (relu): ReLU()
  (second_dense_layer): Linear(in_features=128, out_features=16, bias=True)
)

In [49]:
wandb_logger = WandbLogger(
    project="MC",
    name=f"dotless_{dotless_tokenizer.__class__.__name__}-all",
    reinit=True,
)
wandb_logger.watch(model, log="all")

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668485337868334, max=1.0…

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [50]:
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.05,
    patience=10,
    check_finite=True,
)

In [51]:
trainer = Trainer(
    max_epochs=epochs,
    devices=[0],
    deterministic=True,
    logger=wandb_logger,
    val_check_interval=0.5,
    accelerator="cuda",
    log_every_n_steps=max(len(train_dataloader) // 25, 1),
    # default_root_dir=f"LMsModels/{previous_hiddens}",
    callbacks=[
        checkpoint_callback,
        early_stopping_callback,
        lr_monitor,
    ],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [52]:
trainer.validate(
    model=model,
    dataloaders=val_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation: 0it [00:00, ?it/s]

[{'val_loss': 2.780548572540283}]

In [53]:
trainer.fit(model,train_dataloader,val_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name               | Type               | Params
----------------------------------------------------------
0 | train_accuracy     | MulticlassAccuracy | 0     
1 | val_accuracy       | MulticlassAccuracy | 0     
2 | test_accuracy      | MulticlassAccuracy | 0     
3 | embedding_layer    | Embedding          | 5.9 K 
4 | gru_layer          | GRU                | 5.5 M 
5 | first_dense_layer  | Linear             | 32.9 K
6 | dropout_layer      | Dropout            | 0     
7 | relu               | ReLU               | 0     
8 | second_dense_layer | Linear             | 2.1 K 
----------------------------------------------------------
5.6 M     Trainable params
0         Non-trainable params
5.6 M     Total params
22.245    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [54]:
model = LitMeterClassificationModel.load_from_checkpoint(
    get_best_checkpoint(text_type="dotless")
)
model

LitMeterClassificationModel(
  (train_accuracy): MulticlassAccuracy()
  (val_accuracy): MulticlassAccuracy()
  (test_accuracy): MulticlassAccuracy()
  (embedding_layer): Embedding(23, 256)
  (gru_layer): GRU(256, 256, num_layers=5, batch_first=True, dropout=0.25, bidirectional=True)
  (first_dense_layer): Linear(in_features=256, out_features=128, bias=True)
  (dropout_layer): Dropout(p=0.333, inplace=False)
  (relu): ReLU()
  (second_dense_layer): Linear(in_features=128, out_features=16, bias=True)
)

In [55]:
trainer.test(model,test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

[{'test_acc': 0.949858546257019, 'test_loss': 0.21966329216957092}]

In [56]:
wandb.finish()

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
loss,█▄▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-Adam-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_acc,▁
test_loss,▁
train_acc,▁▆▇▇████████████████████████████████████
trainer/global_step,▁▁▁▁▁▂▁▂▁▁▁▁▃▁▁▁▄▁▁▁▅▁▁▁▁▆▁▆▁▆▂▂▂▂▂▂▇▂█▂
val_acc,▁▁█▇████████████████████████████████████
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,11.0
loss,0.23941
lr-Adam,0.001
lr-Adam-momentum,0.9
test_acc,0.94986
test_loss,0.21966
train_acc,0.94531
trainer/global_step,16527.0
val_acc,0.93457
val_loss,0.22216
