In [1]:
import os
import tempfile
import shutil

import torch
import flash
import mlflow
import torchmetrics

from flash.text import TextClassificationData,TextClassifier
from pytorch_lightning.utilities.model_summary import ModelSummary



In [2]:
# Credentials and endpoint for the local MinIO (S3-compatible) artifact store.
# These are local development values hardcoded for convenience; never commit real
# credentials to a notebook.
os.environ["AWS_ACCESS_KEY_ID"] = "minio"
os.environ["AWS_SECRET_ACCESS_KEY"] = "minio123"
# The endpoint has to be exported as an environment variable: rebinding the attribute
# on `mlflow.environment_variables` only replaces a Python object and leaves the
# process environment untouched.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://127.0.0.1:9000"

In [3]:
# Build the Flash data module from the IMDB CSV splits: the "review" column is the
# model input and the "sentiment" column is the classification target.
data_module = TextClassificationData.from_csv(
    input_field="review",
    target_fields="sentiment",
    train_file="../data/imdb/train.csv",
    val_file="../data/imdb/valid.csv",
    test_file="../data/imdb/test.csv",
    batch_size=64,
)

Map:   0%|          | 0/22500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Metrics tracked during training/validation/testing.
# Accuracy keeps its default averaging. F1, precision and recall are macro-averaged:
# with the default micro averaging on single-label multi-class predictions all three
# reduce to the same number as accuracy, so logging them would add no information.
metrics = [
    torchmetrics.Accuracy(num_classes=data_module.num_classes),
    torchmetrics.F1Score(num_classes=data_module.num_classes, average="macro"),
    torchmetrics.Precision(num_classes=data_module.num_classes, average="macro"),
    torchmetrics.Recall(num_classes=data_module.num_classes, average="macro"),
]

In [5]:
# Text classifier on top of the small Hugging Face backbone "prajjwal1/bert-tiny";
# the class count and label names are taken from the data module.
model = TextClassifier(
    backbone="prajjwal1/bert-tiny",
    num_classes=data_module.num_classes,
    labels=data_module.labels,
    metrics=metrics,
)

# Train for three epochs on every visible CUDA device (a count of 0 is passed through
# when no GPU is available).
trainer = flash.Trainer(
    max_epochs=3,
    gpus=torch.cuda.device_count(),
)

Using 'prajjwal1/bert-tiny' provided by Hugging Face/transformers (https://github.com/huggingface/transformers).
Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification

In [6]:
EXPERIMENT_NAME = "full_fledged"

# Select the tracking server *before* touching experiments. Otherwise the existence
# check and the creation below run against MLflow's default tracking store instead of
# this server, and the later lookup on the server can return None.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Create the experiment only on first use so the cell can be re-run safely.
# NOTE(review): the artifact location is the raw MinIO HTTP endpoint; an S3-backed
# store is normally addressed as "s3://<bucket>/<path>" -- confirm the intended bucket.
if mlflow.get_experiment_by_name(EXPERIMENT_NAME) is None:
    mlflow.create_experiment(name=EXPERIMENT_NAME, artifact_location="http://127.0.0.1:9000")

experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
print(experiment.experiment_id)

737308629393873003


In [7]:
def save_summary__file_to_model_artifact(file_content_string,filename="model_summary.txt"):
    """Log a piece of text as a file artifact of the active MLflow run.

    The text is written to ``filename`` inside a throw-away temporary directory,
    uploaded with ``mlflow.log_artifact``, and the directory is removed afterwards,
    whether or not the upload succeeded.

    Args:
        file_content_string: Text to store in the artifact file.
        filename: Name the artifact file is given.
    """
    # The context manager deletes the scratch directory on exit, including on error.
    with tempfile.TemporaryDirectory() as scratch_dir:
        artifact_path = os.path.join(scratch_dir, filename)
        with open(artifact_path, "w") as handle:
            handle.write(file_content_string)

        # Upload while the file still exists on disk.
        mlflow.log_artifact(local_path=artifact_path)

In [8]:
# Train inside a single MLflow run so metrics, parameters, the model summary and the
# model itself are all attached to the same run.
with mlflow.start_run(experiment_id=experiment.experiment_id,
                      run_name="custom_log") as model_track_run:
    # Fine-tune with the "freeze" strategy.
    trainer.finetune(model,datamodule=data_module,strategy="freeze")

    # logging the metric: convert each callback metric to a plain Python number
    cur_metric = trainer.callback_metrics
    final_metrics = {k:v.item() for k,v in cur_metric.items()}
    mlflow.log_metrics(final_metrics)

    # logging parameters
    params = {"epochs":trainer.max_epochs}
    # Guard on a non-empty optimizer list: checking only that the attribute exists
    # does not protect the `[0]` index below.
    if getattr(trainer,"optimizers",None):
        optimizer = trainer.optimizers[0]
        params["optimizer_name"] = optimizer.__class__.__name__
        if hasattr(optimizer,"defaults"):
            # Optimizer defaults are merged into the logged parameters.
            params.update(optimizer.defaults)

    # Model hyper-parameters are merged last, so they win on key collisions.
    params.update(model.hparams)
    mlflow.log_params(params)

    # logging the model summary as a text artifact (max_depth=-1 is passed through
    # to ModelSummary)
    summary = ModelSummary(model,max_depth=-1)
    save_summary__file_to_model_artifact(str(summary))

    # Log the model to the run and register it under "sentiment_analysis".
    mlflow.pytorch.log_model(pytorch_model=model,
                             artifact_path="sentiment_analysis",
                             registered_model_name="sentiment_analysis")
    
    

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | train_metrics | ModuleDict         | 0     
1 | val_metrics   | ModuleDict         | 0     
2 | test_metrics  | ModuleDict         | 0     
3 | adapter       | HuggingFaceAdapter | 4.4 M 
-----------------------------------------------------
258       Trainable params
4.4 M     Non-trainable params
4.4 M     Total params
17.545    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.
Successfully registered model 'sentiment_analysis'.
2023/03/23 17:40:28 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: sentiment_analysis, version 1
Created version '1' of model 'sentiment_analysis'.
