In [1]:
import mlflow
import os
import importlib
from transformers import pipeline

summarization_model = pipeline("summarization")
sentiment_model = pipeline("sentiment-analysis")
translation_model = pipeline('translation_en_to_fr')
image_classification = pipeline("image-classification")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to t5-base and revision 686f1db (https://huggingface.co/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512

In [2]:
import download_file

def predict_to_save(sentiment_model,summarization_model, translation_model,image_classification,input_type, data = None,min_length = 0, max_length = 150):
    """
    make and combine the prediction of all the differents models on the differents scoring tables

    params_scoring dictionnary: dictionnary that contain all the scoring tables on which models will...
    ... make predictions. It have to be a dictionnary as the predict function of mlflow only take a single argument.

    Return Pandas dataframe with all the fraud risk score (and few others informations) on the remise batch
    Return interpretation_remises pandas dataframe with the interpretation for the remises model
    Return interpretation_client pandas dataframe with the interpretation for the clients models
    """
    
    if input_type == "sentiment":
        return sentiment_model(data)
    elif input_type == "translation":
        return translation_model(data)
    elif input_type == "image":
        image = download_file.download_image(data)
        return image_classification(image)
    elif input_type == "summarization":

        if data is None:
            data = download_file.download_story()

        dict_result = summarization_model(data, min_length, max_length)[0]
        dict_result["input_text"] = data
        return dict_result

    return "mauvais type selectionné"


In [4]:
import cloudflow
importlib.reload(cloudflow)

tracking_uri ="/home/quinten/Projets/BPRI - Fraude/test_package_mlflow"
experiment_id = "quinten_test" 

cloudflow.prepare_env(tracking_uri,experiment_id)

with mlflow.start_run(experiment_id = experiment_id) as run:
    
    print("RUN ID : ", run.info.run_id)
    
    mlflow.log_metric('test_metrics', 0.99)

    model = cloudflow.cloudflow_model("DEBUG")        
    model.save(tracking_uri    = tracking_uri,
               experiment_id = experiment_id,
               run_id = run.info.run_id, 
               predict_function = predict_to_save, 
               models = {"summarization_model"  : summarization_model, 
                         "image_classification" : image_classification,
                         "translation_model"    : translation_model,
                         "sentiment_model"      : sentiment_model})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


2023-02-12 16:10:49,727.727 INFO save: Artifact path --> /home/quinten/Projets/BPRI - Fraude/test_package_mlflow/mlruns/quinten_test/226dc785fb264a03b097037756fccc64
2023-02-12 16:10:49,728.728 DEBUG get_all_subfolders: /home/quinten/Utilisateurs/slemouellic/MLFLOW_save --> Subfolder analysis
2023-02-12 16:10:49,730.730 DEBUG get_all_subfolders: /home/quinten/Utilisateurs/slemouellic/MLFLOW_save/sub_folder --> Subfolder analysis
2023-02-12 16:10:49,733.733 DEBUG get_all_subfolders: /home/quinten/Utilisateurs/slemouellic/MLFLOW_save/sub_folder/subsub_folder --> Subfolder analysis
2023-02-12 16:10:49,735.735 DEBUG get_all_subfolders: /home/quinten/Utilisateurs/slemouellic/MLFLOW_save/logs --> Subfolder analysis
2023-02-12 16:10:49,742.742 DEBUG register_all_function_by_value: download_file --> Analysis
2023-02-12 16:10:49,743.743 DEBUG register_all_function_by_value: <module 'download_file' from '/home/quinten/Utilisateurs/slemouellic/MLFLOW_save/download_file.py'> --> Enregistrement
202

RUN ID :  226dc785fb264a03b097037756fccc64


2023-02-12 16:11:19,978.978 DEBUG pickle_artifacts: pickle model DONE
2023-02-12 16:11:19,979.979 DEBUG pickle_artifacts: model --> image_classification
2023-02-12 16:11:19,980.980 DEBUG pickle_artifacts: pickle model --> image_classification
2023-02-12 16:11:19,981.981 DEBUG pickle_artifacts: pickle model path --> /home/quinten/Projets/BPRI - Fraude/test_package_mlflow/mlruns/quinten_test/226dc785fb264a03b097037756fccc64/image_classification.pkl
2023-02-12 16:11:25,013.013 DEBUG pickle_artifacts: pickle model DONE
2023-02-12 16:11:25,014.014 DEBUG pickle_artifacts: model --> translation_model
2023-02-12 16:11:25,016.016 DEBUG pickle_artifacts: pickle model --> translation_model
2023-02-12 16:11:25,017.017 DEBUG pickle_artifacts: pickle model path --> /home/quinten/Projets/BPRI - Fraude/test_package_mlflow/mlruns/quinten_test/226dc785fb264a03b097037756fccc64/translation_model.pkl
2023-02-12 16:11:46,879.879 DEBUG pickle_artifacts: pickle model DONE
2023-02-12 16:11:46,881.881 DEBUG pic