# Kidney Disease Classification using MLflow-DVC

## Workflows:

1. Update config.yaml
2. Update secrets.yaml [Optional]
3. Update params.yaml
4. Update the entity
5. Update the configuration manager in src config
6. Update the components
7. Update the pipeline 
8. Update the main.py
9. Update the dvc.yaml
10. app.py

In [1]:
import os

In [2]:
# Move up to the project root only when we are not already there.
# An unconditional chdir("../") climbs one more level every time this cell is
# re-run; the presence of config/config.yaml (the relative path the pipeline
# later loads from the working directory) is used as the "already at root" marker.
if not os.path.isfile(os.path.join("config", "config.yaml")):
    os.chdir("../")

In [3]:
# Show the current working directory to confirm the chdir above took effect.
pwd

'd:\\data_science\\kidney_disease_classification'

MLFLOW_TRACKING_URI=https://dagshub.com/apri4u/kidney_disease_classification.mlflow \
MLFLOW_TRACKING_USERNAME=apri4u \
MLFLOW_TRACKING_PASSWORD=<your-dagshub-access-token> \
python script.py

In [4]:
import getpass

# Non-secret connection settings for the remote MLflow tracking server.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/apri4u/kidney_disease_classification.mlflow"
os.environ["MLFLOW_TRACKING_USERNAME"] = "apri4u"

# The access token must never be stored in the notebook. Reuse it when it is
# already exported in the environment; otherwise prompt for it without echo.
# NOTE(review): a token was previously committed in this cell and should be
# revoked and regenerated.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass.getpass("MLFLOW_TRACKING_PASSWORD: ")

In [5]:
import tensorflow as tf

In [6]:
# Load the trained model saved by the training stage so it can be inspected.
model = tf.keras.models.load_model(
    filepath="artifacts/model_training/model.hd5/",
)

In [7]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [7]:
# Entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    """Read-only settings consumed by the model-evaluation stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Location of the trained model that will be loaded for evaluation.
    path_of_model: Path
    # Directory of images from which the evaluation split is drawn.
    training_data: Path
    # Full set of run parameters; logged to MLflow as-is.
    all_params: dict
    # URI of the MLflow server used when logging the run.
    mlflow_uri: str
    # Image size; the evaluation code uses its first two entries as
    # (height, width) after dropping the last entry.
    params_image_size: list
    # Batch size used when building the evaluation dataset.
    params_batch_size: int

In [8]:
# Creating configuration

import os
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories# Placeholder content
from cnnClassifier.entity.config_entity import DataIngestionConfig, PrepareBaseModelConfig, ModelTrainingConfig, EvaluationConfig

class ConfigurationManager:
    """Loads the project YAML files and builds per-stage configuration objects.

    On construction the config and params files are read and the artifacts
    root directory named in the config is created.
    """

    def __init__(self,
                 config_path = CONFIG_FILE_PATH,
                 params_path = PARAMS_FILE_PATH,
                 ):
        """Read the YAML files and ensure the artifacts root exists.

        Args:
            config_path: Path to the main configuration YAML.
            params_path: Path to the parameters YAML.
        """
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_path)

        create_directories([self.config.artifacts_root])

    def model_evaluation_config(self) -> EvaluationConfig:
        """Build the configuration for the model-evaluation stage.

        Returns:
            EvaluationConfig: model/data locations, the MLflow URI and the
            image-size / batch-size parameters taken from ``self.params``.
        """
        # Kept from the original behavior: the training root directory is
        # (re)created before the evaluation config is assembled.
        create_directories([self.config.model_training.root_dir])

        # NOTE(review): the model path, data path and MLflow URI below are
        # hardcoded rather than read from self.config — confirm this is intended.
        # Paths are built as Path objects so they match the Path annotations
        # declared on EvaluationConfig (previously plain str from os.path.join).
        return EvaluationConfig(
            path_of_model=Path("artifacts", "model_training", "model.hd5"),
            training_data=Path("artifacts", "data_ingestion", "raw", "kidney-ct-scan-image"),
            all_params=self.params,
            mlflow_uri="https://dagshub.com/apri4u/kidney_disease_classification.mlflow",
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE,
        )

In [22]:
# Component: model evaluation
from pathlib import Path
from cnnClassifier.entity.config_entity import EvaluationConfig
from cnnClassifier.utils.common import save_json
import mlflow
from urllib.parse import urlparse

class ModelEvaluation:
    """Evaluates a saved Keras model and reports the result locally and to MLflow.

    Typical call order: ``evaluation()`` first (it sets ``self.model`` and
    ``self.score``), then ``save_score()`` and/or ``log_onto_mlflow()``.
    """

    def __init__(self, config: EvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Settings describing the model, data and MLflow target.
        """
        self.config = config

    def _test_ds(self):
        """Build ``self.test_ds`` from the configured image directory.

        Uses the "validation" subset of a fixed-seed 30% split, then caches,
        shuffles, prefetches and rescales pixel values by 1/255.
        """
        # Drop the last entry of the configured size, then take (height, width).
        spatial_dims = self.config.params_image_size[:-1]
        img_size = (spatial_dims[0], spatial_dims[1])

        self.test_ds = tf.keras.utils.image_dataset_from_directory(
                self.config.training_data,
                validation_split=0.3,
                subset="validation",
                seed=123,
                image_size=img_size,
                batch_size=self.config.params_batch_size)

        # Optimizing
        AUTOTUNE = tf.data.AUTOTUNE

        self.test_ds = self.test_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)

        # Normalizing
        normalization_layer = tf.keras.layers.Rescaling(1./255)

        self.test_ds = self.test_ds.map(lambda x, y: (normalization_layer(x), y))

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the model, build the evaluation dataset and store the score.

        Sets ``self.model`` and ``self.score`` (the value returned by
        ``model.evaluate``).
        """
        self.model = self.load_model(self.config.path_of_model)
        self._test_ds()
        self.score = self.model.evaluate(self.test_ds)

    def _scores(self) -> dict:
        """Return the stored score as a ``{"loss", "accuracy"}`` mapping.

        Assumes ``self.score[0]`` is the loss and ``self.score[1]`` the
        accuracy — TODO confirm against the metrics the model was compiled with.
        """
        return {"loss": self.score[0],
                "accuracy": self.score[1]}

    def save_score(self):
        """Write the loss/accuracy pair to ``scores.json`` via ``save_json``."""
        save_json(path=Path("scores.json"), data=self._scores())

    def log_onto_mlflow(self):
        """Log parameters, metrics and the model to MLflow.

        The model is also registered as ``VGG16_Model`` unless the tracking
        store is file-based.
        """
        # NOTE(review): only the *registry* URI is set from the config; the
        # tracking URI checked below comes from MLflow's own configuration
        # (e.g. MLFLOW_TRACKING_URI) — confirm both are meant to agree.
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_uri_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(self._scores())

            # Model registry won't work with filestorage, so only request
            # registration for non-file tracking stores.
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            registry_kwargs = {}
            if tracking_uri_type_store != "file":
                registry_kwargs["registered_model_name"] = "VGG16_Model"

            # self.model is a Keras model, so it is logged with the TensorFlow
            # flavor; the scikit-learn flavor used before is the wrong flavor
            # for a Keras model.
            mlflow.tensorflow.log_model(self.model, "model", **registry_kwargs)
        

In [23]:
# Pipeline: run the model-evaluation stage end to end
from cnnClassifier.utils import logger
# Run the evaluation stage: build the config, evaluate the model, persist the
# scores locally and log everything to MLflow.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.model_evaluation_config()
    # NOTE(review): this rebinds the notebook-level name `model`, which an
    # earlier cell uses for the loaded Keras model.
    model = ModelEvaluation(model_evaluation_config)
    model.evaluation()
    model.save_score()
    model.log_onto_mlflow()
    logger.info("Evaluation pipeline succesful!")
except Exception:
    # Record the failure (with traceback) in the project log, then re-raise
    # with a bare `raise` so the original traceback is preserved.
    # Assumes `logger` is a standard logging.Logger — TODO confirm.
    logger.exception("Evaluation pipeline failed")
    raise

[2023-12-19 18:44:23,514: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-19 18:44:23,515: INFO: common: yaml file: params.yaml loaded successfully]
Found 465 files belonging to 2 classes.
Using 139 files for validation.
[2023-12-19 18:44:28,881: INFO: common: json file saved at: scores.json]


Registered model 'VGG16_Model' already exists. Creating a new version of this model...
2023/12/19 18:44:41 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: VGG16_Model, version 2


[2023-12-19 18:44:41,128: INFO: 3735879283: Evaluation pipeline succesful!]


Created version '2' of model 'VGG16_Model'.
