In [1]:
import os


def enter_project_root(notebook_dir_name="research"):
    """Move the working directory up to the project root, at most once.

    The notebook lives in the ``research`` folder, while the paths used further
    down are relative to the project root (the parent folder). An unconditional
    ``os.chdir('../')`` climbs one more level every time the cell is re-run, so
    the move is only made while the current directory is still the notebook folder.

    Args:
        notebook_dir_name: name of the folder that holds this notebook.
    """
    if os.path.basename(os.getcwd()) == notebook_dir_name:
        # Go back to the parent (main project) folder.
        os.chdir('../')


enter_project_root()

In [2]:
# Show the current working directory to confirm the chdir in the first cell landed in the project root.
print(os.getcwd())

d:\Data Science\Python Assignment\End to End Kidney Disease Detection


In [None]:
# os.environ is a mapping object representing the process's environment variables
# (name -> value); assignments made here are seen by libraries that read the environment later.
# MLflow looks for MLFLOW_TRACKING_URI / MLFLOW_TRACKING_USERNAME / MLFLOW_TRACKING_PASSWORD
# (no underscore inside "MLFLOW", and "URI" rather than "UI"); other spellings are silently ignored.
# Fill in the DagsHub credentials locally before running and never commit them with the notebook.
os.environ['MLFLOW_TRACKING_URI']='https://dagshub.com/SepNem32bit/End-to-End-Kidney-Disease-Classification.mlflow'
os.environ['MLFLOW_TRACKING_USERNAME']=""
os.environ['MLFLOW_TRACKING_PASSWORD']=""

Loading the pre-trained model

In [None]:
import tensorflow as tf
# Forward slashes on purpose: in a regular string literal "\t" is a TAB character, so
# "artifacts\training_ML\..." did not name the intended file. "/" also works on Windows.
tf.keras.models.load_model("artifacts/training_ML/model.h5")

Update the entity

In [None]:
from dataclasses import dataclass
from pathlib import Path

# Entity bundling every setting the evaluation stage needs; the configuration manager's getter declares it as its return type (the `->` annotation).
# @dataclass(frozen=True) is a decorator from Python's dataclasses module that makes instances of the class immutable after creation.
# When applied, it provides the following benefits:
# Immutability: once an object is created, you cannot change its attributes; any attempt to assign to one of the instance's fields raises an error.
# Hashability: instances of the class become hashable (i.e., you can use them as keys in dictionaries or add them to sets) as long as all their fields are hashable.
@dataclass(frozen=True)
class EvaluationMLConfig:
    """Read-only settings for the model-evaluation stage."""

    # Saved model file that gets loaded for evaluation.
    trained_model_path: Path
    # Image directory the validation generator reads from.
    training_data: Path
    # URI handed to MLflow when logging the run.
    mlflow_uri: str
    # Full parameter mapping, logged to MLflow as run parameters.
    all_params: dict
    # Batch size used by the validation generator.
    params_batch_size: int
    # Image size; every element except the last is used as the generator's target size.
    params_image_size: list


Update the configuration manager in src config

In [None]:
from DiseaseClassifier.constants import *
from DiseaseClassifier.utils.common import read_yaml, create_directories, save_json


    
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage config entities."""

    def __init__(self,
                 #defaults come from the constants module
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: path of the YAML file holding the project configuration.
            params_filepath: path of the YAML file holding the model parameters.
        """
        self.config=read_yaml(config_filepath)
        self.params=read_yaml(params_filepath)
        #artifacts_root is defined in the config file; create that folder up front
        create_directories([self.config.artifacts_root])

    def get_evaluation_ML_config(self)->EvaluationMLConfig:
        """Assemble the settings for the evaluation stage.

        Returns:
            EvaluationMLConfig: model path, data directory, MLflow URI, the full
            params mapping, and the batch size / image size taken from params.
        """
        evaluation_ml_config=EvaluationMLConfig(
            #Forward slashes + Path: in a plain string literal "\t" is a TAB,
            #so "artifacts\training_ML\model.h5" did not name the model file.
            trained_model_path=Path("artifacts/training_ML/model.h5"),
            training_data=Path("artifacts/data_ingestion/kidney-ct-scan-image-samples"),
            mlflow_uri='https://dagshub.com/SepNem32bit/End-to-End-Kidney-Disease-Classification.mlflow',
            all_params= self.params,
            params_batch_size=self.params.BATCH_SIZE,
            params_image_size=self.params.IMAGE_SIZE
            )
        return evaluation_ml_config


In [None]:
from DiseaseClassifier.constants import CONFIG_FILE_PATH
# Sanity check: show which config file path the constants module exposes.
print(CONFIG_FILE_PATH)

config\config.yaml


In [None]:
import os
from pathlib import Path
import mlflow
import mlflow.keras
from urllib.parse import urlparse


#What kwargs Does:
# It enables you to pass an arbitrary number of keyword arguments (arguments with names) to a function.
# Inside the function, kwargs is treated as a dictionary, where each key corresponds to the argument name and each value corresponds to the argument value.

class Evaluation:
    """Scores the trained Keras model on the validation split and logs the result to MLflow.

    Typical use: ``evaluation()`` first (loads the model, builds the validation
    generator, evaluates, writes ``score.json``), then ``log_into_mlflow()``,
    which relies on ``self.model`` and ``self.score`` set by ``evaluation()``.
    """

    def __init__(self,config:EvaluationMLConfig):
        """Keep the evaluation settings for the other methods to read."""
        self.config=config

    def _valid_generator(self):
        """Build the validation image generator and store it on ``self.valid_generator``.

        Only a validation generator is created here, and no augmentation options
        are passed to ``ImageDataGenerator``.
        """
        #Preprocessing settings forwarded to ImageDataGenerator via **kwargs
        datagenerator_kwargs = dict(
            #rescale=1./255 scales pixel values from [0, 255] to [0, 1]
            rescale = 1./255,
            #fraction of the images reserved for the "validation" subset
            validation_split=0.30
        )

        #Settings forwarded to flow_from_directory via **kwargs
        dataflow_kwargs = dict(
            #every element of the image size except the last one
            #(presumably height and width without the channel count - confirm against params.yaml)
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            #how images are resampled when resized to target_size
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        #shuffle=False keeps the order of the validation samples fixed between runs
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

    #staticmethod: needs no instance state, so it can be called on the class itself
    @staticmethod
    def load_model(path:Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the model, evaluate it on the validation generator and save the scores."""
        self.model=self.load_model(self.config.trained_model_path)
        #The parentheses matter: a bare `self._valid_generator` only looks the method up,
        #never builds the generator, and leaves self.valid_generator undefined below.
        self._valid_generator()
        self.score=self.model.evaluate(self.valid_generator)
        self.save_score()

    def save_score(self):
        """Write the evaluation result to ``score.json``.

        Treats ``self.score[0]`` as the loss and ``self.score[1]`` as the accuracy
        (assumes the model was compiled with accuracy as its first metric - confirm).
        """
        scores={"loss":self.score[0],"accuracy":self.score[1]}
        #save_json comes from the project's common utilities
        save_json(path=Path('score.json'),data=scores)

    def log_into_mlflow(self):
        """Log the parameters, the metrics and the model to MLflow in a new run.

        Requires ``evaluation()`` to have run first (reads ``self.model`` and ``self.score``).
        NOTE(review): only the registry URI is set here; the tracking URI is whatever
        MLflow resolves on its own (e.g. from the MLFLOW_TRACKING_URI environment variable).
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        # Start a run
        with mlflow.start_run():
            #logging all parameters and metrics
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {"loss": self.score[0], "accuracy": self.score[1]}
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":

                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                #log the model without registering it
                mlflow.keras.log_model(self.model, "model")


# you can avoid creating a separate _valid_generator method by directly defining the validation data generator inside the evaluate method. 
# This keeps the code more concise, as the generator configuration is only needed within the context of the evaluation process.    
#     def evaluate(self):
#         # Load the model
#         model = self.load_model()

#         # Directly create the validation data generator
#         valid_generator = tf.keras.preprocessing.image.ImageDataGenerator(
#             rescale=1.0 / 255, validation_split=0.30
#         ).flow_from_directory(
#             directory=self.config.training_data,
#             target_size=self.config.params_image_size[:-1],
#             batch_size=self.config.params_batch_size,
#             interpolation="bilinear",
#             subset="validation",
#             shuffle=False
#         )

#         # Evaluate model and save score
#         score = model.evaluate(valid_generator)
#         scores = {"loss": score[0], "accuracy": score[1]}
#         save_json(Path('score.json'), data=scores)

#         # Log to MLflow
#         self.log_to_mlflow(model, scores)
    

Creating the data pipeline

In [None]:
# Build the evaluation settings, score the model on the validation split, then log to MLflow.
# No try/except wrapper: a handler that only re-raises adds nothing, and letting
# exceptions propagate shows the full traceback in the cell output.
config= ConfigurationManager()
evaluation_config=config.get_evaluation_ML_config()
evaluation=Evaluation(evaluation_config)
evaluation.evaluation()
evaluation.log_into_mlflow()


[2024-10-29 16:13:04,999: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-29 16:13:05,036: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-29 16:13:05,038: INFO: common: created directory at: <class 'pathlib.Path'>]
[2024-10-29 16:13:05,039: INFO: common: created directory at: <class 'pathlib.Path'>]
Found 40 images belonging to 2 classes.
Found 160 images belonging to 2 classes.


ValueError: Unknown variable: <KerasVariable shape=(25088, 2), dtype=float32, path=dense/kernel>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.