In [1]:
import os

In [2]:
os.getcwd()

'e:\\STUDY\\TENSORFLOW\\Projects\\1_CNN_Project\\research'

In [3]:
# Move from the notebook's folder up to the project root, but only once.
# Re-running this cell used to climb one directory higher each time, which
# silently broke every relative path used further down. params.yaml is loaded
# by relative path from the project root later in this notebook, so its
# presence is used as the "already at the root" marker.
# NOTE(review): assumes the notebook's own folder does not contain a params.yaml.
if not os.path.exists("params.yaml"):
    os.chdir("../")

In [4]:
os.getcwd()

'e:\\STUDY\\TENSORFLOW\\Projects\\1_CNN_Project'

In [5]:
import dagshub

"""
Initialize a DagsHub repository or DagsHub-related functionality.

Initialization includes:
Creates a repository on DagsHub if it doesn’t exist yet.

If dvc flag is set, adds the DagsHub repository as a dvc remote.

If mlflow flag is set, initializes MLflow environment variables to enable 
logging experiments into the DagsHub hosted MLflow. That means that if you call 
dagshub.init() in your script, then any MLflow function called later in the script
will log to the DagsHub hosted MLflow.

"""
# Approach1 : 

# dagshub.init(repo_owner='Aakash00004', 
#              repo_name='Chest-Cancer-Classification-Project', 
#              mlflow=True)

# import mlflow

# with mlflow.start_run():
#     mlflow.log_param('Learning rate', 0.01)
#     mlflow.log_metric('Accuracy', 0.8)


# Approach 2 : 

# Use the code below when you want to name the experiment and keep all of its runs grouped under that name for experiment tracking.

# dagshub.init(repo_owner='Aakash00004',
#              repo_name='Chest-Cancer-Classification-Project',
#              mlflow=True)

# import mlflow

# mlflow.set_experiment("My Chest Cancer Experiment")  # Set the experiment name

# with mlflow.start_run():
#     mlflow.log_param('Learning rate', 0.01)
#     mlflow.log_metric('Accuracy', 0.8)

'\nInitialize a DagsHub repository or DagsHub-related functionality.\n\nInitialization includes:\nCreates a repository on DagsHub if it doesn’t exist yet.\n\nIf dvc flag is set, adds the DagsHub repository as a dvc remote.\n\nIf mlflow flag is set, initializes MLflow environment variables to enable \nlogging experiments into the DagsHub hosted MLflow. That means that if you call \ndagshub.init() in your script, then any MLflow function called later in the script\nwill log to the DagsHub hosted MLflow.\n\n'

#### NOTE:
- The credentials below are stored only in the current session (as environment variables of the running kernel).

In [6]:
# Uncomment the lines below only if the DagsHub/MLflow code above has been uncommented and run.

#print(os.getenv("MLFLOW_TRACKING_URI"))
#print(os.getenv("MLFLOW_TRACKING_USERNAME") )
#print(os.getenv("MLFLOW_TRACKING_PASSWORD") )
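# (value redacted: this looked like an access token. Never paste credentials into a notebook, and rotate any token that has already been committed.)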

In [7]:
import tensorflow as tf

In [8]:
# Load the previously trained Keras model from disk so it can be inspected below.
model = tf.keras.models.load_model(
    "trained_model/training/trained_model.keras"
)

In [9]:
model.summary()

In [10]:
# Update the src/cnn_classifier/entity/config_entity.py file

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    trained_model_path:Path
    training_data:Path
    testing_data:Path
    all_params:dict
    mlflow_uri:str
    params_image_size:list | tuple
    params_batch_size:int

In [11]:
# Update the src/cnn_classifier/config/configuration.py file.

import os
import dagshub
from cnn_classifier.constants import *
from cnn_classifier.utils.common import read_yaml, create_directories, save_json

In [12]:
class ConfigurationManager:
    """Builds the configuration objects used by the pipeline stages.

    On construction it reads the main config file and the params file with
    ``read_yaml`` and makes sure the artifacts root directory exists.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes below this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def initialize_dagshub(self,
                           Repo_owner: str = 'Aakash00004',
                           Repo_name: str = 'Chest-Cancer-Classification-Project',
                           Mlflow: bool = True):
        """Initialise the DagsHub integration for the given repository.

        The default values are the ones shown under the 'remote' option of the
        Chest-Cancer-Classification-Project repository on DagsHub.

        Initialization includes:
        Creates a repository on DagsHub if it doesn't exist yet.

        If the mlflow flag is set, initializes the MLflow environment variables
        so that experiments are logged into the DagsHub-hosted MLflow. That
        means any MLflow function called after this method logs to DagsHub.

        Args:
            Repo_owner: DagsHub account that owns the repository.
            Repo_name: Name of the DagsHub repository.
            Mlflow: Whether to point MLflow at the DagsHub-hosted server.
        """
        dagshub.init(repo_owner=Repo_owner,
                     repo_name=Repo_name,
                     mlflow=Mlflow)

    def get_evaluation_config(self) -> EvaluationConfig:
        """Assemble the settings for the model-evaluation stage.

        Side effect: calls ``initialize_dagshub()`` with its defaults, which
        contacts DagsHub. That call has to happen before the tracking URI is
        read from the environment below.

        Returns:
            EvaluationConfig: paths, parameters and the MLflow tracking URI.
        """
        # Train and test images live under <unzip_dir>/data/.
        data_root = Path(self.config.data_ingestion.unzip_dir) / "data"

        # Must run first: it sets the MLFLOW_TRACKING_URI read below.
        self.initialize_dagshub()

        return EvaluationConfig(
            # Wrapped in Path so the value matches the type the dataclass declares.
            trained_model_path=Path("trained_model/training/trained_model.keras"),
            training_data=data_root / "train",
            testing_data=data_root / "test",
            # None if the variable is still unset after the DagsHub initialisation.
            mlflow_uri=os.getenv("MLFLOW_TRACKING_URI"),
            all_params=self.params,
            params_image_size=self.params.INPUT_SHAPE,
            params_batch_size=self.params.BATCH_SIZE,
        )

In [13]:
# Update the src/cnn_classifier/components/model_evaluation_mlflow.py file

import tensorflow as tf
from pathlib import Path
import mlflow
import dagshub
import mlflow.keras
from urllib.parse import urlparse
from tensorflow.keras.utils import image_dataset_from_directory as Images

In [None]:
class Evaluation:
    """Scores the trained model on the test images and can log the result to MLflow."""

    def __init__(self, config: EvaluationConfig):
        """Keep the configuration; nothing is loaded until evaluation runs.

        Args:
            config: Settings for this evaluation (model path, data paths,
                parameters, MLflow URI).
        """
        self.config = config

        # Filled in by model_evaluation(). Declared here so that methods called
        # too early can report a clear error instead of an AttributeError.
        self.trained_model = None
        self.images_test = None
        self.score = None

    def get_trained_model(self):
        """Load and return the model stored at ``config.trained_model_path``."""
        return tf.keras.models.load_model(self.config.trained_model_path)

    def get_test_data(self):
        """Build the test dataset from ``config.testing_data``.

        The dataset is stored on ``self.images_test``; nothing is returned.
        """
        self.images_test = Images(
            directory=self.config.testing_data,
            labels='inferred',
            # use loss=tf.keras.losses.CategoricalCrossentropy() because
            # label_mode is set to 'categorical'
            label_mode="categorical",
            # Drop the last entry of the configured input shape to get the
            # image size (presumably the channel count -- confirm in params.yaml).
            image_size=tuple(self.config.params_image_size[:-1]),
            batch_size=self.config.params_batch_size,
        )

    def _evaluation_metrics(self) -> dict:
        """Return the evaluation result as a ``{"loss", "accuracy"}`` dict.

        Raises:
            RuntimeError: If ``model_evaluation()`` has not produced a score yet.
        """
        if self.score is None:
            raise RuntimeError(
                "No evaluation results available; call model_evaluation() first."
            )
        # Index 0 is read as the loss and index 1 as the accuracy. This relies
        # on the order in which the model was compiled -- TODO confirm.
        return {"loss": float(self.score[0]), "accuracy": float(self.score[1])}

    def save_eval_score(self):
        """Write the loss and accuracy to ``ModelEvaluation_scores.json``.

        Raises:
            RuntimeError: If ``model_evaluation()`` has not been run yet.
        """
        save_json(path=Path("ModelEvaluation_scores.json"),
                  data=self._evaluation_metrics())

    def model_evaluation(self):
        """Load the model, build the test dataset, evaluate, and save the scores."""
        self.trained_model = self.get_trained_model()
        self.get_test_data()

        # Evaluating the trained model performance on test data
        self.score = self.trained_model.evaluate(self.images_test)

        self.save_eval_score()

    def log_into_mlflow(self, Repo_Owner: str = 'Aakash00004',
                        Repo_Name: str = 'Chest-Cancer-Classification-Project',
                        MlFlow: bool = True):
        """Record the evaluation (params, metrics and model) as an MLflow run.

        The run is created inside the experiment "My Chest Cancer Experiment".
        The model is logged under the artifact path "model" and, unless the
        tracking URI uses the ``file`` scheme, is also registered as
        "Custom_VGG16_Model".

        DagsHub is not initialised here: that already happens in
        ``ConfigurationManager.get_evaluation_config``.

        Args:
            Repo_Owner: Unused; kept so existing callers keep working.
            Repo_Name: Unused; kept so existing callers keep working.
            MlFlow: Unused; kept so existing callers keep working.

        Raises:
            RuntimeError: If ``model_evaluation()`` has not been run yet.
        """
        # Fail before opening a run, so a premature call leaves no empty run behind.
        metrics = self._evaluation_metrics()
        if self.trained_model is None:
            raise RuntimeError(
                "No trained model loaded; call model_evaluation() first."
            )

        # Honour the tracking URI carried by the configuration, when there is one.
        if self.config.mlflow_uri:
            mlflow.set_tracking_uri(self.config.mlflow_uri)

        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # Set the experiment name
        mlflow.set_experiment("My Chest Cancer Experiment")

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(metrics)

            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model
                # There are other ways to use the Model Registry, which depends on the use case,
                # please refer to the doc for more information:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.keras.log_model(self.trained_model,
                                       "model",
                                       registered_model_name="Custom_VGG16_Model")
            else:
                mlflow.keras.log_model(self.trained_model, "model")
        

In [None]:
# Pipeline : update the src/cnn_classifier/pipeline/Stage04_model_evaluation_mlflow.py file

# Set to True to also record this evaluation (params, metrics, model) in MLflow.
# Keep it False when deploying the project to production, where experiment
# tracking and model logging are not wanted.
LOG_TO_MLFLOW = False

# No try/except wrapper: the previous one only re-raised the same exception.
config = ConfigurationManager()
eval_config = config.get_evaluation_config()
evaluation = Evaluation(eval_config)
evaluation.model_evaluation()

if LOG_TO_MLFLOW:
    evaluation.log_into_mlflow()

[2025-04-26 09:04:39,639 : INFO : common : yaml file : config\config.yaml loaded successfully]
[2025-04-26 09:04:39,639 : INFO : common : yaml file : params.yaml loaded successfully]
[2025-04-26 09:04:39,639 : INFO : common : Created directory at : artifacts]
[2025-04-26 09:04:40,161 : INFO : _client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-04-26 09:04:40,168 : INFO : helpers : Accessing as Aakash00004]
[2025-04-26 09:04:40,725 : INFO : _client : HTTP Request: GET https://dagshub.com/api/v1/repos/Aakash00004/Chest-Cancer-Classification-Project "HTTP/1.1 200 OK"]
[2025-04-26 09:04:41,243 : INFO : _client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-04-26 09:04:41,247 : INFO : helpers : Initialized MLflow to track repo "Aakash00004/Chest-Cancer-Classification-Project"]


[2025-04-26 09:04:41,247 : INFO : helpers : Repository Aakash00004/Chest-Cancer-Classification-Project initialized!]
Found 174 files belonging to 2 classes.
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 477ms/step - accuracy: 0.9785 - loss: 0.1537
[2025-04-26 09:04:46,938 : INFO : common : json file saved at : ModelEvaluation_scores.json]


Registered model 'Custom_VGG16_Model' already exists. Creating a new version of this model...
2025/04/26 09:05:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Custom_VGG16_Model, version 9
Created version '9' of model 'Custom_VGG16_Model'.


🏃 View run abundant-newt-841 at: https://dagshub.com/Aakash00004/Chest-Cancer-Classification-Project.mlflow/#/experiments/1/runs/ad557fb7282d4dffa065fc9fc79c3e8e
🧪 View experiment at: https://dagshub.com/Aakash00004/Chest-Cancer-Classification-Project.mlflow/#/experiments/1


In [16]:
# mlflow_uri="https://dagshub.com/Aakash00004/Chest-Cancer-Classification-Project.mlflow"
# mlflow.set_registry_uri(mlflow_uri)

In [17]:
mlflow.get_registry_uri()

'https://dagshub.com/Aakash00004/Chest-Cancer-Classification-Project.mlflow'

In [18]:
mlflow.get_tracking_uri()

'https://dagshub.com/Aakash00004/Chest-Cancer-Classification-Project.mlflow'

In [19]:
urlparse(mlflow.get_tracking_uri())

ParseResult(scheme='https', netloc='dagshub.com', path='/Aakash00004/Chest-Cancer-Classification-Project.mlflow', params='', query='', fragment='')

In [20]:
urlparse(mlflow.get_tracking_uri()).scheme 

'https'

In [21]:
# Scratch check: prints "hello" whenever the tracking URI is not a file store.
if not urlparse(mlflow.get_tracking_uri()).scheme == "file":
    print("hello")

hello
