In [1]:
import os
# Display the kernel's current working directory (shown in the cell output).
%pwd

'c:\\Users\\lpaes\\projects\\Xray_img_detection\\research'

In [2]:
# The configuration files are loaded through relative paths, so the kernel
# must run from the project root. The kernel starts inside `research/` (see
# the %pwd output above), so step up exactly one level -- but only while we
# are still in `research/`. That keeps this cell idempotent: re-running it,
# or doing Restart & Run All, never climbs above the project root.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('..')
os.getcwd()

'c:\\Users\\lpaes\\projects\\Xray_img_detection'

In [3]:
import tensorflow as tf

In [36]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ExperimentConfig:
    """Read-only settings bundle consumed by an experiment run.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``. The field order below is the
    positional order of the generated ``__init__`` and must not be changed.
    """

    # ---- filesystem locations -------------------------------------------
    path_of_model: Path
    training_data: Path
    validation_data: Path
    metric_file_name: Path
    base_model_path: Path
    updated_base_model_path: Path

    # ---- hyper-parameters -----------------------------------------------
    all_params: dict              # complete parameter mapping
    params_image_size: list
    params_batch_size: int
    params_is_augmentation: bool

    # ---- MLflow settings --------------------------------------------------
    mlflow_uri: str
    artifact_uri: Path
    mlflow_experiment: Path

In [40]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories, save_json

class ConfigurationManager:
    """Load the project YAML files and assemble an ``ExperimentConfig``.

    Defining this class points MLflow at the DagsHub tracking server by
    setting ``MLFLOW_TRACKING_URI`` and ``MLFLOW_TRACKING_USERNAME`` in
    ``os.environ``.

    SECURITY: ``MLFLOW_TRACKING_PASSWORD`` is deliberately NOT set here.
    Secrets must never be written into a notebook; export the variable in
    the shell / kernel environment (or a git-ignored ``.env`` file) before
    starting the kernel. Any access token that was ever committed in this
    notebook must be treated as leaked and revoked on DagsHub.
    """
    os.environ['MLFLOW_TRACKING_URI']='https://dagshub.com/Laidson/Xray_img_detection.mlflow'
    os.environ['MLFLOW_TRACKING_USERNAME']='Laidson'

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the output directories.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.

        Side effects:
            Creates ``config.artifacts_root`` and ``config.evaluation.root_dir``
            and emits a warning when no MLflow password is available in the
            environment.
        """
        if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
            # Warn up front instead of letting the run die later with an
            # opaque authentication error from the tracking server.
            import warnings
            warnings.warn(
                "MLFLOW_TRACKING_PASSWORD is not set; authenticated MLflow "
                "logging will fail. Export it in the environment instead of "
                "hard-coding it in the notebook.",
                stacklevel=2,
            )

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root, self.config.evaluation.root_dir])

    def get_experiment_config(self) -> ExperimentConfig:
        """Build the ``ExperimentConfig`` from the loaded config and params.

        Returns:
            ExperimentConfig: paths taken from the ``training``,
            ``data_ingestion``, ``evaluation`` and ``prepare_base_model``
            sections of the config, hyper-parameters taken from the params
            file, and the MLflow URI taken from ``MLFLOW_TRACKING_URI``.
        """
        config = self.config
        params = self.params

        return ExperimentConfig(
            path_of_model = Path(config.training.trained_model_path),
            training_data = Path(config.data_ingestion.train_dir),
            validation_data = Path(config.data_ingestion.test_dir),
            metric_file_name = Path(config.evaluation.metric_file_name),
            base_model_path = Path(config.prepare_base_model.base_model_path),
            updated_base_model_path = Path(config.prepare_base_model.updated_base_model_path),
            all_params = params,
            params_image_size = params.IMAGE_SIZE,
            params_batch_size = params.BATCH_SIZE,
            params_is_augmentation = params.AUGMENTATION,
            mlflow_uri = os.environ['MLFLOW_TRACKING_URI'],
            artifact_uri = Path(config.evaluation.artifact_uri),
            mlflow_experiment = Path(config.evaluation.mlflow_experiment),
        )



In [58]:
from urllib.parse import urlparse
import mlflow

class Experiment:
    """VGG16 transfer-learning experiment that is trained, evaluated and logged to MLflow.

    Typical use: ``Experiment(config).experiment()``, which builds the data
    generators, creates the base model, attaches a softmax head, trains,
    evaluates on the validation data and logs metrics, parameters and the
    model to the tracking server named by ``config.mlflow_uri``.
    """

    def __init__(self, config: "ExperimentConfig"):
        """Store the experiment configuration.

        Args:
            config: Settings object providing the paths, hyper-parameters and
                MLflow URI read by the other methods.
        """
        self.config = config

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Persist ``model`` at ``path`` via ``model.save``."""
        model.save(path)

    def get_base_model(self):
        """Instantiate VGG16 from the configured parameters, save it and return it.

        The model is also kept on ``self.model`` and written to
        ``config.base_model_path``.
        """
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.all_params.WEIGHTS,
            include_top=self.config.all_params.INCLUDE_TOP
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

        return self.model

    @staticmethod
    def _freeze_layers(model, freeze_all, freeze_till):
        """Mark the selected layers of ``model`` as non-trainable.

        Args:
            model: Object exposing a ``layers`` sequence.
            freeze_all: When truthy, every layer is frozen.
            freeze_till: When ``freeze_all`` is falsy and this is a positive
                int, every layer except the last ``freeze_till`` is frozen.
                ``None`` or a non-positive value freezes nothing.
        """
        if freeze_all:
            layers_to_freeze = model.layers
        elif (freeze_till is not None) and (freeze_till > 0):
            layers_to_freeze = model.layers[:-freeze_till]
        else:
            layers_to_freeze = []

        for layer in layers_to_freeze:
            # The flag has to be set on each selected *layer*. Setting
            # ``model.trainable`` inside this loop would freeze the whole
            # model regardless of ``freeze_till``.
            layer.trainable = False

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze base layers, attach a Flatten + softmax head and compile.

        Args:
            model: Base Keras model whose output feeds the new head.
            classes: Number of units in the softmax output layer.
            freeze_all: Freeze every base layer when truthy.
            freeze_till: Otherwise keep only the last ``freeze_till`` base
                layers trainable (ignored when ``None`` or <= 0).
            learning_rate: Learning rate for the SGD optimizer.

        Returns:
            The compiled model (SGD, categorical cross-entropy, accuracy).
            Its summary is printed as a side effect.
        """
        Experiment._freeze_layers(model, freeze_all, freeze_till)

        flatten_in = tf.keras.layers.Flatten()(model.output)
        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(flatten_in)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Wrap ``self.model`` with the classification head, save it and return it.

        Requires ``get_base_model`` to have been called first. All base layers
        are frozen. The result is kept on ``self.full_model`` and written to
        ``config.updated_base_model_path``.
        """
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.all_params.CLASSES,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.all_params.LEARNING_RATE,
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

        return self.full_model

    def experiment(self):
        """Run the whole pipeline: data generators, training, evaluation, MLflow logging.

        Side effects:
            Sets ``self.valid_generator``, ``self.train_generator``,
            ``self.steps_per_epoch`` and ``self.validation_steps``; saves the
            base and updated models; logs metrics, parameters and the trained
            model inside a single MLflow run.
        """
        # **** PREPARE DATA TO MODEL ****
        # 8-bit pixel values span 0..255, so 1/255 maps them into [0, 1].
        datagenerator_kwargs = dict(rescale = 1./255)

        dataflow_kwargs = dict(target_size = self.config.params_image_size[:-1],
                               batch_size = self.config.params_batch_size,
                               interpolation = 'bilinear',
                               class_mode = 'categorical',
                               shuffle = False, # keep sample order stable for validation/evaluation
                               )

        image_data_generator = tf.keras.preprocessing.image.ImageDataGenerator

        valid_datagenerator = image_data_generator(**datagenerator_kwargs)
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.validation_data,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = image_data_generator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
        else:
            # Without augmentation the training data only needs the same
            # rescaling as the validation data.
            train_datagenerator = valid_datagenerator

        # Training batches must be shuffled: flow_from_directory walks the
        # class folders in order, so an unshuffled flow would feed the
        # optimizer long runs of single-class batches.
        train_flow_kwargs = {**dataflow_kwargs, 'shuffle': True}
        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            **train_flow_kwargs
        )

        # **** TRAIN A MODEL ****
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        mlflow.set_tracking_uri(self.config.mlflow_uri)

        with mlflow.start_run():

            self.get_base_model()

            model = self.update_base_model()

            model.fit(
                self.train_generator,
                epochs = self.config.all_params.EPOCHS,
                steps_per_epoch = self.steps_per_epoch,
                validation_steps = self.validation_steps,
                validation_data = self.valid_generator,
            )

            # Log metrics
            (loss, accuracy) = model.evaluate(self.valid_generator)
            mlflow.log_metric('loss', loss)
            mlflow.log_metric('accuracy', accuracy)

            # Log parameters
            mlflow.log_params(datagenerator_kwargs)
            # all_params is a config object; MLflow needs a plain dict.
            mlflow.log_params(self.config.all_params.to_dict())

            # Only the non-derived data-flow settings are logged; 'shuffle'
            # here is the validation-flow value.
            logged_flow_keys = ['interpolation', 'class_mode', 'shuffle']
            mlflow.log_params({key: dataflow_kwargs[key] for key in logged_flow_keys})

            # The model is logged as a run artifact only (no registry call),
            # so no tracking-store check is needed. The earlier
            # `scheme != False` test compared a string to False and was
            # therefore always true.
            mlflow.tensorflow.log_model(model, artifact_path="model")

In [59]:
# Build the configuration and run the full train / evaluate / log pipeline.
# There is deliberately no try/except here: a handler that only does
# `raise e` re-raises the same exception and just adds a traceback frame.
config = ConfigurationManager()
val_config = config.get_experiment_config()
experiment = Experiment(val_config)
experiment.experiment()

[2023-08-03 14:49:50,130: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-08-03 14:49:50,136: INFO: common: yaml file: params.yaml loaded successfully]
[2023-08-03 14:49:50,141: INFO: common: created directory at: artifacts]
[2023-08-03 14:49:50,143: INFO: common: created directory at: artifacts/evaluation]


Found 624 images belonging to 2 classes.
Found 4763 images belonging to 2 classes.
Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                          

  saving_api.save_model(


 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         
                                                                 
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         
                                                                 
 flatten_4 (Flatten)         (None, 25088)             0         
                                                                 
 dense_4 (Dense)             (None, 2)                 50178     
                                                                 
Total params: 14764866 (56.32 MB)
Trainable params: 50178 (196.01 KB)
Non-tr



INFO:tensorflow:Assets written to: C:\Users\lpaes\AppData\Local\Temp\tmpbg7jl_iu\model\data\model\assets
[2023-08-03 15:09:59,236: INFO: builder_impl: Assets written to: C:\Users\lpaes\AppData\Local\Temp\tmpbg7jl_iu\model\data\model\assets]


