In [1]:
import os
import cProfile
import pstats
import nest_asyncio
import asyncio



In [2]:
%pwd

'c:\\Users\\etrou\\OneDrive\\Desktop\\SE489GroupProjectGit\\group_project_se489\\notebooks'

In [3]:
# Step up one level from the current working directory so the relative paths
# used by later cells resolve against the parent folder (the project root when
# the kernel starts in notebooks/).
# NOTE: this is relative, so re-running the cell moves up one more level each time.
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\etrou\\OneDrive\\Desktop\\SE489GroupProjectGit\\group_project_se489'

In [5]:
import gdown
from se489_group_project import logger

#executer = ThreadPoolExecutor(max_workers=4)

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class CreateBaseModelConfig:
    """Read-only settings consumed when building and saving the base model.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    """

    # Directory for this stage; ConfigurationManager creates it before building the config.
    root_dir: Path
    # Destination passed to save_model for the freshly built VGG16 model.
    base_model_path: Path
    # Destination passed to save_model for the model with the added classification head.
    updated_base_model_path: Path
    # Forwarded as `input_shape` to VGG16.
    params_image_size: list
    # Forwarded as `learning_rate` to the SGD optimizer.
    params_learning_rate: float
    # Forwarded as `include_top` to VGG16.
    params_include_top: bool
    # Forwarded as `weights` to VGG16.
    params_weights: str
    # Number of units in the final softmax Dense layer.
    params_classes: int

In [7]:
from se489_group_project.constants import *
from se489_group_project.utility.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Loads the project's YAML settings and turns them into stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and create the top-level data storage directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        storage_dirs = [self.config.data_storage]
        create_directories(storage_dirs)

    def get_prepare_base_model_config(self) -> CreateBaseModelConfig:
        """Build the configuration object for the base-model preparation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A CreateBaseModelConfig filled from the ``prepare_base_model``
            section of the config file and the values in the params file.
        """
        stage = self.config.prepare_base_model

        create_directories([stage.root_dir])

        settings = dict(
            root_dir=Path(stage.root_dir),
            base_model_path=Path(stage.base_model_path),
            updated_base_model_path=Path(stage.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES,
        )
        return CreateBaseModelConfig(**settings)

In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf


In [10]:
class PrepareBaseModel:
    """Creates a VGG16 base model and extends it with a softmax classification head."""

    def __init__(self, config: CreateBaseModelConfig):
        """Keep the configuration that supplies model parameters and save paths."""
        self.config = config

    async def get_base_model(self):
        """Instantiate VGG16 from the configured parameters and save it.

        The model is stored on ``self.model`` and written to
        ``config.base_model_path``. The method is a coroutine so callers can
        ``await`` it, but its body contains no await points.
        """
        self.model = tf.keras.applications.vgg16.VGG16(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze base layers, attach a classification head and compile the result.

        Args:
            model: Base Keras model whose output feeds the new head.
            classes: Number of units in the final softmax layer.
            freeze_all: When true, the whole base model is made non-trainable.
            freeze_till: When ``freeze_all`` is false and this is a positive
                integer, every layer except the last ``freeze_till`` layers is
                made non-trainable. ``None`` or a non-positive value leaves the
                layers untouched.
            learning_rate: Learning rate passed to the SGD optimizer.

        Returns:
            The compiled model (base model + Flatten + BatchNormalization +
            Dense softmax). Its summary is printed before returning.
        """
        if freeze_all:
            # Setting the flag on the model applies it to every layer it contains.
            model.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            # Freeze each selected layer individually; assigning model.trainable
            # here would freeze the entire model, including the trailing layers
            # that are supposed to stay trainable.
            for layer in model.layers[:-freeze_till]:
                layer.trainable = False

        flatten_in = tf.keras.layers.Flatten()(model.output)

        batch_norm = tf.keras.layers.BatchNormalization()(flatten_in)

        prediction = tf.keras.layers.Dense(
            units=classes,
            activation="softmax"
        )(batch_norm)

        full_model = tf.keras.models.Model(
            inputs=model.input,
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(reduction='auto'),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    async def update_base_model(self):
        """Build the full model from ``self.model`` with all base layers frozen and save it.

        Requires ``get_base_model`` to have run first, since it reads
        ``self.model``. The result is stored on ``self.full_model`` and written
        to ``config.updated_base_model_path``.
        """
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Write ``model`` to ``path`` using the model's own ``save`` method."""
        model.save(path)

In [11]:
def analyze(file):
    """Print two top-10 reports for a saved profile.

    Args:
        file: Path to a stats file that ``pstats.Stats`` can load.

    The first report is ordered by cumulative time, the second by total time.
    Nothing is returned; both reports are printed.
    """
    stats = pstats.Stats(file)
    reports = (
        ("\nTop 10 functions sorted by cumulative time:", 'cumtime'),
        ("\nTop 10 functions sorted by total time:", 'tottime'),
    )
    for heading, sort_key in reports:
        print(heading)
        stats.sort_stats(sort_key).print_stats(10)
    

In [12]:
import subprocess
import pstats

async def main():
    """Profile the base-model preparation steps and open the result in snakeviz.

    Builds the configuration, runs ``get_base_model`` and ``update_base_model``
    under cProfile, writes the stats to
    ``<cwd>/se489_group_project/visualizations/cprofile_stats_prepare_base_model.prof``
    and then tries to launch ``snakeviz`` on that file.

    Raises:
        Exception: Any error raised while preparing the model or writing the
            stats is logged and re-raised unchanged.
    """
    try:
        log_dir = os.path.join(os.getcwd(), "se489_group_project", "visualizations")
        # dump_stats opens the target file for writing and will fail if the
        # folder is missing, so create it up front.
        os.makedirs(log_dir, exist_ok=True)
        file = os.path.join(log_dir, 'cprofile_stats_prepare_base_model.prof')

        config = ConfigurationManager()
        prepare_base_model_config = config.get_prepare_base_model_config()
        prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)

        profiler = cProfile.Profile()
        profiler.enable()
        try:
            logger.info("Preparing Base Model")
            await prepare_base_model.get_base_model()
            logger.info("Updating Base Model")
            await prepare_base_model.update_base_model()
        finally:
            # Stop profiling even when a step fails, so the profiler is not
            # left active for whatever runs next in the same kernel.
            profiler.disable()
        profiler.dump_stats(file)

        profile_file_full_path = os.path.abspath(file)
        # Launch snakeviz in the background to visualize the profiling results.
        try:
            subprocess.Popen(["snakeviz", profile_file_full_path])
        except FileNotFoundError:
            print("snakeviz is not installed or not found in the system path.")

    except Exception as e:
        # f-string so the actual exception text ends up in the log.
        logger.error(f"An Error Occurred: {e}")
        # Bare raise keeps the original traceback intact.
        raise

In [13]:
if __name__ == "__main__":
    # nest_asyncio.apply() runs before asyncio.run() so that main() can be
    # driven even if an event loop is already running in this process
    # (presumably the notebook kernel's own loop — confirm for the target environment).
    nest_asyncio.apply()
    asyncio.run(main())

[2024-05-30 16:27:51,382: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-30 16:27:51,385: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-30 16:27:51,387: INFO: common: created directory at: data]
[2024-05-30 16:27:51,388: INFO: common: created directory at: data/prepare_base_model]
[2024-05-30 16:27:51,389: INFO: 3117042638: Preparing Base Model]
[2024-05-30 16:27:51,998: INFO: 3117042638: Updating Base Model]
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                              