In [1]:
%pwd
%cd ..

/home/gorkemkola/Desktop/Projects/cv_project


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from pathlib import Path
from dataclasses import dataclass
# import datasets.ImageFolder
from torchvision import datasets
from typing import Dict, List, Tuple, Optional
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score,
    precision_recall_curve, roc_curve
)
import warnings
warnings.filterwarnings('ignore')

from ensemblenet import logger
from ensemblenet.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from ensemblenet.utils import read_yaml, create_directories
from torchvision import models, transforms

In [3]:
@dataclass(frozen=True)
class TestConfig:
    root_dir: Path
    models_dir: Path
    results_dir: Path
    test_dir: Path
    best_model_paths: List[Path]
    hyperparams_paths: List[Path]
    model_names: List[str]
    params: Dict[str, any]
    hyperparams: List[pd.DataFrame]
    training_metrics: List[pd.DataFrame]
    batch_size: int

In [None]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ) -> None:
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        
        create_directories(
            [
                self.config.artifacts_root,
                Path(self.config.test.root_dir),
                Path(self.config.test.results_dir),
            ]
        )

    def get_test_config(self) -> TestConfig:
        # Define model names based on your training artifacts
        model_names = sorted(self.config.test.model_names)
        best_model_paths = [self.config.test.best_model_path_template.format(model_name=name) for name in model_names]
        hyperparams_paths = [self.config.test.hyperparams_path_template.format(model_name=name) for name in model_names]
        hyperparams = [pd.read_csv(path) for path in hyperparams_paths]
        training_metric_paths = [self.config.test.training_metrics_path_template.format(model_name=name) for name in model_names]
        training_metrics = [pd.read_csv(path) for path in training_metric_paths]
        test_config = TestConfig(
            root_dir=Path(self.config.test.root_dir),
            models_dir=Path(self.config.test.models_dir),
            results_dir=Path(self.config.test.results_dir),
            test_dir=Path(self.config.data_ingestion.test_dir),
            best_model_paths=best_model_paths,
            hyperparams_paths=hyperparams_paths,
            model_names=model_names,
            params=self.params,
            hyperparams=hyperparams,
            training_metrics=training_metrics,
            batch_size=self.params.BATCH_SIZE
        )
        
        return test_config

In [None]:
ConfigurationManager().get_test_config()

[2025-06-21 09:22:38,554: INFO: utils: yaml file config/config.yaml loaded successfully:]
[2025-06-21 09:22:38,559: INFO: utils: yaml file config/params.yaml loaded successfully:]
[2025-06-21 09:22:38,560: INFO: utils: created directory at: artifacts:]
[2025-06-21 09:22:38,561: INFO: utils: created directory at: artifacts/test:]
[2025-06-21 09:22:38,561: INFO: utils: created directory at: artifacts/test/results:]


TestConfig(root_dir=PosixPath('artifacts/test'), models_dir=PosixPath('artifacts/training'), results_dir=PosixPath('artifacts/test/results'), test_dir=PosixPath('artifacts/data_ingestion/test'), best_model_paths=['artifacts/training/model_ensemblenet_best.pth', 'artifacts/training/model_ensemblenet1_best.pth', 'artifacts/training/model_inception_v3_best.pth', 'artifacts/training/model_mnasnet_best.pth', 'artifacts/training/model_mobilenet_best.pth', 'artifacts/training/model_mobilenet_best.pth', 'artifacts/training/model_resnet_best.pth', 'artifacts/training/model_shufflenet_best.pth', 'artifacts/training/model_squeezenet_best.pth', 'artifacts/training/model_vit_best.pth'], hyperparams_paths=['artifacts/training/logs/hyperparameters_ensemblenet.csv', 'artifacts/training/logs/hyperparameters_ensemblenet1.csv', 'artifacts/training/logs/hyperparameters_inception_v3.csv', 'artifacts/training/logs/hyperparameters_mnasnet.csv', 'artifacts/training/logs/hyperparameters_mobilenet.csv', 'artifa

In [6]:
from ensemblenet.custom_models.ensemblenet import EnsembleNet

In [7]:
@dataclass(frozen=True)
class Models:
    resnet = models.resnet50
    inceptionnet_v3 = models.inception_v3
    ensemblenet = EnsembleNet
    mnasnet = models.mnasnet0_5
    mobilenet = models.mobilenet_v3_small
    squeezenet = models.squeezenet1_1
    shufflenet = models.shufflenet_v2_x0_5

In [None]:
model = torch.load(
    ConfigurationManager().get_test_config().best_model_paths[0],
    map_location=torch.device('cpu'),
    weights_only=False
)

[2025-06-21 09:22:38,694: INFO: utils: yaml file config/config.yaml loaded successfully:]
[2025-06-21 09:22:38,696: INFO: utils: yaml file config/params.yaml loaded successfully:]
[2025-06-21 09:22:38,697: INFO: utils: created directory at: artifacts:]
[2025-06-21 09:22:38,697: INFO: utils: created directory at: artifacts/test:]
[2025-06-21 09:22:38,698: INFO: utils: created directory at: artifacts/test/results:]


In [None]:
class Test:
    def __init__(self, config: TestConfig) -> None:
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")


    def _load_model(self, model_path: Path) -> nn.Module:
        model = torch.load(
            model_path,
            map_location=torch.device('cpu'),
            weights_only=False
        )
        return model.to(self.device)
    
    def load_models(self) -> List[nn.Module]:
        models = {}
        for i, model_path in enumerate(self.config.best_model_paths):
            model_name = self.config.model_names[i]
            model = self._load_model(model_path)
            models[model_name] = model
            logger.info(f"Loaded model from {model_path}")
            print(f"Loaded model from {model_path}")
        return models
        
    def compute_dataset_mean_std(self, data_dir, image_size=(224, 224), batch_size=64, sample_limit=None):
        """
        Computes mean and std for all images in a directory (ImageFolder structure).
        Uses cached .npy arrays if available, otherwise computes and saves them.
        """
        mean_path = Path(data_dir) / "mean.npy"
        std_path = Path(data_dir) / "std.npy"

        if mean_path.exists() and std_path.exists():
            logger.info(f"Loading cached mean/std from {mean_path} and {std_path}")
            mean = np.load(mean_path)
            std = np.load(std_path)
            return mean.tolist(), std.tolist()

        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor()
        ])
        dataset = datasets.ImageFolder(data_dir, transform=transform)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

        n_images = 0
        mean = 0.
        std = 0.
        for i, (imgs, _) in enumerate(loader):
            if sample_limit and n_images >= sample_limit:
                break
            imgs = imgs.view(imgs.size(0), imgs.size(1), -1)
            mean += imgs.mean(2).sum(0)
            std += imgs.std(2).sum(0)
            n_images += imgs.size(0)
            if sample_limit and n_images >= sample_limit:
                break

        mean /= n_images
        std /= n_images

        np.save(mean_path, mean.cpu().numpy() if hasattr(mean, "cpu") else mean)
        np.save(std_path, std.cpu().numpy() if hasattr(std, "cpu") else std)
        logger.info(f"Saved computed mean/std to {mean_path} and {std_path}")

        return mean.tolist(), std.tolist()
    
    def load_test_dataset(self, data_dir: Path, image_size=(224, 224), batch_size=64, sample_limit=None):
        """
        Loads the test dataset from the specified directory.
        """
        mean, std = self.compute_dataset_mean_std(data_dir, image_size, batch_size, sample_limit)
        transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        dataset = datasets.ImageFolder(data_dir, transform=transform)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)
        return loader
    
    def evaluate_model(self, model_name, model: nn.Module, dataloader: DataLoader) -> Dict[str, float]:
        """
        Evaluates the model on the given dataloader.
        Returns a dictionary with accuracy, precision, recall, f1-score, and AUC.
        """
        
        model.eval()
        all_preds = []
        all_labels = []
        # compute evaluation time
        logger.info(f"Evaluating model: {model.__class__.__name__}")
        print(f"Evaluating model: {model.__class__.__name__}")
        start_time = torch.cuda.Event(enable_timing=True) if self.device.type == 'cuda' else None
        if start_time:
            start_time.record()
        criterion = nn.CrossEntropyLoss()
        losses = []
        
        # add top5 accuracy
        top5_correct = 0
        top5_total = 0
        if start_time:
            start_time.record()
        if self.device.type == 'cuda':
            model = model.to(self.device)
            criterion = criterion.to(self.device)
        else:
            model = model.cpu()
            criterion = criterion.cpu()
        
        with torch.no_grad():
            for images, labels in dataloader:
                images = images.to(self.device)
                labels = labels.to(self.device)
                
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                losses.append(loss.item())
                # Store predictions and labels
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                # Compute top-5 accuracy
                _, top5_preds = torch.topk(outputs, 5, dim=1)
                top5_correct += (top5_preds == labels.view(-1, 1)).sum().item()
                top5_total += labels.size(0)
        # Compute top-5 accuracy
        top5_accuracy = top5_correct / top5_total if top5_total > 0 else 0
        logger.info(f"Top-5 Accuracy: {top5_accuracy:.4f}")
        
        if start_time:
            end_time = torch.cuda.Event(enable_timing=True)
            end_time.record()
            torch.cuda.synchronize()
            eval_time = start_time.elapsed_time(end_time) / 1000  # Convert to seconds
        
        loss = np.mean(losses)
        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
        recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
        f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
        # Compute confusion matrix and classification report
            
        results_path = self.config.results_dir / "results.csv"
        if not results_path.exists():
            pd.DataFrame(columns=["model", "loss", "accuracy", "top 5 accuracy", "precision", "recall", "f1"]).to_csv(results_path, index=False)
        results_df = pd.read_csv(results_path)
        new_row = {
            "model": model_name,
            "loss": loss,
            "accuracy": accuracy,
            "top 5 accuracy": top5_accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "time": eval_time if start_time else None
        }

        new_row = pd.DataFrame([new_row])
        results_df = pd.concat([results_df, new_row], ignore_index=True)
        results_df.to_csv(results_path, index=False)
        logger.info(f"Saved evaluation results to {results_path}")

    def evaluate_all_models(self):
        """
        Loads all models and evaluates them
        """
        models = self.load_models()
        test_loader = self.load_test_dataset(self.config.test_dir, 
                                             image_size=(224, 224), 
                                             batch_size=self.config.batch_size, )
        
        for model_name, model in models.items():
            logger.info(f"Evaluating model: {model_name}")
            self.evaluate_model(model_name, model, test_loader)

In [None]:
if __name__ == "__main__":
    config_manager = ConfigurationManager()
    test_config = config_manager.get_test_config()
    test_instance = Test(test_config)
    test_instance.evaluate_all_models()
    logger.info("Testing completed and metrics plots saved.")

[2025-06-21 09:22:38,979: INFO: utils: yaml file config/config.yaml loaded successfully:]
[2025-06-21 09:22:38,982: INFO: utils: yaml file config/params.yaml loaded successfully:]
[2025-06-21 09:22:38,983: INFO: utils: created directory at: artifacts:]
[2025-06-21 09:22:38,984: INFO: utils: created directory at: artifacts/test:]
[2025-06-21 09:22:38,985: INFO: utils: created directory at: artifacts/test/results:]
[2025-06-21 09:22:39,024: INFO: 2630596512: Using device: cuda:]
[2025-06-21 09:22:39,246: INFO: 2630596512: Loaded model from artifacts/training/model_ensemblenet_best.pth:]
Loaded model from artifacts/training/model_ensemblenet_best.pth
[2025-06-21 09:22:39,354: INFO: 2630596512: Loaded model from artifacts/training/model_ensemblenet1_best.pth:]
Loaded model from artifacts/training/model_ensemblenet1_best.pth
[2025-06-21 09:22:39,558: INFO: 2630596512: Loaded model from artifacts/training/model_inception_v3_best.pth:]
Loaded model from artifacts/training/model_inception_v3_b