In [10]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import cv2
from torchvision import transforms, models
from PIL import Image
import os
import random

# Path setup

In [11]:
# Path untuk model yang sudah ditraining
MODEL_PATH = r"/kaggle/input/resnetgradcam/fold_5_best_model.pth"

# Path dataset
DATASET_PATH = r"/kaggle/input/data-val"

# Path untuk menyimpan hasil Grad-CAM
OUTPUT_DIR = r"Grad-CAM Result"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Class names
CLASS_NAMES = ['other_activities', 'safe_driving', 'talking_phone', 'texting_phone', 'turning']

# Fungsi gradcam

In [12]:
class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        
        # Hook untuk mendapatkan aktivasi dan gradients
        self.target_layer.register_forward_hook(self.forward_hook)
        self.target_layer.register_full_backward_hook(self.backward_hook)
    
    def forward_hook(self, module, input, output):
        self.activations = output
    
    def backward_hook(self, module, grad_input, grad_output):
        self.gradients = grad_output[0]
    
    def generate_cam(self, input_image, target_class=None):
        # Forward pass
        output = self.model(input_image)
        
        if target_class is None:
            target_class = output.argmax(dim=1).item()
        
        # Zero gradients
        self.model.zero_grad()
        
        # Backward pass untuk class target
        one_hot = torch.zeros_like(output)
        one_hot[0][target_class] = 1
        output.backward(gradient=one_hot)
        
        # Calculate weights
        gradients = self.gradients.detach().cpu().numpy()[0]
        activations = self.activations.detach().cpu().numpy()[0]
        
        weights = np.mean(gradients, axis=(1, 2))
        
        # Generate CAM
        cam = np.zeros(activations.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * activations[i]
        
        # Apply ReLU
        cam = np.maximum(cam, 0)
        
        # Normalize
        cam = cam - np.min(cam)
        cam = cam / (np.max(cam) + 1e-8)
        
        return cam, output, target_class

# Load trained model

In [13]:
def load_model(model_path, num_classes=5):
    """Load trained ResNet-18 model"""
    model = models.resnet18(weights=False)
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Load checkpoint and support multiple common formats:
    # - a plain state_dict saved via torch.save(model.state_dict(), path)
    # - a checkpoint dict with keys like 'model_state_dict' or 'state_dict'
    checkpoint = torch.load(model_path, map_location=device)

    if isinstance(checkpoint, dict):
        if 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        elif 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            # Fallback: assume the dict *is* the state_dict
            state_dict = checkpoint
    else:
        # checkpoint is likely an OrderedDict state_dict
        state_dict = checkpoint

    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    return model

# Preprocessing 

In [14]:
def preprocess_image(image_path):
    """Preprocess image for ResNet-18"""
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                           std=[0.229, 0.224, 0.225])
    ])
    
    image = Image.open(image_path).convert('RGB')
    original_image = np.array(image)
    input_tensor = transform(image).unsqueeze(0).to(device)
    
    return input_tensor, original_image

# Generate dan visualisasi grad-cam dengan 3 subplot

In [15]:
def visualize_gradcam_simple(image_path, model, target_layer, true_class, save_path):
    
    # Preprocess image
    input_tensor, original_image = preprocess_image(image_path)
    
    # Initialize Grad-CAM
    grad_cam = GradCAM(model, target_layer)
    
    # Generate CAM
    cam, output, predicted_class = grad_cam.generate_cam(input_tensor)
    
    # Get confidence
    probabilities = torch.softmax(output, dim=1)[0].detach().cpu().numpy()
    confidence = probabilities[predicted_class]
    
    # Resize CAM to match original image
    cam_resized = cv2.resize(cam, (original_image.shape[1], original_image.shape[0]))
    cam_resized = np.uint8(255 * cam_resized)
    heatmap = cv2.applyColorMap(cam_resized, cv2.COLORMAP_JET)
    
    # Convert original (PIL -> numpy) which is in RGB to BGR so it matches OpenCV heatmap
    original_bgr = cv2.cvtColor(original_image, cv2.COLOR_RGB2BGR)

    # Superimpose heatmap on original (both in BGR)
    superimposed_img = heatmap.astype(float) * 0.4 + original_bgr.astype(float) * 0.6
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

    # Convert BGR results back to RGB for matplotlib
    superimposed_img_rgb = cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB)
    heatmap_rgb = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    
    # Class names
    class_names = ['other_activities', 'safe_driving', 'talking_phone', 
                   'texting_phone', 'turning']
    
    # Determine if prediction is correct
    is_correct = (predicted_class == true_class)
    result_text = "CORRECT" if is_correct else "WRONG"
    result_color = "green" if is_correct else "red"
    
    # Create figure with 3 subplots
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    
    # Original image
    axes[0].imshow(original_image)
    axes[0].set_title(f'Original\nTrue: {class_names[true_class]}', fontsize=12)
    axes[0].axis('off')
    
    # Heatmap
    axes[1].imshow(heatmap_rgb)
    axes[1].set_title('Grad-CAM Heatmap', fontsize=12)
    axes[1].axis('off')
    
    # Superimposed image dengan informasi prediksi
    axes[2].imshow(superimposed_img_rgb)
    pred_class_name = class_names[predicted_class]
    true_class_name = class_names[true_class]
    
    title = (f'Overlay\nPred: {pred_class_name}\n'
             f'True: {true_class_name}\n'
             f'Result: {result_text}\n'
             f'Conf: {confidence:.3f}')
    
    axes[2].set_title(title, color=result_color, fontsize=12)
    axes[2].axis('off')
    
    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=300)
    plt.close('all')

    return is_correct, predicted_class, confidence

# Analisis prediksi sampel dan save hasil

In [16]:
def analyze_class_predictions_simple(model, target_layer):
    """Analisis prediksi untuk SEMUA sampel di setiap kelas dan simpan hasil"""
    
    print("Starting analysis of predictions for each class...")
    print("Processing ALL samples from each class...\n")
    
    # Dictionary untuk melacak hasil
    class_results = {class_name: {'correct': [], 'wrong': []} for class_name in CLASS_NAMES}
    
    # Iterasi melalui setiap kelas
    for class_idx, class_name in enumerate(CLASS_NAMES):
        print(f"\nAnalyzing class: {class_name}")
        
        class_path = os.path.join(DATASET_PATH, class_name)
        
        if not os.path.exists(class_path):
            print(f"  Class path not found: {class_path}")
            continue
        
        # Get all images in the class
        all_images = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
        
        if not all_images:
            print(f"  No images found in {class_path}")
            continue
        
        print(f"  Found {len(all_images)} images to process...")
        
        # Process SEMUA images (tanpa batasan)
        for idx, img_name in enumerate(all_images, 1):
            img_path = os.path.join(class_path, img_name)
            
            try:
                # Create save path
                save_filename = f"{class_name}_{img_name.split('.')[0]}_gradcam.png"
                save_path = os.path.join(OUTPUT_DIR, save_filename)
                
                # Generate dan simpan Grad-CAM
                is_correct, predicted_class, confidence = visualize_gradcam_simple(
                    image_path=img_path,
                    model=model,
                    target_layer=target_layer,
                    true_class=class_idx,
                    save_path=save_path
                )
                
                # Store results
                result_info = {
                    'image_path': img_path,
                    'predicted_class': predicted_class,
                    'confidence': confidence,
                    'save_path': save_path
                }
                
                result_type = 'correct' if is_correct else 'wrong'
                class_results[class_name][result_type].append(result_info)
                
                status = "✓" if is_correct else "✗"
                print(f"  [{idx}/{len(all_images)}] {status} {img_name} -> {CLASS_NAMES[predicted_class]} (conf: {confidence:.3f})")
                
            except Exception as e:
                print(f"  [{idx}/{len(all_images)}] Error processing {img_name}: {str(e)}")
                continue
    
    return class_results

# Print summary analisis

In [17]:
def print_analysis_summary(class_results):
    
    print("\n" + "="*60)
    print("ANALYSIS SUMMARY")
    print("="*60)
    
    total_correct = 0
    total_wrong = 0
    
    for class_name in CLASS_NAMES:
        correct_count = len(class_results[class_name]['correct'])
        wrong_count = len(class_results[class_name]['wrong'])
        total_samples = correct_count + wrong_count
        
        total_correct += correct_count
        total_wrong += wrong_count
        
        accuracy = correct_count / total_samples if total_samples > 0 else 0
        
        print(f"{class_name:20} | Correct: {correct_count:2d} | Wrong: {wrong_count:2d} | "
              f"Total: {total_samples:2d} | Accuracy: {accuracy:6.2%}")
    
    print("-"*60)
    overall_accuracy = total_correct / (total_correct + total_wrong) if (total_correct + total_wrong) > 0 else 0
    print(f"{'OVERALL':20} | Correct: {total_correct:2d} | Wrong: {total_wrong:2d} | "
          f"Total: {total_correct + total_wrong:2d} | Accuracy: {overall_accuracy:6.2%}")

# Main execution

In [18]:
if __name__ == "__main__":
    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Load model
    print("Loading model...")
    try:
        model = load_model(MODEL_PATH, num_classes=5)
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {e}")
        exit()
    
    # Get target layer
    target_layer = model.layer4[1].conv2
    
    # Analyze predictions untuk SEMUA sampel di setiap kelas
    results = analyze_class_predictions_simple(
        model=model, 
        target_layer=target_layer
    )
    
    # Print summary
    print_analysis_summary(results)
    
    print(f"\nAll Grad-CAM visualizations saved to: {OUTPUT_DIR}")

Using device: cuda
Loading model...
Model loaded successfully!
Starting analysis of predictions for each class...
Processing ALL samples from each class...


Analyzing class: other_activities
  Found 237 images to process...
  [1/237] ✓ img_17534.jpg -> other_activities (conf: 1.000)
  [2/237] ✓ img_20458.jpg -> other_activities (conf: 1.000)
  [3/237] ✓ IMG_20240930_143931282_HDR_AE.jpg -> other_activities (conf: 0.997)
  [4/237] ✓ img_23170.jpg -> other_activities (conf: 1.000)
  [5/237] ✓ 2019-04-2417-06-34.png -> other_activities (conf: 1.000)
  [6/237] ✓ IMG_20240930_141510006_HDR.jpg -> other_activities (conf: 0.983)
  [7/237] ✓ img_14778.jpg -> other_activities (conf: 1.000)
  [8/237] ✓ img_20846.jpg -> other_activities (conf: 1.000)
  [9/237] ✓ img_17696.jpg -> other_activities (conf: 1.000)
  [10/237] ✓ IMG_20240930_140112031_HDR_AE.jpg -> other_activities (conf: 1.000)
  [11/237] ✓ img_27433.jpg -> other_activities (conf: 0.998)
  [12/237] ✓ img_16688.jpg -> other_activities 