# From resnet_model_jf.ipynb

In [None]:
import os

# Define the number of epochs
num_epochs = 10

# Checkpoints are written to ../output relative to the working directory.
# torch.save does not create missing directories, so make sure it exists
# before any training time is spent.
output_dir = os.path.join(os.getcwd(), "../output")
os.makedirs(output_dir, exist_ok=True)

# Train the model, checkpoint periodically and validate after every epoch.
# Relies on names defined in other cells: torch, model, device, optimizer,
# criterion, train_loader, val_loader, train_dataset and val_dataset.
for epoch in range(num_epochs):
    # Train the model on the training set
    model.train()
    train_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # Move the data to the device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate loss * batch size so the epoch figure can be divided by
        # the dataset size below (assumes criterion returns a per-batch mean
        # -- TODO confirm).
        train_loss += loss.item() * inputs.size(0)

        # Print status every 50 batches
        if (i + 1) % 50 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

    # Save a checkpoint when the 0-based epoch index is even and >= 3
    # (files named epoch_5, epoch_7, ...) and always after the final epoch.
    if (epoch >= 3 and epoch % 2 == 0) or epoch == num_epochs - 1:
        save_path = os.path.join(output_dir, f"v3_test_2207_resnet50_256_epoch_{epoch+1}.pth")
        torch.save(model.state_dict(), save_path)
        print(f"Model saved at {save_path}")

    # Evaluate the model on the validation set
    model.eval()
    test_loss = 0.0
    test_correct = 0  # plain Python int, so an empty val_loader cannot break the maths below
    with torch.no_grad():
        for inputs, labels in val_loader:
            # Move the data to the device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Update the validation loss and the count of correct predictions
            test_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            test_correct += (preds == labels).sum().item()

    # Print the per-sample training/validation loss and validation accuracy
    train_loss /= len(train_dataset)
    test_loss /= len(val_dataset)
    test_acc = test_correct / len(val_dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}] Train Loss: {train_loss:.4f} Test Loss: {test_loss:.4f} Test Acc: {test_acc:.4f}")

    LIME model to explain resnet50
    slow and confusing, abandoned

In [None]:
from lime import lime_image
from lime.wrappers.scikit_image import SegmentationAlgorithm
from skimage.segmentation import quickshift, mark_boundaries
from PIL import Image
import numpy as np
import torch
import matplotlib.pyplot as plt

# Function to get model predictions for Lime
def batch_predict(images):
    """Return class probabilities for a batch of images, as LIME's classifier callback.

    Each element of ``images`` is cast to uint8, wrapped as an RGB PIL image
    and passed through the notebook-level ``transform``. The stacked batch is
    moved to ``device`` and run through the notebook-level ``model`` in eval
    mode without gradient tracking.

    Returns:
        NumPy array holding the softmax (over dim 1) of the model output.
    """
    model.eval()

    tensors = []
    for image in images:
        pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
        tensors.append(transform(pil_image))
    batch = torch.stack(tensors, dim=0).to(device)

    with torch.no_grad():
        logits = model(batch)

    probabilities = torch.nn.functional.softmax(logits, dim=1)
    return probabilities.cpu().numpy()

# Load the image to explain from the zipped dataset.
# `zipfile`, `io`, `raw_data_path` and `class_names` are not defined in this
# cell; they must already exist in the notebook session.
# img_path = os.path.join(os.getcwd(), "../parsed_data/selected_images/MICROSOFT/MICROSOFT.20180213.91886A548BC3B41FA0C0148C47F19A07.jpg")
img_path = 'Data/KnownImages/UrlScreenshotNew/MTBANK.20190528.3C42F865D0CF7AC06FA3CB78A0C1BC67.png'
# img = Image.open(img_path).convert('RGB')
with zipfile.ZipFile(raw_data_path, 'r') as zip_ref, zip_ref.open(img_path) as file:
    img = Image.open(io.BytesIO(file.read())).convert('RGB')

# Create a Lime image explainer
explainer = lime_image.LimeImageExplainer()

# Quickshift superpixel segmentation with hand-picked parameters
segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=10, max_dist=200, ratio=0.2)

# Explain the image; num_samples was reduced from 1000 to 200 to speed this up
explanation = explainer.explain_instance(
    np.array(img),
    batch_predict,
    top_labels=5,
    hide_color=0,
    num_samples=200,
    segmentation_fn=segmentation_fn,
)

# Image and superpixel mask for the top predicted label (positive evidence only)
top_label = explanation.top_labels[0]
temp, mask = explanation.get_image_and_mask(top_label, positive_only=True, num_features=10, hide_rest=False)

# Convert temp to float to allow blending
temp = temp.astype(np.float32)

# Black overlay used to dim the pixels outside the mask
dark_overlay = np.zeros_like(temp)
dark_overlay[:, :] = [0, 0, 0]
alpha = 0.6  # Weight of the dark overlay outside the mask

# Masked pixels keep the image as-is; the rest is an alpha-blend of the
# dark overlay and the image.
inside = mask[:, :, np.newaxis]
outside = 1 - mask[:, :, np.newaxis]
highlighted_image = temp * inside + dark_overlay * outside * alpha + temp * outside * (1 - alpha)

# Clip values to ensure they are within valid range
highlighted_image = np.clip(highlighted_image, 0, 255)

# Display the image (scaled to [0, 1] for imshow)
plt.figure()
plt.imshow(highlighted_image / 255.0)
plt.axis('off')
plt.title(f"Explanation for class: {class_names[top_label]}")
plt.show()

    Grad-CAM using a transparency heatmap:
    important regions stay visible, unimportant regions are blanked out

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import resnet50
from PIL import Image
import matplotlib.pyplot as plt
import os
import zipfile
import io
import cv2

# Preprocessing applied to every image before it reaches the network:
# resize to 256, center-crop to 224, convert to a tensor, then normalise each
# channel with the constants below (presumably the usual ImageNet statistics
# -- confirm against the training pipeline).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
transform = transforms.Compose(_preprocessing_steps)

# Load the trained ResNet50 model
def load_model(model_path, num_classes):
    """Build a ResNet50 classifier and load fine-tuned weights from disk.

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        num_classes: Output size of the replacement final fully connected layer.

    Returns:
        The model in evaluation mode, with the checkpoint tensors mapped to CPU.
    """
    # load_state_dict (strict by default) overwrites every parameter and
    # buffer, so the ImageNet weights are not requested. This avoids a
    # pointless download and the deprecated ``pretrained`` argument.
    model = resnet50()
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # map_location puts the tensors on the CPU whatever device they were
    # saved from; callers move the model to their own device afterwards.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    model.eval()
    return model

# Function to load and preprocess the image
def load_and_preprocess_image(img_path, transform):
    """Read one image out of the dataset zip and turn it into a model input.

    Args:
        img_path: Member name inside the archive at the notebook-level
            ``raw_data_path``.
        transform: Callable applied to the RGB PIL image.

    Returns:
        The transformed image with a leading batch dimension added.
    """
    with zipfile.ZipFile(raw_data_path, 'r') as archive, archive.open(img_path) as member:
        raw_bytes = member.read()
        picture = Image.open(io.BytesIO(raw_bytes)).convert('RGB')

    batch = transform(picture).unsqueeze(0)  # add the leading batch dimension
    return batch

# Hook to extract gradients
class SaveFeatures:
    """Capture a module's forward output and the gradient flowing back into it.

    A forward hook is registered on ``module`` at construction. After each
    forward pass ``features`` holds the module output; after a backward pass
    through that output ``gradients`` holds the gradient with respect to it.
    Call ``close()`` (or use the instance as a context manager) to remove
    the forward hook.
    """

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.features = None
        self.gradients = None

    def hook_fn(self, module, input, output):
        """Forward-hook callback: store the output and arm the gradient hook."""
        self.features = output
        # A tensor hook can only be attached to a tensor that requires grad.
        # Without this guard a forward pass under torch.no_grad() would raise
        # while the hook is installed.
        if output.requires_grad:
            output.register_hook(self.save_gradient)

    def save_gradient(self, grad):
        """Tensor-hook callback: store the gradient of the captured output."""
        self.gradients = grad

    def close(self):
        """Remove the forward hook from the module."""
        self.hook.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

# Grad-CAM
def generate_gradcam(model, img_tensor, target_class):
    """Compute a Grad-CAM heatmap for one image and one class.

    Args:
        model: Network exposing ``layer4``; its last block is the layer whose
            activations and gradients are used.
        img_tensor: Input batch; only the first item contributes to the map
            (assumes shape (1, C, H, W) -- TODO confirm).
        target_class: Index of the output score to back-propagate.

    Returns:
        float32 array with the spatial size of ``img_tensor`` (224x224 with
        this notebook's transform), scaled to [0, 1]. It is all zeros when
        no location has a positive class-weighted activation.
    """
    save_features = SaveFeatures(model.layer4[-1])
    try:
        # Forward pass
        output = model(img_tensor)

        # One-hot gradient selecting the target class score; zeros_like keeps
        # it on the same device/dtype as the output instead of relying on a
        # global ``device``.
        one_hot_output = torch.zeros_like(output)
        one_hot_output[0][target_class] = 1

        # Backward pass (the graph is not needed again, so it is not retained)
        model.zero_grad()
        output.backward(gradient=one_hot_output)

        # Gradients and activations of the target layer for the first item
        gradients = save_features.gradients.detach().cpu().numpy()[0]
        activations = save_features.features.detach().cpu().numpy()[0]
    finally:
        # Always detach the hook, even if the forward/backward pass failed
        save_features.close()

    # Channel weights: spatial mean of the gradients
    weights = np.mean(gradients, axis=(1, 2))

    # Weighted sum of the activation maps over channels, then ReLU
    gradcam = (weights[:, np.newaxis, np.newaxis] * activations).sum(axis=0)
    gradcam = np.maximum(gradcam.astype(np.float32), 0)

    # cv2.resize takes (width, height)
    height, width = img_tensor.shape[-2:]
    gradcam = cv2.resize(gradcam, (int(width), int(height)))

    # Scale to [0, 1]; skip the division for an all-zero map to avoid NaNs
    gradcam = gradcam - gradcam.min()
    peak = gradcam.max()
    if peak > 0:
        gradcam = gradcam / peak

    return gradcam

# Function to make predictions and print the top 5 results
def predict_and_show(model, img_path, transform, device, class_names, top_k=5):
    """Classify one zipped image, print the top predictions and show a Grad-CAM view.

    Args:
        model: Classifier passed to ``generate_gradcam`` as well.
        img_path: Member name inside the archive at the notebook-level
            ``raw_data_path``.
        transform: Preprocessing callable handed to ``load_and_preprocess_image``.
        device: Device the input tensor is moved to.
        class_names: Sequence mapping class index to display name.
        top_k: Number of predictions to print (capped at the number of model
            outputs). Defaults to 5.

    Displays the original image, then a figure in which regions with high
    Grad-CAM saliency keep the image and low-saliency regions fade to white.
    """
    img = load_and_preprocess_image(img_path, transform)
    img = img.to(device)

    with torch.no_grad():
        outputs = model(img)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        # One topk call so each probability is paired with its own index
        top = torch.topk(probs, min(top_k, probs.shape[1]), dim=1)
        top_probs = top.values.cpu().numpy()[0]
        top_preds = top.indices.cpu().numpy()[0]

    # Display the image
    with zipfile.ZipFile(raw_data_path, 'r') as zip_ref, zip_ref.open(img_path) as file:
        img_show = Image.open(io.BytesIO(file.read())).convert('RGB')
    plt.imshow(img_show)
    plt.axis('off')
    plt.show()

    # Print the top predictions
    for class_idx, prob in zip(top_preds, top_probs):
        print(f"Predicted: {class_names[class_idx]} with probability {prob:.4f}")

    # Generate Grad-CAM for the top predicted class
    gradcam = generate_gradcam(model, img, top_preds[0])

    # Use the saliency itself as the opacity. Converting a JET colour map to
    # grayscale is not monotonic in saliency (both ends of JET are dark), so
    # it cannot serve as a mask. NaNs from a degenerate map are treated as
    # "not important".
    saliency = np.clip(np.nan_to_num(gradcam), 0.0, 1.0)[:, :, np.newaxis]

    # Resize the displayed image to the heatmap size.
    # NOTE(review): the heatmap comes from the transformed (cropped) input
    # while this is the whole image squashed to the same size, so the two may
    # be slightly misaligned -- confirm whether that matters here.
    cam_height, cam_width = gradcam.shape[:2]
    img_array = np.array(img_show.resize((cam_width, cam_height))).astype(np.float32) / 255.0

    # Important regions keep the image; unimportant regions fade to white
    transparent_img = img_array * saliency + (1.0 - saliency)

    plt.figure()
    plt.imshow(transparent_img)
    plt.axis('off')
    plt.title(f"Grad-CAM for class: {class_names[top_preds[0]]}")
    plt.show()

# Locations of the zipped raw dataset and of the trained checkpoint, both
# relative to the current working directory
raw_data_path = f"{os.getcwd()}/../raw_data/Data.zip"
model_path = os.path.join(os.getcwd(), "../output/v3_test_2207_resnet50_256_epoch_10.pth")

# Class names come from `test_dataset`, which must exist from another cell
class_names = test_dataset.classes
num_classes = len(class_names)

# Build the model with the trained weights and place it on the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = load_model(model_path, num_classes).to(device)

# Example usage: explain a single screenshot stored inside the zip archive
img_path = 'Data/KnownImages/PdfScreenshotNew/ADOBE.20181121.786ABAD536B53E4DE95A8FB535CA6C3A.jpeg'
predict_and_show(model, img_path, transform, device, class_names)

    multiple image prediction (top 5 heatmap)

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import resnet50
from PIL import Image
import matplotlib.pyplot as plt
import zipfile
import io
import cv2
import psutil

# Preprocessing applied to every image before it reaches the network:
# resize to 256, center-crop to 224, convert to a tensor, then normalise each
# channel with the constants below (presumably the usual ImageNet statistics
# -- confirm against the training pipeline).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
transform = transforms.Compose(_preprocessing_steps)

# Load the trained ResNet50 model
def load_model(model_path, num_classes):
    """Build a ResNet50 classifier and load fine-tuned weights from disk.

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        num_classes: Output size of the replacement final fully connected layer.

    Returns:
        The model in evaluation mode, with the checkpoint tensors mapped to CPU.
    """
    # load_state_dict (strict by default) overwrites every parameter and
    # buffer, so the ImageNet weights are not requested. This avoids a
    # pointless download and the deprecated ``pretrained`` argument.
    model = resnet50()
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # map_location puts the tensors on the CPU whatever device they were
    # saved from; callers move the model to their own device afterwards.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    model.eval()
    return model

# Function to load and preprocess the image
def load_and_preprocess_image(img_path, transform):
    """Open an image file and turn it into a single-item model input.

    Args:
        img_path: Filesystem path of the image.
        transform: Callable applied to the RGB PIL image.

    Returns:
        The transformed image with a leading batch dimension added.
    """
    # The image is read straight from the filesystem. To read it from the zip
    # archive instead, open `raw_data_path` with zipfile.ZipFile and wrap the
    # member's bytes in io.BytesIO before handing them to Image.open.
    rgb_image = Image.open(img_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0)  # add the leading batch dimension
    return batch

# Hook to extract gradients
class SaveFeatures:
    """Capture a module's forward output and the gradient flowing back into it.

    A forward hook is registered on ``module`` at construction. After each
    forward pass ``features`` holds the module output; after a backward pass
    through that output ``gradients`` holds the gradient with respect to it.
    Call ``close()`` (or use the instance as a context manager) to remove
    the forward hook.
    """

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.features = None
        self.gradients = None

    def hook_fn(self, module, input, output):
        """Forward-hook callback: store the output and arm the gradient hook."""
        self.features = output
        # A tensor hook can only be attached to a tensor that requires grad.
        # Without this guard a forward pass under torch.no_grad() would raise
        # while the hook is installed.
        if output.requires_grad:
            output.register_hook(self.save_gradient)

    def save_gradient(self, grad):
        """Tensor-hook callback: store the gradient of the captured output."""
        self.gradients = grad

    def close(self):
        """Remove the forward hook from the module."""
        self.hook.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

# Grad-CAM
def generate_gradcam(model, img_tensor, target_class):
    """Compute a Grad-CAM heatmap for one image and one class.

    Args:
        model: Network exposing ``layer4``; its last block is the layer whose
            activations and gradients are used.
        img_tensor: Input batch; only the first item contributes to the map
            (assumes shape (1, C, H, W) -- TODO confirm).
        target_class: Index of the output score to back-propagate.

    Returns:
        float32 array with the spatial size of ``img_tensor`` (224x224 with
        this notebook's transform), scaled to [0, 1]. It is all zeros when
        no location has a positive class-weighted activation.
    """
    save_features = SaveFeatures(model.layer4[-1])
    try:
        # Forward pass
        output = model(img_tensor)

        # One-hot gradient selecting the target class score; zeros_like keeps
        # it on the same device/dtype as the output instead of relying on a
        # global ``device``.
        one_hot_output = torch.zeros_like(output)
        one_hot_output[0][target_class] = 1

        # Backward pass (the graph is not needed again, so it is not retained)
        model.zero_grad()
        output.backward(gradient=one_hot_output)

        # Gradients and activations of the target layer for the first item
        gradients = save_features.gradients.detach().cpu().numpy()[0]
        activations = save_features.features.detach().cpu().numpy()[0]
    finally:
        # Always detach the hook, even if the forward/backward pass failed
        save_features.close()

    # Channel weights: spatial mean of the gradients
    weights = np.mean(gradients, axis=(1, 2))

    # Weighted sum of the activation maps over channels, then ReLU
    gradcam = (weights[:, np.newaxis, np.newaxis] * activations).sum(axis=0)
    gradcam = np.maximum(gradcam.astype(np.float32), 0)

    # cv2.resize takes (width, height)
    height, width = img_tensor.shape[-2:]
    gradcam = cv2.resize(gradcam, (int(width), int(height)))

    # Scale to [0, 1]; skip the division for an all-zero map to avoid NaNs
    gradcam = gradcam - gradcam.min()
    peak = gradcam.max()
    if peak > 0:
        gradcam = gradcam / peak

    return gradcam

# Function to make predictions and print the top 5 results
def predict_and_show(model, img_paths, transform, device, class_names, top_k=5):
    """Classify several images and plot each one's top predictions with Grad-CAM overlays.

    Args:
        model: Classifier passed to ``generate_gradcam`` as well.
        img_paths: Filesystem paths of the images; one figure column per image.
        transform: Preprocessing callable handed to ``load_and_preprocess_image``.
        device: Device the input tensors are moved to.
        class_names: Sequence mapping class index to display name.
        top_k: Number of predictions shown per image (capped at the number of
            model outputs). Defaults to 5.

    The figure has one row of original images, followed for each rank by a
    row of "<class>: <probability>" labels and a row of heatmap overlays.
    """
    images = [load_and_preprocess_image(img_path, transform).to(device) for img_path in img_paths]
    img_tensors = torch.cat(images)

    with torch.no_grad():
        outputs = model(img_tensors)
        probs = torch.nn.functional.softmax(outputs, dim=1)
        # One topk call so each probability is paired with its own index
        num_ranks = min(top_k, probs.shape[1])
        top = torch.topk(probs, num_ranks, dim=1)
        top_probs = top.values.cpu().numpy()
        top_preds = top.indices.cpu().numpy()

    # Exactly the rows that get filled: 1 image row + (label, overlay) per rank.
    # squeeze=False keeps `axes` two-dimensional even for a single image.
    num_images = len(img_paths)
    num_rows = 1 + 2 * num_ranks
    fig, axes = plt.subplots(num_rows, num_images,
                             figsize=(4 * num_images, 3 * num_rows), squeeze=False)

    # Load every display image once; each column must overlay its OWN image
    display_images = [Image.open(img_path).convert('RGB') for img_path in img_paths]

    # Display the images
    for col, img_show in enumerate(display_images):
        axes[0, col].imshow(img_show)
        axes[0, col].axis('off')

    # Display top predictions and Grad-CAMs
    for rank in range(num_ranks):
        for col, img_show in enumerate(display_images):
            label_ax = axes[2 * rank + 1, col]
            overlay_ax = axes[2 * rank + 2, col]

            label_ax.text(0.5, 0.5, f"{class_names[top_preds[col][rank]]}: {top_probs[col][rank]:.4f}",
                          fontsize=12, ha='center', va='center')
            label_ax.axis('off')

            gradcam = generate_gradcam(model, img_tensors[col].unsqueeze(0), top_preds[col][rank])
            # NaNs from a degenerate map are treated as zero saliency
            gradcam = np.clip(np.nan_to_num(gradcam), 0.0, 1.0)

            # applyColorMap returns BGR; matplotlib expects RGB
            heatmap = cv2.applyColorMap(np.uint8(255 * gradcam), cv2.COLORMAP_JET)
            heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
            heatmap = np.float32(heatmap) / 255.0

            cam_height, cam_width = gradcam.shape[:2]
            img_array = np.array(img_show.resize((cam_width, cam_height))).astype(np.float32) / 255.0
            cam = heatmap + img_array
            cam = cam / np.max(cam)

            overlay_ax.imshow(cam)
            overlay_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Locations of the zipped raw dataset and of the trained checkpoint, both
# relative to the current working directory
raw_data_path = f"{os.getcwd()}/../raw_data/Data.zip"
model_path = os.path.join(os.getcwd(), "../output/v1_2907_resnet50_256_epoch_10.pth")

# Class names are hard-coded here instead of being read from a dataset object
# class_names = test_dataset.classes
class_names = [
    '11', 'ADOBE', 'ALIBABA', 'AMAZON', 'AMELI', 'AMERICANEXPRESS', 'AOL', 'APPLE', 'ATT',
    'BANKOFAMERICA', 'BBVA', 'BNPPARIBAS', 'BRADESCO', 'CAPITALONE', 'CHASE', 'CIBC', 'CITIBANK', 'CREDITAGRICOLE',
    'DESJARDINS', 'DHL', 'DOCUSIGN', 'DROPBOX', 'EARTHLINK', 'EBAY', 'EXCEL', 'FACEBOOK', 'FEDEX', 'GOOGLE', 'GOVUK',
    'IMPOTS', 'ING', 'INSTAGRAM', 'ITAU', 'LINKEDIN', 'MAERSK', 'MICROSOFT', 'MTBANK', 'NETFLIX', 'OFFICE365',
    'ONEDRIVE', 'ONENOTE', 'ORANGE', 'OURTIME', 'OUTLOOK', 'PAYPAL', 'RBC', 'ROUNDCUBE',
    'SANTANDER', 'SCOTIABANK', 'SHAREPOINT', 'SUNTRUST', 'USAA', 'VISA', 'WEBMAIL', 'WELLSFARGO', 'XFINITY',
]
num_classes = len(class_names)

# Build the model with the trained weights and place it on the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = load_model(model_path, num_classes).to(device)

def get_cpu_usage(interval=1):
    """Return the CPU usage percentage reported by psutil.

    Args:
        interval: Seconds handed to ``psutil.cpu_percent`` as its measuring
            interval. Defaults to 1.

    Returns:
        The value returned by ``psutil.cpu_percent``.
    """
    cpu_percent = psutil.cpu_percent(interval=interval)
    return cpu_percent

def get_memory_usage():
    """Return the memory usage percentage reported by psutil.

    Returns:
        The ``percent`` field of ``psutil.virtual_memory()``.
    """
    memory_stats = psutil.virtual_memory()
    return memory_stats.percent


# Example usage: five sample screenshots (three of class '11', two of 'ADOBE'),
# given as absolute paths on the training cluster
img_paths = [
    '/mnt/batch/tasks/shared/LS_root/mounts/clusters/t-jiajunfu1gpu/code/Users/t-jiajunfu/EDA/../parsed_data/selected_images/11/11.20200328.257E1AF5510F02FBF0E63C10E7911532.jpg',
    '/mnt/batch/tasks/shared/LS_root/mounts/clusters/t-jiajunfu1gpu/code/Users/t-jiajunfu/EDA/../parsed_data/selected_images/11/11.20200328.B59F18E0B58D77C8BE8B1BD79B01D76D.jpg',
    '/mnt/batch/tasks/shared/LS_root/mounts/clusters/t-jiajunfu1gpu/code/Users/t-jiajunfu/EDA/../parsed_data/selected_images/11/11.20200520.2E20283D2EE4F771F5D5D3F77A30691E.jpg',
    '/mnt/batch/tasks/shared/LS_root/mounts/clusters/t-jiajunfu1gpu/code/Users/t-jiajunfu/EDA/../parsed_data/selected_images/ADOBE/ADOBE.20180507.66FF29D06DB1EE163ACA496AB5E5BA3C.jpg',
    '/mnt/batch/tasks/shared/LS_root/mounts/clusters/t-jiajunfu1gpu/code/Users/t-jiajunfu/EDA/../parsed_data/selected_images/ADOBE/ADOBE.20180811.3625648BE3987A340B5C6733043D5026.jpg',
]
predict_and_show(model, img_paths, transform, device, class_names)

# Report resource usage once the plots are done (CPU is sampled first)
cpu_usage, memory_usage = get_cpu_usage(), get_memory_usage()
print(f"CPU Usage: {cpu_usage}%")
print(f"Memory Usage: {memory_usage}%")