In [1]:
!pip install efficientnet_pytorch torchvision matplotlib tqdm ipywidgets





In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

In [7]:
# Folder the spectrogram PNG files are read from.
data_path = "healthy_spectograms"
# Custom Dataset for Spectrogram Images
class SpectrogramDataset(Dataset):
    """Dataset over the ``.png`` files found directly inside ``root_dir``.

    Each item is a ``(transformed_image, filename)`` pair, where the image is
    opened with PIL and converted to RGB before the transform is applied.

    Args:
        root_dir: Directory that is listed (non-recursively) for ``.png`` files.
        transform: Optional callable applied to the RGB PIL image. When omitted,
            images are resized to 224x224, converted to a tensor and normalized
            with the ImageNet channel mean/std.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        # Sorted so that the index -> file mapping is stable between runs.
        self.files = sorted(f for f in os.listdir(root_dir) if f.endswith(".png"))
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of ``.png`` files found at construction time."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load file ``idx`` and return ``(transformed_image, filename)``."""
        img_path = os.path.join(self.root_dir, self.files[idx])
        # Context manager closes the underlying file once the pixels are copied.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        return self.transform(image), self.files[idx]
# Build the dataset and a shuffled loader that yields batches of 16.
dataset = SpectrogramDataset(root_dir=data_path)
loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

In [8]:
# Autoencoder using EfficientNet as Encoder
class EfficientNetAutoencoder(nn.Module):
    """Convolutional autoencoder with a pretrained EfficientNet-B0 encoder.

    The EfficientNet network is registered as the ``backbone`` submodule, so
    ``.to()``, ``.train()`` / ``.eval()`` and ``state_dict()`` all cover it.
    (Storing only its bound ``extract_features`` method would leave the encoder
    permanently in training mode and out of saved checkpoints.)

    The decoder takes the 1280-channel feature map and applies five x2
    upsampling stages (x32 overall), ending in a Sigmoid, so outputs lie in
    [0, 1] with 3 channels.

    Checkpoints saved from this class contain ``backbone.*`` and ``decoder.*``
    keys; a checkpoint holding only ``decoder.*`` keys can still be loaded with
    ``load_state_dict(..., strict=False)``.

    NOTE(review): the decoder output is bounded to [0, 1] while
    ``SpectrogramDataset`` normalizes its images with mean/std, so an MSE
    against the normalized input cannot reach zero. Confirm which value range
    the reconstruction target should use.

    Args:
        freeze_encoder: When True (default) the backbone parameters get
            ``requires_grad=False`` so only the decoder is optimized. Pass
            False to fine-tune the encoder as well.
    """

    def __init__(self, freeze_encoder=True):
        super().__init__()

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Registered as a submodule (not just a bound method) on purpose.
        self.backbone = EfficientNet.from_pretrained('efficientnet-b0')
        if freeze_encoder:
            for param in self.backbone.parameters():
                param.requires_grad_(False)

        # Layer order is kept flat so ``decoder.<index>.*`` checkpoint keys stay stable.
        self.decoder = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(1280, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 3, kernel_size=3, padding=1),
            nn.Sigmoid()
        )

        # A freshly built model lives on the GPU when one is available.
        self.to(self.device)

    def encoder(self, x):
        """Return the backbone's convolutional feature map for ``x``."""
        return self.backbone.extract_features(x)

    def forward(self, x):
        """Encode ``x`` with the backbone and decode it back to an image."""
        z = self.encoder(x)
        out = self.decoder(z)
        return out

In [9]:
# Report which device PyTorch will use.
if not torch.cuda.is_available():
    print("Using CPU")
else:
    print(
        "Using GPU:",
        torch.cuda.current_device(),
        torch.cuda.get_device_name(torch.cuda.current_device()),
    )

Using GPU: 0 NVIDIA GeForce GTX 1650


In [12]:
# Rebuild the dataset/loader, then pull one batch to confirm its shape.
data_path = "healthy_spectograms"
dataset = SpectrogramDataset(root_dir=data_path)
loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

# Confirm sample shape
x, name = next(iter(loader))
print("Batch shape:", x.shape)


Batch shape: torch.Size([16, 3, 224, 224])


In [None]:
import torch
# Release cached, currently unused GPU memory held by PyTorch's caching
# allocator before the training cell allocates the model.
torch.cuda.empty_cache()

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.get_device_name raises when no CUDA device is present, so only
# query it when we are actually running on the GPU.
if device.type == "cuda":
    print(torch.cuda.get_device_name(0))
else:
    print("CUDA not available - training on CPU")

# Initialize model and optimizer
model = EfficientNetAutoencoder().to(device)
# NOTE(review): the model ends in a Sigmoid (outputs in [0, 1]) while the
# loader yields mean/std-normalized images, so this MSE cannot reach zero.
# Confirm which value range the reconstruction target should use.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    # Wrap the loader with tqdm for a per-epoch progress bar.
    loop = tqdm(loader, desc=f"Epoch[{epoch+1}/{epochs}]")
    for imgs, _ in loop:
        imgs = imgs.to(device)
        optimizer.zero_grad()
        recon = model(imgs)
        # Autoencoder objective: reconstruct the input batch itself.
        loss = criterion(recon, imgs)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        total_loss += batch_loss
        loop.set_postfix(loss=batch_loss)

    # Sum of the per-batch losses over the epoch (not an average).
    print(f"Epoch{epoch+1}/{epochs} | Total loss: {total_loss:.4f}")

torch.save(model.state_dict(), "efficientnet_autoencoder.pth")
print("Model weights saved to efficientnet_autoencoder.pth")

NVIDIA GeForce GTX 1650
Loaded pretrained weights for efficientnet-b0


Epoch[1/100]:   4%|▍         | 26/625 [00:20<07:51,  1.27it/s, loss=1.32]


KeyboardInterrupt: 

In [None]:
# Rebuild the network and restore the saved weights for inference.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = EfficientNetAutoencoder()
model.to(device)
model.load_state_dict(
    torch.load("efficientnet_autoencoder.pth", map_location=device)
)
model.eval()
print("Model loaded and ready for inference")

# Preprocessing for Inference
def unnormalize(tensor):
    """Undo the mean/std normalization on a channels-last image tensor.

    The constants are shaped ``(1, 1, 1, 3)``, so they broadcast against a
    tensor whose last dimension holds the three RGB channels.

    Args:
        tensor: Normalized tensor with the channel dimension last.

    Returns:
        ``tensor * std + mean``.
    """
    # Build the constants on the input's device so CUDA tensors work too.
    target_device = getattr(tensor, "device", None)
    mean = torch.tensor([0.485, 0.456, 0.406], device=target_device).view(1, 1, 1, 3)
    std = torch.tensor([0.229, 0.224, 0.225], device=target_device).view(1, 1, 1, 3)
    return tensor * std + mean

def show_reconstruction(model, loader, num_images=4):
    """Plot original vs. reconstructed images for the first batch of ``loader``.

    Args:
        model: Autoencoder called on the batch; switched to eval mode here.
        loader: Iterable yielding ``(images, names)`` batches. Assumes the
            images are mean/std-normalized as ``SpectrogramDataset`` produces
            them - confirm if a different dataset is passed.
        num_images: Maximum number of image pairs to show (default 4). Fewer
            are shown when the batch is smaller.
    """
    model.eval()
    imgs, _ = next(iter(loader))
    imgs = imgs.to(device)
    with torch.no_grad():
        recon = model(imgs)

    # Channels-last for matplotlib. The inputs are still normalized, so map
    # them back to image range; the reconstruction is plotted as returned.
    imgs = unnormalize(imgs.cpu().permute(0, 2, 3, 1))
    recon = recon.cpu().permute(0, 2, 3, 1)

    # Guard against batches that hold fewer than ``num_images`` samples.
    for i in range(min(num_images, len(imgs))):
        fig, axs = plt.subplots(1, 2, figsize=(8, 4))
        axs[0].imshow(imgs[i].numpy().clip(0, 1))
        axs[0].set_title("Original")
        axs[0].axis('off')

        axs[1].imshow(recon[i].numpy().clip(0, 1))
        axs[1].set_title("Reconstructed")
        axs[1].axis('off')
        plt.show()

show_reconstruction(model, loader)

In [None]:
# Preprocessing for a single image: resize, convert to tensor, normalize.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# NOTE(review): this looks like a placeholder path - point it at a real image.
img_path = "path_to_image.png"
img = Image.open(img_path).convert("RGB")
# Preprocess, add a leading batch dimension, and move to the model's device.
input_tensor = transform(img)
input_tensor = input_tensor.unsqueeze(0)
input_tensor = input_tensor.to(device)

In [None]:
import numpy as np
import torch.nn.functional as F

# Restore the weights written by the training cell. strict=False tolerates
# key mismatches, so report them instead of ignoring them silently.
state_dict = torch.load("efficientnet_autoencoder.pth", map_location=device)
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("Missing key:", missing)
print("Unexpected keys:", unexpected)

model.eval()
train_losses = []
with torch.no_grad():
    for imgs, _ in loader:
        imgs = imgs.to(device)
        output = model(imgs)
        # Per-image reconstruction error: squared error averaged over the
        # channel, height and width dimensions, leaving one value per sample.
        loss = F.mse_loss(output, imgs, reduction="none").mean(dim=(1, 2, 3))
        train_losses.extend(loss.cpu().numpy())

train_losses = np.array(train_losses)
mean_loss = train_losses.mean()
std_loss = train_losses.std()
# Threshold is one standard deviation above the mean per-image error.
threshold = mean_loss + 1 * std_loss
print(f"Threshold set at : {threshold:.6f}")
print(f"Mean Loss set at: {mean_loss: .6f}")
print(f"std set at: {std_loss: .6f}")

In [None]:
# Forward pass whose result is used for the reconstruction score and the plots.
with torch.no_grad():
    output_tensor = model(input_tensor)

original, reconstructed = input_tensor, output_tensor

# Two more passes on the same input: a non-zero difference means the model's
# forward pass is not deterministic.
with torch.no_grad():
    output1 = model(input_tensor)
    output2 = model(input_tensor)
    diff = F.mse_loss(output1, output2)
    print(f"Difference: {diff.item()}")

# Scalar reconstruction error of the single input image.
mse = F.mse_loss(reconstructed, input_tensor).item()
print(f"Reconstruction MSE Loss: {mse:.6f}")

In [None]:
# `original` was mean/std-normalized by `transform`, so map it back to image
# range before plotting; imshow expects float RGB values in [0, 1].
original_img = (
    unnormalize(original.permute(0, 2, 3, 1).cpu())
    .squeeze(0)
    .clamp(0, 1)
    .numpy()
)
# The reconstruction is plotted as returned by the model, clamped for imshow.
recon_img = reconstructed.squeeze(0).permute(1, 2, 0).cpu().clamp(0, 1).numpy()

fig, axs = plt.subplots(1, 2, figsize=(10, 5))
axs[0].imshow(original_img)
axs[0].set_title("Original Input")
axs[0].axis('off')

axs[1].imshow(recon_img)
axs[1].set_title("Reconstructed Output")
axs[1].axis('off')
plt.show()

In [None]:
# Flag the sample when its reconstruction error exceeds the threshold.
print("Anomaly Detected!" if mse > threshold else "Normal Sample")