In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
from efficientnet_pytorch import EfficientNet

In [2]:
num_classes = 5  # neutral, gore, violence, nudity, substances
batch_size = 8
num_epochs = 20
learning_rate = 0.001
data_dir = r"E:\Projects\Content-moderation\Dataset"
validation_split = 0.1  
test_split = 0.1 

In [3]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [4]:
full_dataset = datasets.ImageFolder(root=os.path.join(data_dir, 'Train'), transform=data_transforms['train'])

In [5]:
num_total = len(full_dataset)
num_val = int(num_total * validation_split)
num_test = int(num_total * test_split)
num_train = num_total - num_val - num_test
print(num_total)

27731


In [6]:
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [num_train, num_val, num_test])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

In [7]:
model = EfficientNet.from_pretrained('efficientnet-b0')
num_ftrs = model._fc.in_features
model._fc = nn.Linear(num_ftrs, num_classes)

Loaded pretrained weights for efficientnet-b0


In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


In [9]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        print()

    return model

In [10]:
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=num_epochs)


Epoch 0/19
----------
train Loss: 0.5863 Acc: 0.7941
val Loss: 0.4902 Acc: 0.8561

Epoch 1/19
----------
train Loss: 0.4174 Acc: 0.8563
val Loss: 0.3579 Acc: 0.8940

Epoch 2/19
----------
train Loss: 0.3644 Acc: 0.8750
val Loss: 0.2601 Acc: 0.9196

Epoch 3/19
----------
train Loss: 0.3119 Acc: 0.8928
val Loss: 0.4494 Acc: 0.8630

Epoch 4/19
----------
train Loss: 0.2990 Acc: 0.8969
val Loss: 0.3332 Acc: 0.8943

Epoch 5/19
----------
train Loss: 0.2767 Acc: 0.9042
val Loss: 0.2466 Acc: 0.9228

Epoch 6/19
----------
train Loss: 0.2606 Acc: 0.9110
val Loss: 0.1774 Acc: 0.9470

Epoch 7/19
----------
train Loss: 0.1623 Acc: 0.9444
val Loss: 0.1125 Acc: 0.9611

Epoch 8/19
----------
train Loss: 0.1303 Acc: 0.9541
val Loss: 0.1137 Acc: 0.9600

Epoch 9/19
----------
train Loss: 0.1237 Acc: 0.9573
val Loss: 0.0881 Acc: 0.9708

Epoch 10/19
----------
train Loss: 0.1117 Acc: 0.9606
val Loss: 0.0847 Acc: 0.9722

Epoch 11/19
----------
train Loss: 0.1113 Acc: 0.9624
val Loss: 0.0807 Acc: 0.9758

Ep

In [11]:
torch.save(model.state_dict(), 'efficientnet_finetuned.pth')

# Evaluation on validation set
model.eval()
running_corrects = 0
for inputs, labels in val_loader:
    inputs = inputs.to(device)
    labels = labels.to(device)
    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)

val_acc = running_corrects.double() / len(val_loader.dataset)
print(f'Validation Accuracy: {val_acc:.4f}')

# Evaluation on test set
running_corrects = 0
for inputs, labels in test_loader:
    inputs = inputs.to(device)
    labels = labels.to(device)
    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)

test_acc = running_corrects.double() / len(test_loader.dataset)
print(f'Test Accuracy: {test_acc:.4f}')

Validation Accuracy: 0.9737
Test Accuracy: 0.9722
