# Importing the necessary libraries (this notebook uses PyTorch)


In [1]:
# Importing necessary libraries
import torch  # PyTorch library for deep learning
from torch import nn, optim  # Neural network modules and optimization algorithms
from torch.utils.data import Dataset, DataLoader, random_split  # Tools for handling datasets
from torchvision import transforms, models  # Vision-related utilities including pre-trained models
from matplotlib import pyplot as plt  # Plotting library for visualization
from PIL import Image  # Python Imaging Library for image processing
from tqdm import tqdm  # Progress bar for tracking iterations
import os  # Operating system utilities


# Be sure to change the image directory (`data_folder`) to match your setup, and see the instructions at the end of this notebook on how to run the model without retraining the weights


In [2]:
# ---- Notebook configuration: everything a reader may want to tune ----

data_folder = './images'   # folder that contains the images
classes = 5                # number of classes in the dataset
lr = 0.001                 # learning rate handed to the optimizer
batch_size = 32            # samples per training batch
epochs = 40                # number of training epochs
image_size = 224           # side length of the (square) input images

# Prefer the GPU when CUDA is available, otherwise run on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# VGG16 initialised with torchvision's default weights
model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)


In [3]:
def freeze(model):
    """Disable gradient tracking on every parameter of ``model`` (in place)."""
    for parameter in model.parameters():
        parameter.requires_grad_(False)

# Switch: when True, every existing parameter of the model is frozen before the new head is attached
freeze_features = True

# Input width of the classifier's last layer, read before that layer is replaced
num_features = model.classifier[-1].in_features

# freeze() walks model.parameters(), so it is applied before the new layer exists
if freeze_features:
    freeze(model)

# Swap the last classifier layer for one that outputs `classes` scores
model.classifier[-1] = nn.Linear(in_features=num_features, out_features=classes)


In [4]:
# Debug helper: display a single sample with its label as the title.
def visualize(image, label):
  """Plot one image tensor in a new matplotlib figure.

  Args:
    image: torch tensor in channels-first layout (C, H, W), e.g. the first
      element returned by CreateDataset.__getitem__.
    label: any object; ``str(label)`` becomes the figure title.
  """
  # imshow expects (H, W, C). The former (2, 1, 0) permutation produced
  # (W, H, C), i.e. a transposed picture. detach()/cpu() make the helper
  # safe for tensors that live on the GPU or track gradients.
  image = torch.permute(image, (1, 2, 0)).detach().cpu().numpy()
  plt.figure()
  plt.imshow(image)
  plt.title(str(label))
  # Hide the axis ticks; pixel coordinates are not informative here
  plt.xticks([])
  plt.yticks([])

In [5]:
class CreateDataset(Dataset):
  """Image dataset read from a folder that has one sub-directory per class.

  Each sub-directory of ``source_dir`` becomes a class. Class indices are
  assigned in sorted sub-directory order so that the index -> name mapping
  is the same on every run and every machine.

  Args:
    source_dir: path of the root folder holding the class sub-directories.
    transform: optional callable applied to the PIL image before it is
      converted to a tensor.

  Raises:
    NotADirectoryError: if ``source_dir`` is not an existing directory.
  """

  def __init__(self, source_dir, transform = None):
    # Validate the input first. __init__ must not return a value (the old
    # `return -1` raised a TypeError), so report the problem with an exception.
    if not os.path.isdir(source_dir):
      raise NotADirectoryError(f"{source_dir} not a valid directory")

    # Variables to keep track of
    self.labelDictionary = {}   # class index -> sub-directory name
    self.images = []            # path of every readable image
    self.labels = []            # class index for the image at the same position
    self.transform = transform
    self.toTensor = transforms.Compose([
      transforms.ToTensor(),
    ])

    # Only sub-directories are classes; stray files in the root are ignored.
    # Sorting makes the label assignment deterministic (os.listdir order is arbitrary).
    class_names = sorted(
      entry for entry in os.listdir(source_dir)
      if os.path.isdir(os.path.join(source_dir, entry))
    )

    for class_index, class_name in enumerate(class_names):
      self.labelDictionary[class_index] = class_name
      class_path = os.path.join(source_dir, class_name)

      for file_name in sorted(os.listdir(class_path)):
        file_path = os.path.join(class_path, file_name)

        # Keep only files PIL can open; the handle is closed right away so
        # scanning a large folder does not leak file descriptors.
        try:
          with Image.open(file_path):
            pass
        except Exception:
          continue

        self.images.append(file_path)
        self.labels.append(class_index)

  # Function that returns the size of the dataset
  def __len__(self):
    return len(self.images)

  # Function that returns the ith image and label after converting the image
  def __getitem__(self, i):
    """Return ``(tensor, label)`` for sample ``i``; the tensor has 3 channels."""
    label = self.labels[i]

    # The context manager closes the underlying file once the pixels are read
    with Image.open(self.images[i]) as source_image:
      image = source_image

      # All the transformations
      if self.transform is not None:
        image = self.transform(image)

      img = self.toTensor(image)

      # Grayscale, palette, RGBA, ... images are converted to plain RGB
      if img.shape[0] != 3:
        img = self.toTensor(image.convert("RGB"))

    return img, label

  def getLabels(self):
    """Return the list of class indices, aligned with the stored image paths."""
    return self.labels

  def getDictionary(self):
    """Return the mapping from class index to sub-directory name."""
    return self.labelDictionary

In [6]:
# Preprocessing handed to the dataset: resize every image to image_size x image_size
transform = transforms.Compose(
    [transforms.Resize(size=(image_size, image_size))]
)

In [7]:
# Build the full dataset from data_folder
data = CreateDataset(source_dir=data_folder, transform=transform)

# Fail early with a readable message instead of an IndexError further down
if len(data) == 0:
    raise ValueError(f"No readable images found under {data_folder}")

# 80 / 10 / 10 split into train / validation / test.
# The seeded generator makes the split identical on every Restart & Run All,
# so the reported validation and test metrics refer to the same samples.
split_generator = torch.Generator().manual_seed(0)
train_dataset, valid_dataset, test_dataset = random_split(
    data, [0.8, 0.1, 0.1], generator=split_generator
)

# Smoke test: load one sample from each split
temp_train = train_dataset[0]
temp_valid = valid_dataset[0]
temp_test = test_dataset[0]

temp = data.getLabels()
# print(data.getDictionary()[0], temp.count(0))

# visualize(temp_train[0], temp_train[1])
# visualize(temp_valid[0], temp_valid[1])
# visualize(temp_test[0], temp_test[1])



In [8]:
# Dataset used in each phase of the experiment
phases = dict(train=train_dataset, valid=valid_dataset, test=test_dataset)

# One DataLoader per phase; only the 'train' loader shuffles its samples
loader = {
    name: DataLoader(subset, batch_size=batch_size, shuffle=(name == 'train'))
    for name, subset in phases.items()
}

In [9]:
# Put the network on the selected device, then create the loss and the optimizer
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)

In [10]:
def evaluate(model, loader, device, criterion, mode='validation', epoch=None):
    """Print the average loss and the accuracy of ``model`` on one data split.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: mapping from split name to an iterable of ``(images, labels)``
            batches.
        device: device the batches are moved to before the forward pass.
        criterion: loss function called as ``criterion(outputs, labels)``.
        mode: key of ``loader`` to evaluate. The loaders built in this
            notebook use the keys 'train', 'valid' and 'test', and every
            call in this notebook passes ``mode`` explicitly.
        epoch: epoch number shown in the printed line. When omitted, a
            module-level variable named ``epoch`` (the training-loop
            counter) is used if it exists; otherwise the epoch is left out.

    Raises:
        KeyError: if ``mode`` is not a key of ``loader``.
    """
    if mode not in loader:
        raise KeyError(f"mode {mode!r} not found in loader; available: {list(loader)}")

    # Older calls relied on the global loop counter; keep their output
    # unchanged, but no longer crash when the training loop has not run yet.
    if epoch is None:
        epoch = globals().get('epoch')
    prefix = mode if epoch is None else f'{mode} epoch {epoch}'

    # Set the model to evaluation mode
    model.eval()

    # Running totals: samples seen, correct predictions, summed loss
    total = 0
    total_correct = 0
    total_loss = 0.0

    # Gradients are not needed anywhere in the evaluation pass
    with torch.no_grad():
        for images, labels in loader[mode]:

            # Move images and labels to the specified device (CPU or GPU)
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass and loss
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight by the batch size so the final figure is a per-sample mean
            batch_count = images.size(0)
            total_loss += loss.item() * batch_count
            total += batch_count

            # The predicted class is the index of the largest output
            _, predictions = outputs.max(1)
            total_correct += (labels == predictions).sum().item()

    # An empty split would otherwise end in a division by zero
    if total == 0:
        print(f'{prefix}: no samples to evaluate')
        return

    # Calculate average loss and accuracy
    loss = total_loss / total
    accuracy = total_correct / total

    # Print evaluation metrics
    print(f'{prefix}: Loss({loss:6.4f}) Accuracy ({accuracy:6.4f})')


In [11]:
for epoch in range(epochs):
    # Training mode for this epoch
    model.train()

    # Per-epoch running totals: samples seen, correct predictions, summed loss
    total, total_correct, total_loss = 0, 0, 0

    # One pass over the training batches, with a progress bar
    for images, labels in tqdm(loader['train'], total=len(loader['train'])):

        # Batch goes to the same device as the model
        images, labels = images.to(device), labels.to(device)

        # Reset gradients, run the forward pass, back-propagate, update weights
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure is a per-sample mean
        batch_count = images.size(0)
        total += batch_count
        total_loss += loss.item() * batch_count

        # Predicted class = index of the largest output
        predictions = outputs.max(1).indices
        total_correct += (predictions == labels).sum()

    # Epoch-level averages
    loss = total_loss / total
    accuracy = total_correct / total

    # Report the training metrics, then the validation metrics
    print(f'Train epoch {epoch}: Loss({loss:6.4f}) Accuracy ({accuracy:6.4f})')
    evaluate(model, loader, device, criterion, mode='valid')


100%|██████████| 189/189 [02:31<00:00,  1.24it/s]


Train epoch 0: Loss(0.7539) Accuracy (0.7161)
valid epoch 0: Loss(0.5134) Accuracy (0.7974)


100%|██████████| 189/189 [02:32<00:00,  1.24it/s]


Train epoch 1: Loss(0.6186) Accuracy (0.7737)
valid epoch 1: Loss(0.4950) Accuracy (0.8066)


100%|██████████| 189/189 [02:31<00:00,  1.25it/s]


Train epoch 2: Loss(0.5838) Accuracy (0.7833)
valid epoch 2: Loss(0.4804) Accuracy (0.8291)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 3: Loss(0.5688) Accuracy (0.7864)
valid epoch 3: Loss(0.4579) Accuracy (0.8238)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 4: Loss(0.5452) Accuracy (0.7922)
valid epoch 4: Loss(0.4488) Accuracy (0.8185)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 5: Loss(0.5405) Accuracy (0.7970)
valid epoch 5: Loss(0.4529) Accuracy (0.8318)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 6: Loss(0.5470) Accuracy (0.8003)
valid epoch 6: Loss(0.4521) Accuracy (0.8278)


100%|██████████| 189/189 [02:38<00:00,  1.19it/s]


Train epoch 7: Loss(0.5312) Accuracy (0.8015)
valid epoch 7: Loss(0.4500) Accuracy (0.8278)


100%|██████████| 189/189 [02:31<00:00,  1.24it/s]


Train epoch 8: Loss(0.5109) Accuracy (0.8083)
valid epoch 8: Loss(0.4593) Accuracy (0.8238)


100%|██████████| 189/189 [02:31<00:00,  1.25it/s]


Train epoch 9: Loss(0.5306) Accuracy (0.7995)
valid epoch 9: Loss(0.4402) Accuracy (0.8305)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 10: Loss(0.5069) Accuracy (0.8099)
valid epoch 10: Loss(0.4626) Accuracy (0.8199)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 11: Loss(0.5139) Accuracy (0.8094)
valid epoch 11: Loss(0.4504) Accuracy (0.8185)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 12: Loss(0.4955) Accuracy (0.8162)
valid epoch 12: Loss(0.4449) Accuracy (0.8252)


100%|██████████| 189/189 [02:29<00:00,  1.27it/s]


Train epoch 13: Loss(0.5014) Accuracy (0.8089)
valid epoch 13: Loss(0.4374) Accuracy (0.8331)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 14: Loss(0.5027) Accuracy (0.8091)
valid epoch 14: Loss(0.4510) Accuracy (0.8278)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 15: Loss(0.4930) Accuracy (0.8177)
valid epoch 15: Loss(0.4371) Accuracy (0.8252)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 16: Loss(0.4969) Accuracy (0.8137)
valid epoch 16: Loss(0.4777) Accuracy (0.8106)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 17: Loss(0.4955) Accuracy (0.8141)
valid epoch 17: Loss(0.4517) Accuracy (0.8159)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 18: Loss(0.5022) Accuracy (0.8122)
valid epoch 18: Loss(0.4429) Accuracy (0.8318)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 19: Loss(0.4889) Accuracy (0.8104)
valid epoch 19: Loss(0.4226) Accuracy (0.8411)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 20: Loss(0.4868) Accuracy (0.8149)
valid epoch 20: Loss(0.4379) Accuracy (0.8291)


100%|██████████| 189/189 [02:29<00:00,  1.27it/s]


Train epoch 21: Loss(0.4874) Accuracy (0.8195)
valid epoch 21: Loss(0.4517) Accuracy (0.8278)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 22: Loss(0.4938) Accuracy (0.8151)
valid epoch 22: Loss(0.4479) Accuracy (0.8278)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 23: Loss(0.4796) Accuracy (0.8182)
valid epoch 23: Loss(0.4739) Accuracy (0.8252)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 24: Loss(0.4738) Accuracy (0.8243)
valid epoch 24: Loss(0.4327) Accuracy (0.8371)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 25: Loss(0.4633) Accuracy (0.8240)
valid epoch 25: Loss(0.4241) Accuracy (0.8371)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 26: Loss(0.4852) Accuracy (0.8137)
valid epoch 26: Loss(0.4274) Accuracy (0.8411)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 27: Loss(0.4806) Accuracy (0.8195)
valid epoch 27: Loss(0.4394) Accuracy (0.8305)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 28: Loss(0.4728) Accuracy (0.8207)
valid epoch 28: Loss(0.4214) Accuracy (0.8450)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 29: Loss(0.4715) Accuracy (0.8233)
valid epoch 29: Loss(0.4419) Accuracy (0.8331)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 30: Loss(0.4732) Accuracy (0.8223)
valid epoch 30: Loss(0.4557) Accuracy (0.8344)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 31: Loss(0.4645) Accuracy (0.8251)
valid epoch 31: Loss(0.4540) Accuracy (0.8252)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 32: Loss(0.4585) Accuracy (0.8261)
valid epoch 32: Loss(0.4212) Accuracy (0.8424)


100%|██████████| 189/189 [02:29<00:00,  1.26it/s]


Train epoch 33: Loss(0.4718) Accuracy (0.8235)
valid epoch 33: Loss(0.4203) Accuracy (0.8464)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 34: Loss(0.4633) Accuracy (0.8199)
valid epoch 34: Loss(0.4423) Accuracy (0.8331)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 35: Loss(0.4612) Accuracy (0.8242)
valid epoch 35: Loss(0.4353) Accuracy (0.8371)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 36: Loss(0.4699) Accuracy (0.8212)
valid epoch 36: Loss(0.4486) Accuracy (0.8318)


100%|██████████| 189/189 [02:28<00:00,  1.28it/s]


Train epoch 37: Loss(0.4619) Accuracy (0.8220)
valid epoch 37: Loss(0.4261) Accuracy (0.8424)


100%|██████████| 189/189 [02:28<00:00,  1.27it/s]


Train epoch 38: Loss(0.4718) Accuracy (0.8197)
valid epoch 38: Loss(0.4266) Accuracy (0.8411)


100%|██████████| 189/189 [02:27<00:00,  1.28it/s]


Train epoch 39: Loss(0.4599) Accuracy (0.8246)
valid epoch 39: Loss(0.4338) Accuracy (0.8344)


In [12]:
# Print loss and accuracy on the 'test' split, which the training loop above never iterates over
evaluate(model, loader, device, criterion, mode='test')

test epoch 39: Loss(0.5200) Accuracy (0.8106)


In [13]:
# Serialises the whole model object (not just a state_dict) to the file "2nd-model"
torch.save(model, "2nd-model")

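- **Load the Trained Model:** Load the trained VGG16 model along with its weights using the `torch.load(path_to_model)` function. The cell above saves the model to the file `2nd-model`, so use that path unless you renamed it.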


# Load the trained VGG16 model
```
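# The file stores the whole pickled model, so recent PyTorch versions need weights_only=False; only load files you trust
model = torch.load('path_to_your_trained_model.pth', weights_only=False)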
```



# Transform the images to fit img size
```
from torchvision import transforms
from PIL import Image

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
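    # NOTE: the training pipeline in this notebook uses only Resize + ToTensor (no Normalize),
    # so leave normalization off at inference time to match what the model was trained on:
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),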
])
```
# You only have to run the first 3 cells before you call the `model.eval()` method

```
 The labels used in training are [drawings,hentai,neutral,porn,sexy]
 ```