In [None]:
#This notebook was run on Google Colab, so Google Drive is mounted here to access the
#image files stored there and to save results.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Install the unrar package so the .rar archive can be extracted.
#NOTE(review): the install is unpinned - consider pinning a version for reproducibility.
!pip install unrar

In [None]:
#Extract the CelebA archive from the mounted Drive into the current working directory.
#NOTE(review): presumably this creates the celeba/ folder that PATH points at - confirm.
!unrar x '/content/drive/My Drive/celeba.rar'

In [None]:
# Root folder of the extracted dataset. The trailing slash is required because
# other cells build file paths with plain string concatenation (PATH + 'name').
PATH = '/content/celeba/'

In [None]:
#Imports
import pandas as pd
import os
import glob
import numpy as np
from tqdm import tqdm
import shutil
import pickle




#Read in the whitespace-delimited label table.
#sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
data = pd.read_csv(PATH + 'labels.csv', sep=r'\s+')
#Index the rows by image file name so labels can be looked up per image
data = data.set_index('img_name')
#Instead of values 1,-1 use 0,1 (replace all -1 with 0)
data = data.replace(-1, 0)
#Write the cleaned label table out as a pickle file
data.to_pickle(PATH + 'data_pkl.pkl')

#Number of images assigned to the training split; every remaining image is validation
N_TRAIN = 4500
#Fixed seed so that re-running the notebook reproduces the same split
SPLIT_SEED = 42

#The copy step below needs the destination folders to exist already
for split_name in ['train', 'valid']:
    os.makedirs(os.path.join(PATH, split_name), exist_ok=True)

#Find the image files; sorted because glob order is arbitrary and would defeat the seed
filenames = sorted(glob.glob(PATH + 'img/*jpg'))
#Random shuffle to avoid a biased split
shuffle = np.random.default_rng(SPLIT_SEED).permutation(len(filenames))


def _copy_split(indices, split_name):
    """Copy the selected images into PATH/<split_name>/ and return their label rows.

    Args:
        indices: Positions into ``filenames`` that belong to this split.
        split_name: Name of the destination sub-folder of PATH ('train' or 'valid').

    Returns:
        The rows of ``data`` for the copied images, in the order they were copied.
        Images that have no row in ``data`` are still copied but contribute no rows.
    """
    names = []
    for j in tqdm(indices, desc=split_name):
        name = os.path.basename(filenames[j])
        shutil.copy(os.path.join(PATH, 'img', name),
                    os.path.join(PATH, split_name, name))
        names.append(name)
    #One label lookup per split instead of one DataFrame.append per image:
    #DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    labelled = [name for name in names if name in data.index]
    return data.loc[labelled]


#First N_TRAIN shuffled images go to the training folder ...
training_df = _copy_split(shuffle[:N_TRAIN], 'train')
#... and whatever remains goes to the validation folder
valid_df = _copy_split(shuffle[N_TRAIN:], 'valid')

In [None]:
#Persist the label table of each split, first as CSV and then as pickle
for split_df, csv_name, pkl_name in (
    (training_df, 'train.csv', 'train.pkl'),
    (valid_df, 'valid.csv', 'valid.pkl'),
):
    split_df.to_csv(PATH + csv_name)
    split_df.to_pickle(PATH + pkl_name)

In [None]:
#Imports
import pandas as pd
import numpy as np
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import PIL
import sklearn
import torch
import torchvision
import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
import os

#Run on the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#Dataset class that loads one image and its label each time it is indexed
class Smiling_loader(Dataset):
    """Dataset of images labelled by the ``smiling`` column of a DataFrame.

    Args:
        df: DataFrame indexed by image file name and containing a ``smiling`` column.
        img_dir: Directory holding the image files named in ``df.index``.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, df, img_dir, transform = None):
        self.dataframe = df
        self.img_dir = img_dir
        self.transform = transform
        self.filename = df.index
        self.label = df.smiling.values

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'label': ...}`` for the sample at position ``idx``."""
        path = os.path.join(self.img_dir, self.filename[idx])
        #Image.open is lazy: convert() forces the pixel data to be read and returns
        #an independent image, so the file handle can be closed straight away.
        #Converting to RGB also guarantees three channels for the later Normalize.
        with Image.open(path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return {'image': image, 'label': self.label[idx]}

In [None]:
#Use pre-constructed model architectures accessed via urls
import torch
import torch.nn as nn
from torchvision import models

#Load torchvision's VGG-16 with batch normalisation, using pretrained weights.
#NOTE(review): newer torchvision releases prefer the `weights=` argument over
#`pretrained=` - confirm against the installed version before changing it.
model = models.vgg16_bn(pretrained = True)

#Leave every parameter trainable, so the whole network is updated during training
for param in model.parameters():
    param.requires_grad_(True)

#Replace the final classifier layer with a deeper head that ends in 2 outputs,
#the number of classes required in our model.
n_inputs = model.classifier[6].in_features
head_layers = [
    nn.Linear(n_inputs, 2048), nn.ReLU(), nn.Dropout(0.65),
    nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(0.6),
    nn.Linear(1024, 512), nn.ReLU(),
    nn.Linear(512, 2),
]
model.classifier[6] = nn.Sequential(*head_layers)

#Move the model onto the selected device (GPU when available)
model.to(device)

In [None]:
#Imports
import os
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms
from torchvision import models
import torch.nn.functional as F
import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.optim import lr_scheduler
from torch import optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torchvision.utils import make_grid
from pprint import pprint
from tqdm import tqdm
#from Models import vgg13_bn, vgg11_bn
from imgaug import augmenters as iaa
import PIL
from PIL import Image

#Callable wrapper around an imgaug pipeline, used to augment the training images
class ImAugtransforms:
    """Apply a fixed imgaug augmentation sequence to a single image."""

    def __init__(self):
        """Build the augmentation sequence once so it can be reused for every image."""
        #Gaussian blur with sigma in (0, 2.0), applied with probability 0.3
        blur = iaa.Sometimes(0.3, iaa.GaussianBlur(sigma=(0, 2.0)))
        #Rotation in the range (-30, 30), using the 'symmetric' fill mode
        rotation = iaa.Affine(rotate=(-30, 30), mode='symmetric')
        #With probability 0.25, apply one of the two dropout variants
        dropout = iaa.Sometimes(
            0.25,
            iaa.OneOf([
                iaa.Dropout(p=(0, 0.1)),
                iaa.CoarseDropout(0.1, size_percent=0.5),
            ]),
        )
        #Hue/saturation shift in the range (-10, 10), sampled per channel
        colour_shift = iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True)

        self.aug = iaa.Sequential([blur, rotation, dropout, colour_shift])

    def __call__(self, img):
        """Convert ``img`` to a numpy array and return the augmented array."""
        pixels = np.array(img)
        return self.aug.augment_image(pixels)

#Channel-wise mean/std used to normalise the image tensors
#(the values torchvision documents for its ImageNet-pretrained models)
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]


def _base_transforms():
    """Return the resize/crop/tensor/normalise steps shared by both pipelines.

    Keeping them in one place stops the training and validation preprocessing
    from drifting apart.
    """
    return [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ]


#Training pipeline: imgaug augmentation on the numpy array, back to a PIL image,
#then the shared preprocessing. PIL.Image.fromarray is passed directly rather than
#through a lambda, because a lambda cannot be pickled and that breaks DataLoader
#worker processes started with the spawn method.
#RandomHorizontalFlip(p=0.5) and RandomGrayscale(p=0.35) were tried and are disabled.
train_trns = transforms.Compose(
    [ImAugtransforms(), PIL.Image.fromarray] + _base_transforms()
)

#Validation pipeline: the shared preprocessing only, with no augmentation
valid_trns = transforms.Compose(_base_transforms())