In [None]:
import shutil
import random
import os

import cv2

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from tabulate import tabulate
from sklearn.model_selection import train_test_split

In [None]:
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from torchvision.utils import make_grid

In [None]:
# Load the artist metadata table (one row per artist, previewed further down).
df = pd.read_csv('../data/artists/artists.csv')

In [None]:
# Total number of paintings summed over all artists.
paintings = df.paintings.sum()
# Share of all paintings that belongs to each artist (adds a column to df).
df['class_weight'] = df.paintings / paintings

In [None]:
# Number of distinct artist names in the metadata table.
df['name'].nunique()

In [None]:
# Preview the first rows. The markdown table below was presumably produced with
# print(tabulate(df.head(), tablefmt="pipe", headers="keys")).
df.head()

|    |   id | name               | years       | genre                        | nationality   | bio                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | wikipedia                                      |   paintings |   class_weight |
|---:|-----:|:-------------------|:------------|:-----------------------------|:--------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------|------------:|---------------:|
|  0 |    0 | Amedeo Modigliani  | 1884 - 1920 | Expressionism                | Italian       | Amedeo Clemente Modigliani (Italian pronunciation: [ameˈdɛːo modiʎˈʎaːni]; 12 July 1884 – 24 January 1920) was an Italian Jewish painter and sculptor who worked mainly in France. He is known for portraits and nudes in a modern style characterized by elongation of faces, necks, and figures that were not received well during his lifetime but later found acceptance. Modigliani spent his youth in Italy, where he studied the art of antiquity and the Renaissance. In 1906 he moved to Paris, where he came into contact with such artists as Pablo Picasso and Constantin Brâncuși. By 1912 Modigliani was exhibiting highly stylized sculptures with Cubists of the Section d'Or group at the Salon d'Automne.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | http://en.wikipedia.org/wiki/Amedeo_Modigliani |         193 |     0.0228511  |
|  1 |    1 | Vasiliy Kandinskiy | 1866 - 1944 | Expressionism,Abstractionism | Russian       | Wassily Wassilyevich Kandinsky (Russian: Васи́лий Васи́льевич Канди́нский, tr. Vasíliy Vasílʹevich Kandínskiy) (16 December [O.S. 4 December] 1866 – 13 December 1944) was a Russian painter and art theorist.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | http://en.wikipedia.org/wiki/Wassily_Kandinsky |          88 |     0.0104191  |
|  2 |    2 | Diego Rivera       | 1886 - 1957 | Social Realism,Muralism      | Mexican       | Diego María de la Concepción Juan Nepomuceno Estanislao de la Rivera y Barrientos Acosta y Rodríguez, known as Diego Rivera (Spanish pronunciation: [ˈdjeɣo riˈβeɾa]; December 8, 1886 – November 24, 1957) was a prominent Mexican painter. His large frescoes helped establish the Mexican mural movement in Mexican art. Between 1922 and 1953, Rivera painted murals in, among other places, Mexico City, Chapingo, Cuernavaca, San Francisco, Detroit, and New York City. In 1931, a retrospective exhibition of his works was held at the Museum of Modern Art in New York. Rivera had a volatile marriage with fellow Mexican artist Frida Kahlo.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | http://en.wikipedia.org/wiki/Diego_Rivera      |          70 |     0.00828795 |
|  3 |    3 | Claude Monet       | 1840 - 1926 | Impressionism                | French        | Oscar-Claude Monet (; French: [klod mɔnɛ]; 14 November 1840 – 5 December 1926) was a French painter, a founder of French Impressionist painting and the most consistent and prolific practitioner of the movement's philosophy of expressing one's perceptions before nature, especially as applied to plein air landscape painting. The term "Impressionism" is derived from the title of his painting Impression, soleil levant (Impression, Sunrise), which was exhibited in 1874 in the first of the independent exhibitions mounted by Monet and his associates as an alternative to the Salon de Paris.Monet's ambition of documenting the French countryside led him to adopt a method of painting the same scene many times in order to capture the changing of light and the passing of the seasons. From 1883, Monet lived in Giverny, where he purchased a house and property and began a vast landscaping project which included lily ponds that would become the subjects of his best-known works. In 1899, he began painting the water lilies, first in vertical views with a Japanese bridge as a central feature and later in the series of large-scale paintings that was to occupy him continuously for the next 20 years of his life. | http://en.wikipedia.org/wiki/Claude_Monet      |          73 |     0.00864314 |
|  4 |    4 | Rene Magritte      | 1898 - 1967 | Surrealism,Impressionism     | Belgian       | René François Ghislain Magritte (French: [ʁəne fʁɑ̃swa ɡilɛ̃ maɡʁit]; 21 November 1898 – 15 August 1967) was a Belgian Surrealist artist. He became well known for creating a number of witty and thought-provoking images. Often depicting ordinary objects in an unusual context, his work is known for challenging observers' preconditioned perceptions of reality. His imagery has influenced Pop art, minimalist and conceptual art.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | http://en.wikipedia.org/wiki/René_Magritte     |         194 |     0.0229695  |

In [None]:
# Root directory that receives one sub-folder per artist; later read by ImageFolder.
save_folder = '../data/artists/final'

In [None]:
# NOTE(review): load_images_from_folder is neither defined nor imported in the
# visible part of this notebook, and X / y are not used by any visible cell.
# Unless the helper is defined elsewhere, this cell raises NameError on a fresh
# kernel (Restart & Run All) - confirm, then define the helper or drop the cell.
X, y = load_images_from_folder('../data/artists/resized')

## Transform data
Reorganize the images into one sub-folder per artist so that they can be loaded with torchvision's `ImageFolder` class.

In [None]:
def load_and_save_images_from_folder(folder, save_folder):
    """Copy a flat folder of images into one sub-folder per artist.

    The artist name is taken from the file name: everything before the last
    underscore, with the remaining underscores replaced by spaces. Each file
    is copied (not moved) to ``save_folder/<artist>/``.

    Args:
        folder: Directory holding the flat collection of image files.
        save_folder: Root directory that receives one sub-folder per artist.
            It is created, including missing parents, if it does not exist.

    Returns:
        None. A running count of copied files is printed on a single line.
    """
    # File names containing this mis-encoded spelling are filed under the
    # correctly spelled artist name.
    garbled_duerer = 'Albrecht_Du╠êrer'

    copied = 0
    # Sorted so that the copy order does not depend on the file system.
    for filename in sorted(os.listdir(folder)):
        image_filename = os.path.join(folder, filename)

        # shutil.copy cannot copy directories; skip anything that is not a file.
        if not os.path.isfile(image_filename):
            continue

        if garbled_duerer in filename:
            artist = 'Albrecht Dürer'
        else:
            artist = ' '.join(filename.split('_')[:-1])

        # makedirs also creates save_folder itself and tolerates folders that
        # already exist, so the function can be re-run safely.
        artist_folder = os.path.join(save_folder, artist)
        os.makedirs(artist_folder, exist_ok=True)

        shutil.copy(image_filename, artist_folder)

        copied += 1
        print(copied, end ='\r')
        
# Build the per-artist folder layout under save_folder from the flat 'resized' folder.
load_and_save_images_from_folder('../data/artists/resized', save_folder)

8683

A folder named `Albrecht_Du╠êrer` was created as well. We deleted it manually.

In [None]:
# Read the per-artist folders as a labelled image dataset. The transform
# resizes every image to 150x150 and converts it to a tensor.
dataset = ImageFolder(save_folder,transform = transforms.Compose([
    transforms.Resize((150,150)),transforms.ToTensor()
]))

In [None]:
# Split sizes: a fixed number of training images, the remainder divided between
# dev and test. Deriving the two remainders from len(dataset) keeps the sizes
# summing to n when the number of images on disk changes
# (for n = 8356 this yields 5000 / 1678 / 1678, as before).
n = len(dataset)
len_train = 5000
len_dev = (n - len_train) // 2
len_test = n - len_train - len_dev  # receives the extra image if the remainder is odd
assert len_dev > 0 and len_test > 0, "dataset is too small for len_train training images"

In [None]:
# A seeded generator makes the train/dev/test membership identical on every run,
# so dev and test scores refer to the same images after a kernel restart.
# The seed value itself is arbitrary.
train_data, dev_data, test_data = random_split(
    dataset,
    [len_train, len_dev, len_test],
    generator = torch.Generator().manual_seed(42),
)

In [None]:
# Mini-batch size for training; the dev and test loaders use twice this size.
batch_size = 256

# Only the training loader asks for shuffling; all three loaders use 4 worker processes.
train = DataLoader(train_data, batch_size = batch_size, shuffle = True, num_workers = 4)
dev = DataLoader(dev_data, batch_size = batch_size * 2, num_workers = 4)
test = DataLoader(test_data, batch_size = batch_size * 2, num_workers = 4)

In [None]:
def plot_image_grid(data_images):
    """Draw the first batch yielded by ``data_images`` as one image grid.

    Args:
        data_images: Iterable of ``(images, labels)`` batches, e.g. a DataLoader.
            Only the first batch is drawn (8 images per row); if the iterable
            yields nothing, no figure is created.
    """
    for first_batch in data_images:
        images, _labels = first_batch
        figure, axis = plt.subplots(figsize = (16,12))
        # Hide both tick axes; they carry no meaning for an image grid.
        axis.set_xticks([])
        axis.set_yticks([])
        grid = make_grid(images, nrow=8)
        # Reorder the grid's axes (1, 2, 0) before handing it to imshow.
        axis.imshow(grid.permute(1,2,0))
        return

# The image shown below was created with a batch size of 32 images.
plot_image_grid(train)

![](../images.png)

## Build Convolutional NN

In [None]:
class ArtistClassificationBase(nn.Module):
    """Shared training and validation helpers for artist classifiers.

    Subclasses only need to implement ``forward``; the methods here compute the
    cross-entropy loss and accuracy per batch and aggregate them per epoch.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss of one ``(images, labels)`` batch."""
        images, labels = batch
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss

    def validation_step(self, batch):
        """Return detached loss, accuracy and size of one ``(images, labels)`` batch."""
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        # The batch size lets validation_epoch_end weight a smaller last batch correctly.
        return {'val_loss': loss.detach(), 'val_acc': acc, 'batch_size': len(labels)}

    @staticmethod
    def _weighted_mean(values, sizes):
        """Average a list of 0-dim tensors, weighting each by its batch size."""
        stacked = torch.stack(values)
        weights = torch.tensor(sizes, dtype=stacked.dtype, device=stacked.device)
        return (stacked * weights).sum() / weights.sum()

    def validation_epoch_end(self, outputs):
        """Combine per-batch results into epoch-level ``val_loss`` / ``val_acc``.

        Batches are weighted by their ``'batch_size'`` entry so that the result
        is the mean over samples rather than the mean over batches. Entries
        without that key count with weight 1, which gives the plain mean.

        Args:
            outputs: List of dicts as returned by ``validation_step``.

        Returns:
            Dict with Python floats under ``'val_loss'`` and ``'val_acc'``.
        """
        sizes = [x.get('batch_size', 1) for x in outputs]
        epoch_loss = self._weighted_mean([x['val_loss'] for x in outputs], sizes)
        epoch_acc = self._weighted_mean([x['val_acc'] for x in outputs], sizes)
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the training loss, validation loss and validation accuracy of an epoch."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))

In [None]:
class ConvolutionalNN(ArtistClassificationBase):
    """Convolutional classifier for RGB images.

    Three stages of two 3x3 convolutions (each followed by ReLU) and a 2x2 max
    pooling, then three fully connected layers ending in ``n`` outputs.
    """

    def __init__(self, n, image_size = 150):
        """Build the network.

        Args:
            n: Number of output classes.
            image_size: Side length of the (square) input images, or a
                ``(height, width)`` pair. Defaults to 150, which matches the
                150x150 resize applied to the dataset and gives the previously
                hard-coded 82944 input features for the first linear layer.
        """
        super().__init__()

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # The padded 3x3 convolutions keep the spatial size; each of the three
        # MaxPool2d(2,2) layers halves it, rounding down. Three roundings-down
        # by 2 equal one integer division by 8.
        flat_features = 256 * (height // 8) * (width // 8)

        self.network = nn.Sequential(

            nn.Conv2d(3, 32, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(32,64, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(128 ,128, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256,256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Flatten(),
            nn.Linear(flat_features,1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512,n)
        )

    def forward(self, xb):
        """Return the raw class scores (logits) for a batch of images."""
        return self.network(xb)

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of correct predictions as a 0-dim tensor.

    Args:
        outputs: 2-D tensor of class scores, one row per sample.
        labels: 1-D tensor with the true class index of each sample.
    """
    # The predicted class is the column holding the largest score in each row.
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

def evaluate(model, val_loader):
    """Run ``model`` over ``val_loader`` without gradient tracking.

    The model is switched to evaluation mode, ``validation_step`` is called on
    every batch, and the collected results are reduced by
    ``validation_epoch_end``, whose return value is passed through.
    """
    with torch.no_grad():
        model.eval()
        step_results = []
        for batch in val_loader:
            step_results.append(model.validation_step(batch))
        return model.validation_epoch_end(step_results)
  
def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.SGD):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Module providing ``training_step`` and ``epoch_end`` plus the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer factory called as ``opt_func(parameters, lr)``.

    Returns:
        List with one result dict per epoch: the dict returned by ``evaluate``
        extended with the mean training loss under ``'train_loss'``.
    """
    history = []
    optimizer = opt_func(model.parameters(),lr)
    for epoch in range(epochs):

        # evaluate() leaves the model in eval mode, so switch back every epoch.
        model.train()
        train_losses = []
        for i, batch in enumerate(train_loader):
            loss = model.training_step(batch)
            # Store a detached copy: keeping the attached loss would keep every
            # batch's autograd graph reachable until the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            print(f'Batch Nr: {i + 1} done', end = '\r')

        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)

    return history

In [None]:
# Training configuration.
num_epochs = 12
opt_func = torch.optim.Adam
lr = 0.001
# Number of artist classes, read from the dataset that the training split was
# drawn from. NOTE(review): this rebinds `n`, which earlier held len(dataset).
n = len(train.dataset.dataset.classes)

# Fresh network with one output per class.
model = ConvolutionalNN(n)

# Fit the model on the training data and record the dev-set result after each epoch.
history = fit(num_epochs, lr, model, train, dev, opt_func)

Epoch [0], train_loss: 3.9354, val_loss: 3.6182, val_acc: 0.1040 <br />
Epoch [1], train_loss: 3.6043, val_loss: 3.5282, val_acc: 0.1232 <br />
Epoch [2], train_loss: 3.4250, val_loss: 3.3825, val_acc: 0.1479 <br />
Epoch [3], train_loss: 3.2884, val_loss: 3.2186, val_acc: 0.1915 <br />
Epoch [4], train_loss: 3.1745, val_loss: 3.1242, val_acc: 0.2005 <br />
Epoch [5], train_loss: 3.1179, val_loss: 3.1436, val_acc: 0.2027 <br />
Epoch [6], train_loss: 2.9982, val_loss: 2.9989, val_acc: 0.2457 <br />
Epoch [7], train_loss: 2.9017, val_loss: 2.9357, val_acc: 0.2417 <br />
Epoch [8], train_loss: 2.7487, val_loss: 2.8826, val_acc: 0.2597 <br />
Epoch [9], train_loss: 2.5905, val_loss: 2.8124, val_acc: 0.2795 <br />
Epoch [10], train_loss: 2.3774, val_loss: 2.8283, val_acc: 0.2758 <br />
Epoch [11], train_loss: 2.1264, val_loss: 2.9200, val_acc: 0.2785 <br />
Epoch [12], train_loss: 1.8482, val_loss: 3.2579, val_acc: 0.2582 <br />

## Accuracy

In [None]:
# Evaluate the trained model on the test loader. The keys are named
# 'val_loss' / 'val_acc' because evaluate() reuses the validation helpers.
result = evaluate(model, test) # calculate accuracy
result

{'val_loss': 3.6311185359954834, 'val_acc': 0.09573063254356384}