# Projeto - Classificação de Pinturas com CNN

## Imports das bibliotecas necessárias

In [26]:
# instalação da lib do Kaggle (kagglehub) para realizar download
!pip install kagglehub



In [1]:
# imports de bibliotecas usadas
import kagglehub
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
from torchvision.transforms import v2

## Importação do [dataset do Kaggle](https://www.kaggle.com/datasets/steubk/wikiart)

In [2]:
# download a partir do kaggle (usando biblioteca kagglehub)

path = kagglehub.dataset_download("steubk/wikiart")
print("Path to dataset files:", path) # Path to dataset files: /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1

Downloading from https://www.kaggle.com/api/v1/datasets/download/steubk/wikiart?dataset_version_number=1...


100%|██████████| 31.4G/31.4G [06:30<00:00, 86.2MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1


In [3]:
list_directory = os.listdir(path)
print()
print(len(list_directory), list_directory)


29 ['Fauvism', 'Baroque', 'Early_Renaissance', 'Analytical_Cubism', 'Symbolism', 'Color_Field_Painting', 'Synthetic_Cubism', 'Romanticism', 'Ukiyo_e', 'Action_painting', 'New_Realism', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Contemporary_Realism', 'Impressionism', 'Mannerism_Late_Renaissance', 'Abstract_Expressionism', 'Pop_Art', 'Pointillism', 'Minimalism', 'Realism', 'Naive_Art_Primitivism', 'Rococo', 'High_Renaissance', 'Northern_Renaissance', 'wclasses.csv', 'Expressionism', 'classes.csv']


In [4]:
# classes.csv
path = '/root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/'
dataset_path = os.path.join(path, 'classes.csv')
df = pd.read_csv(dataset_path)

### Para obtenção de melhores resultados, em comparação com os obtidos usando Keras, algumas medidas vão ser tomadas:

- Retirada de imagens que apresentam mais de uma classificação

In [5]:
# retirada de imagens com mais de uma classificação
# exclusão desses dados é feita no sistema (antes de carregamento dos dados para um dataset)

# lista para salvar índices das instâncias a serem excluídas do df (pois os dados correspondentes foram excluídos)
instance_remove = []

for index, instance in df.iterrows():
  file_path = str(instance['filename'])
  image_classes = str(instance['genre'])

  # busca uma vírgula na string das classes da pintura
  if (image_classes.find(',') != -1):
    # encontrou vírgula
    file_path = os.path.join(path, file_path)
    try:
      os.remove(file_path)
      print('Removed: ', file_path)
    except Exception as e:
      print('Error: failed', file_path)
    # adiciona à lista o índice da linha para ser excluída do df
    instance_remove.append(index)

# retira as linhas com base nos índices
df = df.drop(instance_remove)
# reorganiza índices; drop=True faz com que uma coluna Index NÃO seja inserida
df = df.reset_index(drop=True)

Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/brice-marden_untitled-press-series-1972-1.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_al-lazar-man-in-a-hotel-room-1954.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_harold-rosenberg-1956.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_john-f-kennedy-1962.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_john-f-kennedy-1963-1.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_john-f-kennedy-1963.jpg
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism/elaine-de-kooning_portrait-of-jack-greenbaum-1959.jpg
Removed:  /root/.cache/kagglehub/da

- Retirada de classes com poucas instâncias

In [6]:
len(df), df['genre'].value_counts()

(78642,
 genre
 ['Impressionism']                 12847
 ['Realism']                       10534
 ['Romanticism']                    6896
 ['Expressionism']                  6280
 ['Post Impressionism']             6274
 ['Baroque']                        4202
 ['Symbolism']                      4174
 ['Art Nouveau Modern']             4155
 ['Abstract Expressionism']         2574
 ['Northern Renaissance']           2550
 ['Naive Art Primitivism']          2299
 ['Rococo']                         2070
 ['Cubism']                         2002
 ['Color Field Painting']           1486
 ['Pop Art']                        1447
 ['Early Renaissance']              1387
 ['High Renaissance']               1339
 ['Mannerism Late Renaissance']     1275
 ['Minimalism']                     1250
 ['Ukiyo e']                        1159
 ['Fauvism']                         747
 ['Pointillism']                     490
 ['Contemporary Realism']            481
 ['New Realism']                     312
 

In [7]:
# vamos retirar algumas das classes que apresentam poucas instâncias de dados
# Fauvism, Pointillism, Contemporary Realism, New Realism, Synthetic Cubism, Analytical Cubism, Action painting
#  ['Fauvism']                         747
#  ['Pointillism']                     490
#  ['Contemporary Realism']            481
#  ['New Realism']                     312
#  ['Synthetic Cubism']                216
#  ['Analytical Cubism']               110
#  ['Action painting']                  86

# a retirada das classes é feita antes da captura do dataset a partir do diretório

import os
import shutil

list_directory = os.listdir(path)
print(len(list_directory), list_directory)

direc_remove = ['Fauvism', 'Pointillism', 'Contemporary_Realism', 'New_Realism', 'Synthetic_Cubism', 'Analytical_Cubism', 'Action_painting']

for folder in direc_remove:
  folder_path = os.path.join(path, folder)
  try:
    if os.path.isdir(folder_path):
      shutil.rmtree(folder_path)
      print('Removed: ', path + folder, end='. ')
  except Exception as e:
    print('Error: failed', path + folder, end='. ')

list_directory = os.listdir(path)
print()
print(len(list_directory), list_directory)

29 ['Fauvism', 'Baroque', 'Early_Renaissance', 'Analytical_Cubism', 'Symbolism', 'Color_Field_Painting', 'Synthetic_Cubism', 'Romanticism', 'Ukiyo_e', 'Action_painting', 'New_Realism', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Contemporary_Realism', 'Impressionism', 'Mannerism_Late_Renaissance', 'Abstract_Expressionism', 'Pop_Art', 'Pointillism', 'Minimalism', 'Realism', 'Naive_Art_Primitivism', 'Rococo', 'High_Renaissance', 'Northern_Renaissance', 'wclasses.csv', 'Expressionism', 'classes.csv']
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Fauvism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Pointillism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Contemporary_Realism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/New_Realism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Synthetic_Cubism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Ana

- Escolha de classes que apresentem imagens com padrão visível
  - no dataset, classes como 'Pop Art', 'Fauvism', apresentam dados muito variados e de difícil verificação do padrão presente. Para essas classes, o modelo teve baixa precisão em classificá-las. Assim, a medida de decisão de quais classes serão retiradas com base nesse aspecto vai ser realizada.
  - classes que serão retiradas com base nessa análise:

In [8]:
list_directory = os.listdir(path)
print(len(list_directory), list_directory)

direc_remove = ['Expressionism', 'Pop_Art', 'Naive_Art_Primitivism', 'Abstract_Expressionism']

for folder in direc_remove:
  folder_path = os.path.join(path, folder)
  try:
    if os.path.isdir(folder_path):
      shutil.rmtree(folder_path)
      print('Removed: ', path + folder, end='. ')
  except Exception as e:
    print('Error: failed', path + folder, end='. ')

list_directory = os.listdir(path)
print()
print(len(list_directory), list_directory)

22 ['Baroque', 'Early_Renaissance', 'Symbolism', 'Color_Field_Painting', 'Romanticism', 'Ukiyo_e', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Impressionism', 'Mannerism_Late_Renaissance', 'Abstract_Expressionism', 'Pop_Art', 'Minimalism', 'Realism', 'Naive_Art_Primitivism', 'Rococo', 'High_Renaissance', 'Northern_Renaissance', 'wclasses.csv', 'Expressionism', 'classes.csv']
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Expressionism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Pop_Art. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Naive_Art_Primitivism. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Abstract_Expressionism. 
18 ['Baroque', 'Early_Renaissance', 'Symbolism', 'Color_Field_Painting', 'Romanticism', 'Ukiyo_e', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Impressionism', 'Mannerism_Late_Renaissance', 'Minimalism', 'Realism', 'Rococo', 'High_Renaissance', 'Northern_Re

- União de classes semelhante
  - Há 4 classes relativas à Renascença; os diretórios serão unidos em somente um

In [14]:
renaissances_dirs = ['Mannerism_Late_Renaissance', 'Early_Renaissance', 'High_Renaissance', 'Northern_Renaissance']
one_renaissance_dir = path + '/Renaissance'

os.makedirs(one_renaissance_dir, exist_ok=True)

for dir in renaissances_dirs:
  # une o caminho em path aos diretórios
  source_path = os.path.join(path, dir)
  # verifica se caminho existe
  if os.path.exists(source_path):
    for file_name in os.listdir(source_path):
      file_name = os.path.join(source_path, file_name)
      if os.path.isfile(file_name):
        try:
          shutil.copy(file_name, one_renaissance_dir)
          print('Cópia do arquivo realizada: ', file_name)
        except Exception as e:
          print('Erro ao copiar arquivo: ', file_name)
          print('Erro:', e)
  else:
    print(f'Diretório não encontrado: {source_path}')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/carlo-crivelli_saint-mary-magdalene.jpg
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/sandro-botticelli_the-story-of-virginia-1504(1).jpg
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/antonello-da-messina_portrait-of-a-man-1.jpg
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/antonello-da-messina_st-jerome-penitet.jpg
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/domenico-veneziano_the-stigmatization-of-st-francis.jpg
Cópia do arquivo realizada:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance/sandro-botticelli_the-triumph-of-mordecai-from-the-story-of-esther-14

In [15]:
list_directory = os.listdir(one_renaissance_dir)
print()
print(len(list_directory), list_directory)


6558 ['lucas-cranach-the-elder_paradise-1536.jpg', 'michelangelo_ignudo-9.jpg', 'paolo-veronese_landscape-1.jpg', 'albrecht-durer_the-whore-of-baylon-1498.jpg', 'hans-holbein-the-younger_duke-anton-the-good-of-lorraine-1543.jpg', 'albrecht-altdorfer_the-battle-of-issus-fragment-1529-11.jpg', 'martin-schongauer_altarpiece-of-the-dominicans-the-mystical-hunt.jpg', 'albrecht-durer_apostle-bartholomew.jpg', 'jan-van-hemessen_parable-of-the-prodigal-son-2.jpg', 'petrus-christus_annunciation-and-nativity-1452.jpg', 'jean-hey_portrait-of-francis-de-chateaubriand.jpg', 'michelangelo_the-prophet-joel-1509.jpg', 'el-greco_mount-sinai-1570.jpg', 'jean-fouquet_theodoric-victory-over-the-danes-1460.jpg', 'fra-angelico_scenes-from-the-life-of-christ-1452.jpg', 'lucas-cranach-the-elder_charity.jpg', 'albrecht-durer_the-annunciation-1511.jpg', 'andrea-del-sarto_study-of-the-figures-behind-a-balustrade.jpg', 'paolo-veronese_sts-mark-james-and-jerome-with-the-dead-christ-borne-by-angels-1582.jpg', 'mas

In [16]:
list_directory = os.listdir(path)
print(len(list_directory), list_directory)

renaissances_dirs = ['Mannerism_Late_Renaissance', 'Early_Renaissance', 'High_Renaissance', 'Northern_Renaissance']

for folder in renaissances_dirs:
  folder_path = os.path.join(path, folder)
  try:
    if os.path.isdir(folder_path):
      shutil.rmtree(folder_path)
      print('Removed: ', path + folder, end='. ')
  except Exception as e:
    print('Error: failed', path + folder, end='. ')

list_directory = os.listdir(path)
print()
print(len(list_directory), list_directory)

19 ['Baroque', 'Early_Renaissance', 'Renaissance', 'Symbolism', 'Color_Field_Painting', 'Romanticism', 'Ukiyo_e', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Impressionism', 'Mannerism_Late_Renaissance', 'Minimalism', 'Realism', 'Rococo', 'High_Renaissance', 'Northern_Renaissance', 'wclasses.csv', 'classes.csv']
Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Mannerism_Late_Renaissance. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Early_Renaissance. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/High_Renaissance. Removed:  /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/Northern_Renaissance. 
15 ['Baroque', 'Renaissance', 'Symbolism', 'Color_Field_Painting', 'Romanticism', 'Ukiyo_e', 'Post_Impressionism', 'Art_Nouveau_Modern', 'Cubism', 'Impressionism', 'Minimalism', 'Realism', 'Rococo', 'wclasses.csv', 'classes.csv']


## Captura do dataset a partir do ImageFolder e pré-processamento dos dados com Torch

In [19]:
# pipeline de pré-processamento

pipeline_preprocess = v2.Compose([
    # redimensionamento das imagens; usa antialias para suavizar imagem (diminui perda de detalhes)
    v2.Resize(size=(220, 220), antialias=True),

    # seleciona uma porção da imagem de forma aleatória
    v2.RandomCrop(size=(220, 220)),

    # aleatoriamente gira a imagem
    v2.RandomHorizontalFlip(p=0.5),

    # normalização da imagem - pixels têm valores dentro de um intervalo bem delimitado de [0, 1]
    # porque: melhor convergência do modelo, previne sobreajuste/overfitting, suporta transferência de aprendizado
    v2.ToDtype(torch.float32, scale=True), # necessário que a imagem seja do tipo float para ser normalizada
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # processo de normalização em si
])

In [20]:
# captura do dataset aplicando o pré-processamento
data = torchvision.datasets.ImageFolder(path, transform=pipeline_preprocess)
data

Dataset ImageFolder
    Number of datapoints: 64368
    Root location: /root/.cache/kagglehub/datasets/steubk/wikiart/versions/1/
    StandardTransform
Transform: Compose(
                 Resize(size=[220, 220], interpolation=InterpolationMode.BILINEAR, antialias=True)
                 RandomCrop(size=(220, 220), pad_if_needed=False, fill=0, padding_mode=constant)
                 RandomHorizontalFlip(p=0.5)
                 ToDtype(scale=True)
                 Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False)
           )

In [21]:
data.classes

['Art_Nouveau_Modern',
 'Baroque',
 'Color_Field_Painting',
 'Cubism',
 'Impressionism',
 'Minimalism',
 'Post_Impressionism',
 'Realism',
 'Renaissance',
 'Rococo',
 'Romanticism',
 'Symbolism',
 'Ukiyo_e']

## DataLoader e Resample (downsample)