# Art Movements Aggregator

We want to aggregate art movements into a smaller set of classes, mainly because some movements have very few observations.

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# Load the two input tables from local CSV files: one row per painting, one row per author.
# Both tables carry an `author_id` column, which is the join key used further down.
paintings = pd.read_csv('data/athenaeum_painting_filtered.csv')
authors = pd.read_csv('data/athenaeum_authors.csv')

In [3]:
# Preview the first rows of the paintings table.
paintings.head()

Unnamed: 0,medium,painting_location,height,article_type,painting_url,painting_dates,painting_title,author_id,painting_id,width,height_px,width_px
0,oil on canvas,Private collection,16.2,Painting,http://www.the-athenaeum.org/art/display_image...,1915,"Zucchini, Tomatoes and Eggplant",24,104839,25.0,842,1280
1,oil on canvas,Private collection,,Painting,http://www.the-athenaeum.org/art/display_image...,1899-1901,"Waterloo Bridge, Hazy Sun",13,3831,,710,1103
2,oil on canvas,Art Institute of Chicago (United States - Chi...,65.0,Painting,http://www.the-athenaeum.org/art/display_image...,1900,"Waterloo Bridge, Grey Weather",13,3268,93.0,571,800
3,oil on canvas,Ordrupgaard Collection - Copenhagen (Denmark ...,,Painting,http://www.the-athenaeum.org/art/display_image...,1899-1901,"Waterloo Bridge, Grey Weather",13,3817,,682,1066
4,oil on canvas,Ashmolean Museum of Art and Archaeology - Univ...,41.0,Painting,http://www.the-athenaeum.org/art/display_image...,1871,Windmill near Zaandam,13,3076,72.5,590,1056


In [4]:
# Preview the first rows of the authors table (includes the `art_movement` label).
authors.head()

Unnamed: 0,first_name,last_name,death_year,bio_url,nationality,art_movement,author_id,bio_info,birth_year
0,Pierre Auguste,Renoir,1919.0,http://www.the-athenaeum.org/people/detail.php...,French,Impressionist,24,French Impressionist Painter.\nOne of the most...,1841
1,Oscar-Claude,Monet,1926.0,http://www.the-athenaeum.org/people/detail.php...,French,Impressionist,13,"French Impressionist painter (Paris, 14 Novemb...",1840
2,John Singer,Sargent,1925.0,http://www.the-athenaeum.org/people/detail.php...,American,Impressionist,368,American impressionist painter.\nA prolific ar...,1856
3,Joseph Mallord William,Turner,1851.0,http://www.the-athenaeum.org/people/detail.php...,English,Romantic,1315,English romantic painter and watercolourist.\n...,1775
4,Vincent Willem van,Gogh,1890.0,http://www.the-athenaeum.org/people/detail.php...,Dutch,Post-Impressionist,789,Dutch Post-Impressionist painter who remains o...,1853


In [5]:
# Keep only authors that have an art movement; rows with a missing `art_movement` are dropped.
# Note: this rebinds `authors`, so the unfiltered table is no longer available after this cell.
authors = authors.dropna(subset = ['art_movement'])

In [6]:
# (rows, columns) of the authors table after removing authors without a movement.
authors.shape

(1391, 9)

In [7]:
# Attach each painting's art movement by joining on the author.
# Inner join: only paintings whose `author_id` is present in `authors` are kept.
paintings_with_movement = (
    paintings[['author_id', 'painting_id']]
    .merge(authors[['author_id', 'art_movement']], how = 'inner', on = 'author_id')
)

In [8]:
# Size of the full paintings table, for comparison with the joined table below.
paintings.shape

(206550, 12)

In [9]:
# Size after the inner join: only paintings matched to an author in `authors` remain.
paintings_with_movement.shape

(95799, 3)

In [10]:
# Preview the joined (author_id, painting_id, art_movement) table.
paintings_with_movement.head()

Unnamed: 0,author_id,painting_id,art_movement
0,24,104839,Impressionist
1,24,95487,Impressionist
2,24,6031,Impressionist
3,24,5957,Impressionist
4,24,4573,Impressionist


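### Movement hierarchy, from http://www.the-athenaeum.org/art/movements.php:

Indented entries are sub-movements of the entry above them; the mapping defined below merges the sub-movements it lists into their parent movement.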

    Abstract art (1914-1960)
        - Suprematism (1913-1927)
        - De Stijl (1917-1931)
        - Abstraction-Création (1931-1936)
        - Abstract Expressionism (1940-1960)
    Academism (1850-1910)
    Art Nouveau (1890-1910)
    Ashcan school (1908-1920)
    Baroque (1590-1720)
        - Caravaggisti (1590-1640)
        - Dutch Golden Age (1600-1680)
        - Classicism (1610-1700)
    Early Netherlandish (1400-1530)
    Expressionism (1905-1940)
        - Fauvism (1904-1912)
        - Die Brücke (1905-1912)
        - Cubism (1907-1925)
        - Futurism (1909-1944)
        - Der Blaue Reiter (1910-1914)
        - Dada (1916-1924)
        - Bauhaus (1919-1932)
    Impressionism (1863-1920)
        - Skagen Painters (1878-1910)
        - Old Lyme Colony (1900-1915)
    Neoclassicism (1760-1820)
    Pop Art (1955-1975)
    Post-Impressionism (1885-1914)
        - Pointilism (1885-1910)
        - Les Nabis (1888-1905)
    Pre-Raphaelites (1848-1910)
    Realism (1830-1900)
        - Hudson River School (1825-1870)
        - Barbizon school (1830-1880)
        - Peredvizhniki (1863-1890)
    Renaissance (1440-1590)
        - Early Renaissance (1420-1500)
        - High Renaissance (1500-1530)
        - Northern Renaissance (1500-1590)
        - Mannerism (1520-1600)
    Rococo (1710-1780)
    Romanticism (1790-1850)
        - Nazarene (1809-1850)
    Surrealism (1924-1970)
    Symbolism (1870-1910)
    Veduta (1720-1770)

In [11]:
def convert_art_movement(movement, mapping=None):
    """Map a movement label to its aggregated (parent) movement.

    Parameters
    ----------
    movement : hashable
        Movement label to convert.
    mapping : dict, optional
        Lookup from sub-movement label to parent label. When omitted, the
        notebook-level `art_movement_conversor_key` is used. That global is
        resolved at call time, so it only has to exist before the first call,
        not before this definition.

    Returns
    -------
    The mapped label, or `movement` itself when the lookup has no entry for it.
    """
    if mapping is None:
        # Fall back to the notebook-wide lookup table.
        mapping = art_movement_conversor_key
    return mapping.get(movement, movement)

In [12]:
def get_new_movement_counts(df):
    """Count rows per aggregated movement.

    Every value of `df['art_movement']` is passed through
    `convert_art_movement`, and the occurrences of each resulting label are
    tallied with `value_counts()`.
    """
    aggregated_labels = df['art_movement'].apply(convert_art_movement)
    return aggregated_labels.value_counts()

In [13]:
# Lookup from a sub-movement label to the parent movement it is merged into.
# Entries are grouped by parent; only lookups by key are performed on this dict.
art_movement_conversor_key = {
    # -> Abstract
    'Abstraction-Création': 'Abstract',
    'De Stijl': 'Abstract',
    'Suprematist': 'Abstract',
    # -> Baroque
    'Caravaggisti': 'Baroque',
    'Dutch Golden Age': 'Baroque',
    # -> Expressionist
    'Bauhaus': 'Expressionist',
    'Fauvist': 'Expressionist',
    'Futurist': 'Expressionist',
    # -> Impressionist
    'Old Lyme Colony': 'Impressionist',
    'Skagen': 'Impressionist',
    # -> Post-Impressionist
    'Nabi': 'Post-Impressionist',
    'Pointilist': 'Post-Impressionist',
    # -> Realist
    'Barbizon': 'Realist',
    'Hudson River School': 'Realist',
    'Peredvizhniki': 'Realist',
    # -> Renaissance
    'Early Renaissance': 'Renaissance',
    'High Renaissance': 'Renaissance',
    'Mannerism': 'Renaissance',
    'Northern Renaissance': 'Renaissance',
    # -> Romantic
    'Nazarene': 'Romantic',
}

# Labels without an entry above are kept unchanged:
# Surrealist, Art Nouveau, Early Netherlandish, Abstract, Veduta, Ashcan, Pre-Raphaelite, Classicist,
# Neoclassicist, Symbolist, Rococo, Expressionist, Romantic, Academist, Renaissance, Post-Impressionist, Realist,
# Baroque, Impressionist
#
# NOTE(review): the movement listing places Classicism under Baroque, yet 'Classicist' is left as its
# own class here. Confirm whether that is intentional or whether it should map to 'Baroque'.

In [14]:
# Painting counts per aggregated movement; `[::-1]` reverses the order returned by
# `value_counts()` so the rarest classes are listed first.
get_new_movement_counts(paintings_with_movement)[::-1]

Surrealist               159
Art Nouveau              225
Early Netherlandish      348
Abstract                 667
Veduta                  1090
Ashcan                  1134
Pre-Raphaelite          1568
Classicist              1934
Neoclassicist           2448
Symbolist               3660
Rococo                  4770
Expressionist           5144
Romantic                5523
Academist               6081
Renaissance             8484
Post-Impressionist      9943
Realist                11458
Baroque                13028
Impressionist          18135
Name: art_movement, dtype: int64

In [15]:
# Per-author lookup holding both the original label (`art_movement`) and the
# aggregated one (`sup_art_movement`).
authors_new_movement = (
    authors[['author_id', 'art_movement']]
    .assign(sup_art_movement = lambda frame: frame['art_movement'].apply(convert_art_movement))
)

# Attach both labels to every painting; the inner join keeps only paintings
# whose `author_id` is present in the lookup.
paintings_to_export = paintings.merge(authors_new_movement, on = 'author_id', how = 'inner')

In [16]:
# Preview the export table: all painting columns plus `art_movement` and `sup_art_movement`.
paintings_to_export.head()

Unnamed: 0,medium,painting_location,height,article_type,painting_url,painting_dates,painting_title,author_id,painting_id,width,height_px,width_px,art_movement,sup_art_movement
0,oil on canvas,Private collection,16.2,Painting,http://www.the-athenaeum.org/art/display_image...,1915,"Zucchini, Tomatoes and Eggplant",24,104839,25.0,842,1280,Impressionist,Impressionist
1,oil on canvas,Private collection,25.5,Painting,http://www.the-athenaeum.org/art/display_image...,1872,Young Woman with an Umbrella,24,95487,19.4,1280,981,Impressionist,Impressionist
2,oil on canvas,Private collection,,Painting,http://www.the-athenaeum.org/art/display_image...,1899,Yvonne and Jean,24,6031,,880,1053,Impressionist,Impressionist
3,oil on canvas,Musée de l'Orangerie (France - Paris),,Painting,http://www.the-athenaeum.org/art/display_image...,1897,Yvonne and Christine Lerolle at the Piano,24,5957,,752,983,Impressionist,Impressionist
4,oil on canvas,Private collection,76.0,Painting,http://www.the-athenaeum.org/art/display_image...,1886,Young Woman with Crane,24,4573,61.0,1070,854,Impressionist,Impressionist


In [17]:
# Write the full labelled table to disk; `index = False` keeps the row index out of the CSV.
paintings_to_export.to_csv('data/athenaeum_painting_movement.csv', index = False)

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
# 70/30 train/test split, stratified on the aggregated movement so each class keeps
# the same proportion in both parts.
# `random_state` is fixed so that re-running the notebook reproduces the same split;
# without it the exported train/test files would change on every run.
paintings_movement_train, paintings_movement_test = train_test_split(
    paintings_to_export,
    test_size = 0.3,
    stratify = paintings_to_export['sup_art_movement'],
    random_state = 42,
)

In [20]:
# Write the training part of the split to disk, without the row index.
paintings_movement_train.to_csv('data/athenaeum_painting_movement_train.csv', index = False)

In [21]:
# Write the test part of the split to disk, without the row index.
paintings_movement_test.to_csv('data/athenaeum_painting_movement_test.csv', index = False)