In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io

MAIN MISSION: See if it is possible to accurately predict which genre a painting belongs to by comparing the pictures in certain ways (e.g. colors, contrasts, light)

In [2]:
# Load the artist table, indexed by the "id" column.
artists = pd.read_csv("artists.csv", index_col=["id"])

# Keep the artists whose genre mentions Post-Impressionism; rows with a
# missing genre are treated as non-matching.
mentions_postimp = artists["genre"].str.contains("Post-Impressionism", na=False)
postimp = artists[mentions_postimp]

# Show the selection without the columns we do not need. The result is only
# displayed: `postimp` itself keeps every column.
unneeded_columns = ["years", "nationality", "bio", "wikipedia", "paintings"]
postimp.drop(columns=unneeded_columns)

Unnamed: 0_level_0,name,genre
id,Unnamed: 1_level_1,Unnamed: 2_level_1
8,Vincent van Gogh,Post-Impressionism
28,Henri Matisse,"Impressionism,Post-Impressionism"
33,Henri de Toulouse-Lautrec,Post-Impressionism
35,Camille Pissarro,"Impressionism,Post-Impressionism"
38,Paul Cezanne,Post-Impressionism
41,Georges Seurat,Post-Impressionism
46,Paul Gauguin,"Symbolism,Post-Impressionism"


In [3]:
# Collect the artists to look for among the resized paintings.
# Each artist is identified by the last space-separated word of their name.
authors = [full_name.split(" ")[-1] for full_name in postimp.name]
authors

['Gogh',
 'Matisse',
 'Toulouse-Lautrec',
 'Pissarro',
 'Cezanne',
 'Seurat',
 'Gauguin']

In [4]:
import random
import os


# Load every file in the paintings folder and label it: 1 when the
# second-to-last "_"-separated token of the file name is one of `authors`,
# 0 otherwise.
PAINTINGS_DIR = "paintings"

painting_rows = []
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore any later seeded split) the same on every run.
for painting in sorted(os.listdir(PAINTINGS_DIR)):
    by_postimp_author = painting.split("_")[-2] in authors
    painting_rows.append({
        "Postimp": int(by_postimp_author),
        "Pixeldata": skimage.io.imread(fname=os.path.join(PAINTINGS_DIR, painting)),
    })

# Build the frame once from the collected rows. Growing it with
# DataFrame.append copied the whole frame on every iteration, and that method
# no longer exists in pandas 2.x. "Changed_data" is left empty for now.
postimp_paintings = pd.DataFrame(painting_rows, columns=["Postimp", "Pixeldata", "Changed_data"])

In [5]:
# Sanity check: print the (rows, columns) shape of the loaded table.
print(postimp_paintings.shape)

(8355, 3)


In [None]:
def drop_random_rows(image, shrink_factor=1.1, rng=random):
    """Return a copy of ``image`` with a random subset of its rows removed.

    ``ceil(len(image) / shrink_factor)`` distinct row indices (capped at the
    number of rows) are drawn without replacement and deleted along axis 0.
    The surviving rows keep their original order.

    Parameters
    ----------
    image : array-like
        Pixel data whose first axis indexes the rows.
    shrink_factor : float
        Divisor that controls how many rows are removed.
    rng : object with a ``sample(population, k)`` method
        Source of randomness; defaults to the ``random`` module.
    """
    pixels = np.array(image)
    n_rows = len(pixels)
    n_remove = min(n_rows, int(np.ceil(n_rows / shrink_factor)))
    rows_to_remove = rng.sample(range(n_rows), n_remove)
    # One np.delete call removes all chosen rows at once.
    return np.delete(pixels, rows_to_remove, axis=0)


# A dedicated, seeded generator makes the row selection repeatable.
row_rng = random.Random(0)
reduced_images = [
    drop_random_rows(pixels, rng=row_rng) for pixels in postimp_paintings["Pixeldata"]
]

# dtype=object keeps each image as a single cell value.
postimp_paintings["Changed_data"] = pd.Series(
    reduced_images, index=postimp_paintings.index, dtype=object
)
# The full-size pixels are no longer needed once the reduced copies exist.
postimp_paintings = postimp_paintings.drop(columns=["Pixeldata"])

In [None]:
# Work on a renamed copy so `postimp_paintings` stays untouched.
readable_names = {"Postimp": "Post-Impressionism", "Changed_data": "Pixels"}
data = postimp_paintings.copy().rename(columns=readable_names)

# Preview the images stored under index 0 and index 2.
for preview_index in (0, 2):
    skimage.io.imshow(data.Pixels[preview_index])
    plt.show()

https://www.johndcook.com/blog/2009/08/24/algorithms-convert-color-grayscale/ \
How to turn pictures to grayscale

In [None]:
from math import sqrt
def rgb2gray(image):
    """Return a grayscale version of ``image`` using the plain channel average.

    Every pixel has all of its channels replaced by ``(R + G + B) / 3``, so the
    result has the same shape and dtype as the input (for integer images the
    average is truncated when it is stored). The input itself is left
    unmodified.

    Parameters
    ----------
    image : array-like
        Pixel data of shape (rows, columns, channels) with at least three
        channels. Anything with fewer than three dimensions or fewer than
        three channels is treated as already gray.

    Returns
    -------
    numpy.ndarray
        The gray image, or ``image`` itself when it is already gray.
    """
    pixels = np.asarray(image)
    if pixels.ndim < 3 or pixels.shape[-1] < 3:
        return image

    # Sum in float64: adding uint8 channels directly wraps around at 256 and
    # produces far too small averages for bright pixels.
    channel_mean = pixels[..., :3].astype(np.float64).sum(axis=-1) / 3

    # Fill a fresh array instead of overwriting the caller's pixels.
    gray = np.empty_like(pixels)
    gray[...] = channel_mean[..., np.newaxis]
    return gray


In [None]:
def brightness(image, light_threshold=80, medium_threshold=40):
    """Classify an image as 'light', 'medium' or 'dark' by its mean brightness.

    For images with at least three channels the brightness of a pixel is the
    weighted sum ``0.299 * R + 0.587 * G + 0.114 * B``; otherwise the pixel
    value itself is used. The mean is taken over every pixel of the image.

    Parameters
    ----------
    image : array-like
        Pixel data, either (rows, columns) or (rows, columns, channels).
    light_threshold : float
        Mean brightness at or above this value is 'light'.
    medium_threshold : float
        Mean brightness above this value (and below ``light_threshold``) is
        'medium'; everything else is 'dark'.

    Raises
    ------
    ValueError
        If the image contains no pixels.
    """
    pixels = np.asarray(image, dtype=np.float64)
    if pixels.size == 0:
        raise ValueError("brightness() requires a non-empty image")

    if pixels.ndim >= 3 and pixels.shape[-1] >= 3:
        per_pixel = (0.299 * pixels[..., 0]
                     + 0.587 * pixels[..., 1]
                     + 0.114 * pixels[..., 2])
    else:
        # Black-and-white picture: the stored value already is the brightness.
        per_pixel = pixels

    # Average over all pixels, not just the last pixel of each row.
    bright_average = per_pixel.mean()

    if bright_average >= light_threshold:
        return 'light'
    elif bright_average > medium_threshold:
        return 'medium'
    else:
        return 'dark'
    
def contrast(gray_image, step=5, high_threshold=80, medium_threshold=50):
    """Classify the horizontal contrast of a gray image as 'high', 'medium' or 'low'.

    Within every row, each pixel is compared with the pixel ``step`` columns
    to its right. An absolute difference of at least ``high_threshold`` counts
    as high, at least ``medium_threshold`` as medium, anything else as low.
    Each row votes for its most frequent class and the image gets the class
    with the most row votes. Ties are resolved in the order high, medium, low.

    Parameters
    ----------
    gray_image : array-like
        Either (rows, columns) gray values, or (rows, columns, channels), in
        which case the channels of each pixel are averaged first.
    step : int
        Column distance between the two pixels of a comparison (>= 1).
    high_threshold, medium_threshold : float
        Lower bounds of the high and medium difference classes.

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1 or the input is not 2- or 3-dimensional.
    """
    if step < 1:
        raise ValueError("step must be at least 1")

    # float64 avoids unsigned wrap-around when subtracting pixel values.
    pixels = np.asarray(gray_image, dtype=np.float64)
    if pixels.ndim == 3:
        pixels = pixels.mean(axis=-1)
    if pixels.ndim != 2:
        raise ValueError("contrast() expects a 2-D or 3-D image")

    # Only pairs that lie completely inside the row are compared.
    differences = np.abs(pixels[:, step:] - pixels[:, :-step])

    high = (differences >= high_threshold).sum(axis=1)
    low = (differences < medium_threshold).sum(axis=1)
    med = differences.shape[1] - high - low

    # Per-row vote; the parentheses matter because & binds tighter than >=.
    row_is_high = (high >= med) & (high >= low)
    row_is_med = ~row_is_high & (med >= high) & (med >= low)

    high_cont = int(row_is_high.sum())
    med_cont = int(row_is_med.sum())
    low_cont = pixels.shape[0] - high_cont - med_cont

    if high_cont >= med_cont and high_cont >= low_cont:
        return "high"
    elif med_cont >= high_cont and med_cont >= low_cont:
        return "medium"
    else:
        return "low"

Why are we using certain functions?\
    https://mymodernmet.com/post-impressionism/
    https://drawpaintacademy.com/post-impressionism/
    
    Main ideas behind post-impressionism: 
        1)EMOTIONAL SYMBOLISM 
        2)EVOCATIVE COLOR 
        3)DISTINCTIVE BRUSHSTROKES 
        4)POINTILLISM 
        5)JAPONISME 
        6)PRIMITIVISM 
        7)UNNATURAL USE OF LIGHT
    
Points 1, 5 and 6 describe the subject matter of Post-Impressionist art. Points 2, 3, 4 and 7, however, refer directly to what is actually on the easel (est: molbert), so we will implement functions that can bring out these properties. 
    
EVOCATIVE COLOR: 
    We will be analyzing the color palette to determine how the unnatural use of color can indicate whether or not a painting is part of the Post-Impressionist movement
    
DISTINCTIVE BRUSHSTROKES: \
    We will be analyzing sharp contrasts of color over small distances. This should be an indication that colors are not gradually blended and mixed, but that the brushstrokes are visible and can be detected by a computer
    
POINTILLISM: \
    This is a lot like finding the distinctive brushstrokes, but here we will be looking at the painting on a smaller scale 
    
UNNATURAL USE OF LIGHT: \
    As with the color scheme of the paintings, we will be looking for sharp contrasts, but here we take into account lightness and darkness, which, again, do not blend gradually but strongly contrast with one another

In [None]:
from math import sqrt 
    
# This function finds the "average color" of the image, but it squares every
# RGB component first and takes the square root at the end.
def color_sq(image):
    """Classify an image as "colorful", "average" or "dull".

    For every row the root-mean-square of each of the R, G and B channels is
    computed over the pixels of that row and rounded. The three values are
    averaged into a row score and the row scores are averaged into one
    number. A result above 128 is "colorful", above 64 is "average", anything
    else is "dull".

    Parameters
    ----------
    image : array-like
        Pixel data of shape (rows, columns, channels). Images without at
        least three channels are reported as "dull".

    Raises
    ------
    ValueError
        If the image contains no pixels.
    """
    pixels = np.asarray(image)
    if pixels.size == 0:
        raise ValueError("color_sq() requires a non-empty image")
    if pixels.ndim < 3 or pixels.shape[-1] < 3:
        return "dull"

    # float64 keeps the squares from overflowing small integer dtypes.
    rgb = pixels[..., :3].astype(np.float64)

    # Mean over axis 1 divides by the number of pixels in the row (not by the
    # number of channels), giving a true per-row RMS for each channel.
    row_rms = np.round(np.sqrt((rgb ** 2).mean(axis=1)))

    # Every row contributes three values, so the overall mean equals the mean
    # of the per-row (r + g + b) / 3 scores.
    average_color = row_rms.mean()

    if average_color > 128:
        return "colorful"
    elif average_color > 64:
        return "average"
    else:
        return "dull"

In [None]:
def one_hot_levels(label, column_names, known_labels):
    """Spread one categorical label over several 0/1 columns.

    ``known_labels`` line up with the first columns of ``column_names``; any
    label that matches none of them sets the last column instead.
    """
    flags = [int(label == known) for known in known_labels]
    flags.append(int(not any(flags)))
    return dict(zip(column_names, flags))


FEATURE_COLUMNS = ["Bright_light", "Bright_med", "Bright_dark", "Contrast_high", "Contrast_medium",
                   "Contrast_low", "Colorsq_light", "Colorsq_med", "Colorsq_dark"]

feature_rows = []
# Column 1 of `data` holds the pixel arrays.
for pixels in data.iloc[:, 1]:
    row = {}
    row.update(one_hot_levels(brightness(pixels), FEATURE_COLUMNS[0:3], ("light", "medium")))

    # Hand the grayscale conversion a private copy so the stored pixels are
    # never altered before the color feature is computed from them.
    gray = rgb2gray(np.array(pixels))
    row.update(one_hot_levels(contrast(gray), FEATURE_COLUMNS[3:6], ("high", "medium")))

    # color_sq reports "colorful" / "average" / "dull"; these map onto the
    # light / med / dark columns in that order.
    row.update(one_hot_levels(color_sq(pixels), FEATURE_COLUMNS[6:9], ("colorful", "average")))

    feature_rows.append(row)

at_data = pd.DataFrame(feature_rows, columns=FEATURE_COLUMNS, index=data.index)

# A later cell reads `comed` when it re-assigns the Contrast_medium column,
# so keep that name available.
comed = at_data["Contrast_medium"].tolist()

at_data.head(30)

In [None]:
# Put the label/pixel table and the engineered features side by side.
# Re-assigning a column of `at_data` after this point would not change
# `alldata`, which is a new frame built by the concat.
alldata = pd.concat([data, at_data], axis=1, sort=False)
alldata.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Separate the label from the feature columns; the raw pixels are not used
# as model input.
paintings = alldata.drop(columns=["Pixels"])
target = paintings["Post-Impressionism"]
# Note: `data` is rebound here and now refers to the feature table.
data = paintings.drop(columns=["Post-Impressionism"])

# 70 % of the rows go to training; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, train_size=0.7, random_state=0
)

# Cast the labels of both splits to integers.
y_train, y_test = y_train.astype('int'), y_test.astype('int')

In [None]:
# 3-nearest-neighbours classifier, scored on the held-out split.
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)

knn_predictions = model.predict(X_test)
acc = accuracy_score(y_test, knn_predictions)
acc

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Two RBF-kernel SVMs, created with the same settings and fitted on the same
# training split; each accuracy on the test split is printed.
rbf_settings = {"kernel": 'rbf', "gamma": 'auto'}

svm_rbf_1 = SVC(**rbf_settings).fit(X_train, y_train)
rbf_predictions_1 = svm_rbf_1.predict(X_test)
acc2 = accuracy_score(y_test, rbf_predictions_1)
print(acc2)

svm_rbf_2 = SVC(**rbf_settings).fit(X_train, y_train)
rbf_predictions_2 = svm_rbf_2.predict(X_test)
acc = accuracy_score(y_test, rbf_predictions_2)
print(acc)

In [None]:
# Two random forests (100 trees, depth limit 4, seed 0), created with the same
# settings and fitted on the same training split; each test accuracy is printed.
forest_settings = {"n_estimators": 100, "max_depth": 4, "random_state": 0}

rf_1 = RandomForestClassifier(**forest_settings).fit(X_train, y_train)
forest_predictions_1 = rf_1.predict(X_test)
acc = accuracy_score(y_test, forest_predictions_1)
print(acc)

rf_2 = RandomForestClassifier(**forest_settings).fit(X_train, y_train)
forest_predictions_2 = rf_2.predict(X_test)
acc = accuracy_score(y_test, forest_predictions_2)
print(acc)

In [None]:
# Two linear-kernel SVMs, created with the same settings and fitted on the
# same training split; each test accuracy is printed.
linear_settings = {"kernel": 'linear'}

svm_1 = SVC(**linear_settings).fit(X_train, y_train)
linear_predictions_1 = svm_1.predict(X_test)
acc = accuracy_score(y_test, linear_predictions_1)
print(acc)

svm_2 = SVC(**linear_settings).fit(X_train, y_train)
linear_predictions_2 = svm_2.predict(X_test)
acc = accuracy_score(y_test, linear_predictions_2)
print(acc)

In [None]:
# Two degree-2 polynomial-kernel SVMs, created with the same settings and
# fitted on the same training split; each test accuracy is printed.
poly_settings = {"kernel": 'poly', "degree": 2, "gamma": 'auto'}

svm_poly_1 = SVC(**poly_settings).fit(X_train, y_train)
poly_predictions_1 = svm_poly_1.predict(X_test)
acc = accuracy_score(y_test, poly_predictions_1)
print(acc)

svm_poly_2 = SVC(**poly_settings).fit(X_train, y_train)
poly_predictions_2 = svm_poly_2.predict(X_test)
acc = accuracy_score(y_test, poly_predictions_2)
print(acc)