In [2]:
import pandas as pd
import cv2
import tensorflow as tf
import numpy as np
from sklearn import model_selection
from sklearn.model_selection import train_test_split
import keras
import matplotlib.pyplot as plt

In [11]:
# GLOBAL VARIABLES & DATASET LOAD
IMAGE_SIZE = 256
BATCH_SIZE = 8
EPOCHS = 20

df = pd.read_csv("markdown_3.csv",
    names=["filename", "dates", "material_1", "material_2",
           "material_3", "technique", "stamps", "casing"], dtype={'casing': bool})
df.head()

filename = df['filename'].to_list() # make list of all file paths to images
#filename

In [12]:
df = df.replace(np.nan, None)

In [13]:
df

Unnamed: 0,filename,dates,material_1,material_2,material_3,technique,stamps,casing
0,dataset\0001.jpg,XV в.,Дерево,Левкас,,Темпера,False,False
1,dataset\0002.jpg,XVI в.,Дерево,Левкас,,Темпера,False,False
2,dataset\0003.jpg,XV в.,Дерево,Левкас,,Темпера,False,False
3,dataset\0004.jpg,XVI в.,Дерево,Левкас,,Темпера,False,False
4,dataset\0005.jpg,XVI–XVII в.,Дерево,Левкас,Паволока,Темпера,False,False
...,...,...,...,...,...,...,...,...
1123,dataset\1124.jpg,XVII в.,Дерево,Левкас,Паволока,Темпера,False,False
1124,dataset\1125.jpg,XVI в.,Дерево,,,Темпера,False,False
1125,dataset\1126.jpg,XVII в.,Дерево,Левкас,Паволока,Темпера,False,False
1126,dataset\1127.jpg,XVI в.,Дерево,,,Темпера,False,False


In [3]:
# CHANGING IMAGE SIZE TO SQUARE
import os
def padding(path):
    # read image
    img = cv2.imread(os.path.join(os.path.curdir, path))
    old_h, old_w, channels = img.shape

    # create new image of desired size and color (white) for padding
    new_w = max(old_h, old_w)
    new_h = max(old_h, old_w)
    color = (255,255,255)
    result = np.full((new_h,new_w, channels), color, dtype=np.uint8)

    # compute center offset
    x_center = (new_w - old_w) // 2
    y_center = (new_h - old_h) // 2

    # copy img image into center of result image
    result[y_center:y_center+old_h, 
        x_center:x_center+old_w] = img

    return result

In [4]:
# PREPROCESSING TRAINING DATA
pic_matrix = [] # will contain numerical representation of images

for i in filename:
    image = padding(i)
    image = cv2.resize(np.array(image), (IMAGE_SIZE, IMAGE_SIZE))
    image = image/255
    pic_matrix.append(image)

# pic_matrix

In [9]:
dates = df['dates'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(dates)) # словарь

for i in dates:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

['XIII в.', 'ХIХ в.', 'XIV в.', 'XVIII–ХIХ в.', 'XVI в.', 'XII в.', 'XVII–XVIII в.', 'XV в.', 'XIX–XX в.', 'XVIII в.', 'XX в.', 'XVII в.', 'XVI–XVII в.']
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}


In [8]:
material_1 = df['material_1'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(material_1)) # словарь

for i in material_1:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

['Холст', 'Дерево', 'Медный сплав', 'Известковая штукатурка', 'Бронза']
{0, 1, 2, 3, 4}


In [3]:
material_2 = df['material_2'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(material_2)) # словарь

for i in material_2:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

[nan, 'Жесть', 'Бумага', 'Левкас', 'Посеребренье', 'Эмаль']
{0, 1, 2, 3, 4, 5}


In [4]:
material_3 = df['material_3'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(material_3)) # словарь

for i in material_3:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

[nan, 'Позолота', 'Бархат', 'Паволока']
{0, 1, 2, 3}


In [5]:
technique = df['technique'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(technique)) # словарь

for i in technique:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

[nan, 'Масло', 'Темпера', 'Смешанная', 'Чеканка', 'Хромолитография', 'Резьба', 'Литье']
{0, 1, 2, 3, 4, 5, 6, 7}


In [6]:
stamps = df['stamps'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(stamps)) # словарь

for i in stamps:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

[False, True]
{0, 1}


In [7]:
casing = df['casing'].to_list() # make list of all dates

#dates
label = [] # will contain numerical representaion of dates
l = list(set(casing)) # словарь

for i in casing:
    label.append(l.index(i)) # each date will equal its index in dict

print(l)
print(set(label))

[False, True]
{0, 1}


In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(pic_matrix, label, test_size=0.2)
# del(pic_matrix)

In [10]:
# ds_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
# ds_train = ds_train.batch(BATCH_SIZE, drop_remainder=True)

# ds_test = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
# ds_test = ds_train.batch(BATCH_SIZE, drop_remainder=True)

ds_train = tf.data.Dataset.from_tensor_slices((pic_matrix, label))
ds_train = ds_train.batch(BATCH_SIZE, drop_remainder=True)

In [11]:
class_num = len(l) # number of classes (in this case, number of different dates)

model = keras.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation="relu", input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu" ),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu" ),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(class_num, activation='softmax'), #num of classes from len(l)
])

In [None]:
model.compile(
    optimizer = tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()
history = model.fit(ds_train, epochs=EPOCHS)

In [None]:
# VISUALISATION OF RESULTS
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(EPOCHS)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [2]:
a = int(7/2)

print(a)

3
