In [3]:
#Import necissary libraries, see readMe installations for additional guidance
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt


In [4]:
'''
Load the MetMeuseumData.csv into the dataframe called df
'''
df = pd.read_csv('../DATA/MetMuseumData.csv')  # adjust filename if needed

In [5]:
'''
In the dataframe each row represent a single art image. For each art image there is a column titled image_path which points to the appropriate image stored in the images folder found within the data folder.

This function goes through each of those images, and makes them into a processed tensor image so the image is machine readable by the model. To explore this idea more reference the "What is a tensor image" handout found within the Articles Folder.
'''
X = []
for path in df['image_path']:
    img = load_img(path, target_size=(224, 224))  # ensure RGB, size is correct
    img_array = img_to_array(img)
    img_array = preprocess_input(img_array)  # ResNet50 expects preprocessed input
    X.append(img_array)

X = np.array(X)

In [6]:
'''
Store binary label encodings in the variable y, where drawings -> 1, paintings -> 0
'''
y = df['label'].values  

In [7]:
#Prepare data for training and testing with 80, 20 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
#Initiation ResNet Model with input shape that matches the insput shape of images. 
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid') 
])

In [9]:
# Freeze base model layers to add layers ontop
# You could consider unfreezing if you want to attempt to fine tune the model at a more granular level but this is good for the purpose of the case study
for layer in base_model.layers:
    layer.trainable = False

In [10]:
#Compile the model declared above 
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])


#Setup early stopping so if model stops improving at a certain rate it will stop early before over fitting to the data when called 
early_stop = EarlyStopping(patience=3, restore_best_weights=True)


In [11]:
#Fit the model to the data with 10 epochs, feel free to experiement with this value 
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=32, callbacks=[early_stop])


Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 4s/step - accuracy: 0.3803 - loss: 0.9058 - val_accuracy: 0.4845 - val_loss: 0.8084
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 3s/step - accuracy: 0.5464 - loss: 0.7066 - val_accuracy: 0.6460 - val_loss: 0.6858
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 3s/step - accuracy: 0.7297 - loss: 0.5828 - val_accuracy: 0.6957 - val_loss: 0.5987
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 3s/step - accuracy: 0.7737 - loss: 0.5334 - val_accuracy: 0.7702 - val_loss: 0.5309
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 3s/step - accuracy: 0.8543 - loss: 0.4381 - val_accuracy: 0.8137 - val_loss: 0.4840
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 3s/step - accuracy: 0.8786 - loss: 0.4024 - val_accuracy: 0.8385 - val_loss: 0.4440
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━━━

In [None]:
#Evaluate the overall accuracy of the image classification model 
loss, acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {acc:.2f}")

In [None]:
# Create graphs to visuazlies the performance of the model over epochs to see how it improves over time. 
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Model Accuracy Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Model Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()