In [None]:
import matplotlib.pyplot as plt
import requests
from PIL import Image
import pandas as pd
from sklearn.model_selection import train_test_split
import cv2
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
import os 
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from keras.regularizers import l2
from keras.layers import Convolution2D, Dropout, Dense, Flatten, Activation, GlobalAveragePooling2D
from keras.models import Sequential
from keras.models import Model,  load_model
from tensorflow.keras.applications.resnet import preprocess_input
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.utils import class_weight
from keras.metrics import Precision, Recall
from sklearn.metrics import roc_auc_score
from keras.applications.resnet50 import decode_predictions

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook as tqdm


In [None]:
# Metadata tables published for the KU Leuven Datathon 2023 (read straight from S3).
artwork_url = 'https://kuleuven-datathon-2023.s3.eu-central-1.amazonaws.com/data/Artwork.parquet.gzip'
generated_url = 'https://kuleuven-datathon-2023.s3.eu-central-1.amazonaws.com/data/Generated.parquet.gzip'

artworks_df = pd.read_parquet(artwork_url)
generated_df = pd.read_parquet(generated_url)

In [None]:
# Keep only the artworks whose rating is strictly positive.
has_positive_rating = artworks_df['rating'] > 0
artworks_df = artworks_df[has_positive_rating]

In [None]:
# Colab-only step: mount Google Drive so that the image folders and the saved model
# under /content/drive/MyDrive/ (used by the cells below) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# https://drive.google.com/drive/folders/1O22SRGsRG4RWNEk0zj84Orda4Xs3d8fU?usp=sharing
# Load every image of the "rt" folder, resized to the 224x224 input size of the ResNet50 model.
# NOTE(review): cv2.imread yields BGR channel order while the Keras ResNet preprocess_input
# is documented for RGB input -- confirm whether a conversion is wanted; it would have to be
# applied identically everywhere images are loaded (training, test and single-image inference).
os.chdir('/content/drive/MyDrive/datathon_images_23/rt/')
x_img = []
# Sort the listing: os.listdir() returns entries in arbitrary order, which would make the
# sample order (and therefore the seeded train/val/test split) differ between runs.
for fname in tqdm(sorted(os.listdir())):
    img = cv2.imread(fname)
    if img is None:
        # cv2.imread returns None (no exception) for sub-directories and files it cannot
        # decode; cv2.resize would then fail with an opaque assertion error.
        # Labels built downstream must be sized to the number of images actually loaded.
        print(f"Skipping unreadable file: {fname}")
        continue
    x_img.append(cv2.resize(img, (224, 224)))

  0%|          | 0/4546 [00:00<?, ?it/s]

In [None]:
# https://drive.google.com/drive/folders/1McDyyAV7Lx4QnCWuqWQos-rQKs6DlG7k?usp=sharing
#os.chdir('/content/drive/MyDrive/datathon_images_23/no_rt/')
#x_img_no_rt = []
#for i in tqdm(os.listdir()):
#      img = cv2.imread(i)   
#      img = cv2.resize(img,(224,224)) # The VGG model just accept images on the 224x224 format
#      x_img_no_rt.append(img)

  0%|          | 0/5971 [00:00<?, ?it/s]

In [None]:
# https://drive.google.com/drive/folders/1uv5v2qZco7KYuGAxhgVz5OTMah9RcLXB?usp=sharing
# Load every image of the "AI" folder, resized to the 224x224 input size of the ResNet50 model.
os.chdir('/content/drive/MyDrive/datathon_images_23/AI/')
x_img_ai = []
# Sort the listing: os.listdir() returns entries in arbitrary order, which would make the
# sample order (and therefore the seeded train/val/test split) differ between runs.
for fname in tqdm(sorted(os.listdir())):
    img = cv2.imread(fname)
    if img is None:
        # cv2.imread returns None (no exception) for sub-directories and files it cannot
        # decode; cv2.resize would then fail with an opaque assertion error.
        # Labels built downstream must be sized to the number of images actually loaded.
        print(f"Skipping unreadable file: {fname}")
        continue
    x_img_ai.append(cv2.resize(img, (224, 224)))

  0%|          | 0/3446 [00:00<?, ?it/s]

In [None]:
# One flat sample list: "rt" images first, AI images second.
# x_img_no_rt is currently left out (its loading cell is commented out).
x_joined = [*x_img, *x_img_ai]

In [None]:
# Binary targets aligned with x_joined: 1 for each image in x_img, 0 for each image in x_img_ai.
# The run lengths come from the image lists themselves, because x_joined is exactly
# x_img + x_img_ai; sizing them from the dataframes needed a manual "-1" correction and
# breaks as soon as a table row has no matching image on disk.
y_joined = np.concatenate((np.repeat(1, len(x_img)),
                           np.repeat(0, len(x_img_ai))))

In [None]:
# Hold out 20% of all samples as the test set.
x_trainval, x_test, y_trainval, y_test = train_test_split(
    x_joined,
    y_joined,
    random_state=10,
    test_size=0.2,
)

# Carve 25% of the remaining samples out for validation (60/20/20 overall).
x_train, x_val, y_train, y_val = train_test_split(
    x_trainval,
    y_trainval,
    random_state=10,
    test_size=0.25,
)


In [None]:
# "Balanced" weights computed from the label frequencies of the training split.
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train,
)

# model.fit() expects a {class index: weight} mapping.
class_weights_dict = {index: weight for index, weight in enumerate(class_weights)}

In [None]:
# Encode the integer labels and expand them to one-hot vectors (two columns, one per class).
le = preprocessing.LabelEncoder()
y_train = le.fit_transform(y_train)
# Reuse the mapping learned on the training labels: re-fitting on the validation labels
# could assign different codes if the two splits did not contain the same set of classes.
y_val = le.transform(y_val)
y_train = tf.keras.utils.to_categorical(y_train, num_classes=2)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=2)

In [None]:
# Stack the per-image arrays (and the label matrices) into NumPy arrays for Keras.
x_train, x_val = np.array(x_train), np.array(x_val)
y_train, y_val = np.array(y_train), np.array(y_val)

In [None]:
# Sanity check: report the shape of every training / validation array.
for label, array in (
    ("x_train", x_train),
    ("x_val", x_val),
    ("y_train", y_train),
    ("y_val", y_val),
):
    print(f"{label} Shape: ", array.shape)

x_train Shape:  (4794, 224, 224, 3)
x_val Shape:  (1599, 224, 224, 3)
y_train Shape:  (4794, 2)
y_val Shape:  (1599, 2)


In [None]:
# Training-time generator: ResNet preprocessing plus random shear, zoom and
# horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    preprocessing_function=preprocess_input,
)

In [None]:
# Validation-time generator: ResNet preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)
 

In [None]:
# ImageNet-pretrained ResNet50 backbone, without its original classification top.
pretrained_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Mark every backbone layer as non-trainable.
for backbone_layer in pretrained_model.layers:
    backbone_layer.trainable = False


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Classification head on top of the backbone: global average pooling, then a 2-way softmax.
pooled_features = tf.keras.layers.GlobalAveragePooling2D()(pretrained_model.output)
resnet = tf.keras.layers.Dense(2, activation='softmax')(pooled_features)

In [None]:
# Assemble the full network: backbone input -> softmax head output.
model_resnet = tf.keras.Model(
    inputs=pretrained_model.input,
    outputs=resnet,
)

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss over the two softmax
# outputs, tracking accuracy, precision, recall and AUC.
# With one-hot targets, un-parameterised Precision()/Recall() pool both output columns:
# every correct sample counts as one true positive and every wrong sample as one false
# positive AND one false negative, so both metrics collapse to the accuracy value.
# Restricting them to class_id=1 (label 1, i.e. the non-AI images in y_joined) yields
# real per-class precision and recall.
model_resnet.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(class_id=1), Recall(class_id=1), 'AUC'],
)

In [None]:
# Stop training once validation accuracy has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch's weights; without it
# the model keeps (and later saves) the weights reached after 5 non-improving epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Fit the head: augmented batches for training, preprocessing-only batches for validation.
batch_size = 60
train_flow = train_datagen.flow(x_train, y_train, batch_size=batch_size)
val_flow = val_datagen.flow(x_val, y_val, batch_size=batch_size)

history = model_resnet.fit(
    train_flow,
    validation_data=val_flow,
    steps_per_epoch=len(x_train) // batch_size,
    validation_steps=len(x_val) // batch_size,
    epochs=20,
    class_weight=class_weights_dict,
    callbacks=[early_stopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# https://drive.google.com/drive/folders/1-hkLWuflKAtd7iKRNwHIhaK-vTR5hoDQ?usp=sharing
# Persist the trained network (architecture + weights) as a single HDF5 file on Drive.
trained_model_path = '/content/drive/MyDrive/datathon_images_23/trained/resnet50_trained.h5'
model_resnet.save(trained_model_path)

In [None]:
# Reload the saved network from Drive
# https://drive.google.com/drive/folders/1-hkLWuflKAtd7iKRNwHIhaK-vTR5hoDQ?usp=sharing
saved_model_path = '/content/drive/MyDrive/datathon_images_23/trained/resnet50_trained.h5'
model = load_model(saved_model_path)

In [None]:
# Load the test data and labels

In [None]:
# One-hot encode the held-out test labels.
# Use the encoder fitted earlier in the notebook (transform, not fit_transform) so the
# test labels get exactly the same codes as the labels the model was trained on.
y_test_1 = le.transform(y_test)
y_test_1 = tf.keras.utils.to_categorical(y_test_1, num_classes=2)
y_test_1 = np.array(y_test_1)
# `le_1` used to be created here but was never used; the name stays bound (now to the
# fitted encoder) in case another cell refers to it.
le_1 = le

In [None]:
# Test-time generator: ResNet preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


In [None]:
# Deterministic (unshuffled, un-augmented) batch iterator over the test set.
# The labels must be the one-hot `y_test_1`, matching the model's two-column softmax
# output; the raw integer `y_test` would not line up with that output shape.
test_generator = test_datagen.flow(
    np.array(x_test), y_test_1,
    batch_size=60,
    shuffle=False
)

In [None]:
# Score the trained network on the held-out test images (in order, preprocessing only).
test_flow = test_datagen.flow(
    np.array(x_test),
    y_test_1,
    shuffle=False,
    batch_size=60,
)
test_metrics = model_resnet.evaluate(test_flow)



In [None]:
# Values are listed in compile() order: loss, accuracy, precision, recall, AUC.
test_metrics

[0.08302640914916992,
 0.9706066250801086,
 0.9706066250801086,
 0.9706066250801086,
 0.9918256998062134]

In [None]:
def decode_binary_predictions(preds, top=1):
    """Turn per-class scores into ``(label, score)`` pairs, best class first.

    Parameters
    ----------
    preds : array-like of shape (n_samples, 2)
        One row of class scores per sample. Column 0 is reported as
        'AI Generated' and column 1 as 'No AI Generated'.
    top : int, default 1
        How many of the highest-scoring classes to return per sample.
        Values of 0 or less yield an empty list for every sample; values
        larger than the number of classes return all classes.

    Returns
    -------
    list of list of (str, score)
        For each sample, the ``top`` best classes in descending score order.
    """
    class_labels = ['AI Generated', 'No AI Generated']
    # Clamp explicitly: with the slice ``[-top:]`` a value of 0 would select
    # every class instead of none.
    keep = max(top, 0)
    results = []
    # np.asarray lets plain nested lists be passed as well as NumPy arrays.
    for pred in np.asarray(preds):
        ranked = np.argsort(pred)[::-1][:keep]
        results.append([(class_labels[i], pred[i]) for i in ranked])
    return results

In [None]:
# Load a single image for a manual prediction check.
# Alternative samples:
#img_path = '/content/drive/MyDrive/datathon_images_23/no_rt/10517.jpg'
#img_path = '/content/drive/MyDrive/datathon_images_23/rt/10210.jpg'
img_path = '/content/drive/MyDrive/datathon_images_23/AI/2047.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals a missing or undecodable file by returning None, which would
    # otherwise surface as an opaque error inside cv2.resize.
    raise FileNotFoundError(f"Could not read image: {img_path}")
img = cv2.resize(img, (224, 224))  # same 224x224 size as the training images
img_np = np.array(img)
 

In [None]:
# Preprocessing: prepend a batch axis, then run the ResNet input preprocessing on a
# copy so the raw pixel array stays untouched.
img_batch = img_np[np.newaxis, ...]
processed_image = preprocess_input(img_batch.copy())

In [None]:
# Run the reloaded model on the single preprocessed image and display the raw scores
# (one row per image, one column per class).
predictions = model.predict(processed_image) 
predictions



array([[1.0000000e+00, 3.1215173e-11]], dtype=float32)

In [None]:
# Translate the raw scores into the best (label, score) pair per image.
decoded_preds = decode_binary_predictions(predictions)
print(decoded_preds)

[[('AI Generated', 1.0)]]
