In [2]:
import tensorflow as tf

import keras

import pandas

import sklearn

import matplotlib

from PIL import Image

import numpy as np

In [3]:
@tf.function
def scale_resize_image(image, size=(112, 112)):
    """Convert an image to float32 and resize it to the network input size.

    Args:
        image: Image array or tensor accepted by
            ``tf.image.convert_image_dtype``. The loaders in this notebook
            pass a NumPy array built from a PIL image.
        size: ``(height, width)`` handed to ``tf.image.resize``. Defaults to
            ``(112, 112)``, which is the spatial size of the arrays the
            loaders in this notebook allocate.

    Returns:
        The converted and resized ``tf.float32`` tensor.
    """
    # For uint8 input this is equivalent to dividing the pixel values by 255.
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Resize to `size` (112x112 by default).
    image = tf.image.resize(image, size)
    return image

In [4]:
def get_data_CNN(part, nb_parts, n_samples=1890):
    """Load one chunk of images and mean ratings and split it 75/25.

    The first ``n_samples`` images are divided into ``nb_parts`` consecutive
    chunks; chunk number ``part`` is loaded, and its first three quarters are
    returned as training data with the remainder as validation data.

    Args:
        part: Zero-based index of the chunk to load (``0 <= part < nb_parts``).
        nb_parts: Number of chunks the ``n_samples`` images are divided into.
        n_samples: Number of leading images that form the training pool.
            Defaults to 1890, the value previously hard-coded here.

    Returns:
        ``((scores_train, images_train), (scores_val, images_val))`` where
        the score arrays have shape ``(n, 1)`` and the image arrays have shape
        ``(n, 112, 112, 3)``.

    Raises:
        ValueError: If ``nb_parts`` is not positive or ``part`` is outside
            ``[0, nb_parts)``.
    """
    # Local imports keep this cell runnable on its own.
    import numpy as np
    import pandas
    from PIL import Image

    if nb_parts <= 0 or not 0 <= part < nb_parts:
        raise ValueError(
            "part must satisfy 0 <= part < nb_parts (got part=%r, nb_parts=%r)"
            % (part, nb_parts)
        )

    part_size = n_samples / nb_parts
    begin_index = int(part_size * part)
    # middle_index is relative to the start of the chunk, not to the CSV.
    middle_index = int((part_size / 4) * 3)
    end_index = int(part_size * (part + 1))

    print("begin_index: " + str(begin_index))
    print("middle_index: " + str(middle_index))
    print("end_index: " + str(end_index))

    batch_size = end_index - begin_index

    print("batch_size: " + str(batch_size))

    # LOAD DATA FROM CSV

    images = pandas.read_csv('Data/an_art_images.csv')
    data = pandas.read_csv('Data/an_art_block_data.csv')

    # CONVERT DATA TO NUMPY ARRAYS

    images_array = np.zeros((batch_size, 112, 112, 3))
    scores_array = np.zeros((batch_size, 1))

    # IMAGES

    # Materialise the column once instead of on every loop iteration.
    filenames = images['image_filename'].values

    for i in range(batch_size):
        image_location = filenames[begin_index + i]
        # The context manager closes the file handle once the pixels are read.
        with Image.open('Data/Images/Final_resized_selection_batch1/' + image_location) as im:
            # Force three channels so the result always fits (112, 112, 3).
            pixels = np.array(im.convert('RGB'))
        images_array[i] = scale_resize_image(pixels)

    # SCORES

    # Only the 'response' column is averaged; averaging every column fails on
    # non-numeric columns in recent pandas versions.
    # NOTE(review): scores are matched to images by position, i.e. this
    # assumes row k of an_art_images.csv is the k-th smallest image_id - confirm.
    mean_response = data.groupby('image_id')['response'].mean().values

    for i in range(batch_size):
        scores_array[i] = mean_response[begin_index + i]

    # Scale by the largest magnitude found in this chunk; skip the division
    # when every score is zero to avoid 0/0.
    abs_max = np.amax(np.abs(scores_array)) if batch_size else 0
    if abs_max != 0:
        scores_array = scores_array / abs_max

    # SPLIT DATA IN TWO PARTS FOR TRAINING

    scores_array_train = scores_array[:middle_index]
    images_array_train = images_array[:middle_index]

    scores_array_val = scores_array[middle_index:]
    images_array_val = images_array[middle_index:]

    print("scores_array_val.shape: " + str(scores_array_val.shape))
    print("images_array_val.shape: " + str(images_array_val.shape))

    return (scores_array_train, images_array_train), (scores_array_val, images_array_val)

In [5]:
def get_test_data_CNN(begin_index=1890, n_samples=100):
    """Load the held-out test images together with their mean ratings.

    Args:
        begin_index: Position of the first test image in
            ``an_art_images.csv``. Defaults to 1890, the value previously
            hard-coded here.
        n_samples: Number of consecutive images to load. Defaults to 100.

    Returns:
        ``(scores_array, images_array)`` with shapes ``(n_samples, 1)`` and
        ``(n_samples, 112, 112, 3)``.
    """
    # Local imports keep this cell runnable on its own.
    import numpy as np
    import pandas
    from PIL import Image

    # LOAD DATA FROM CSV

    images = pandas.read_csv('Data/an_art_images.csv')
    data = pandas.read_csv('Data/an_art_block_data.csv')

    # CONVERT DATA TO NUMPY ARRAYS

    images_array = np.zeros((n_samples, 112, 112, 3))
    scores_array = np.zeros((n_samples, 1))

    # IMAGES

    # Materialise the column once instead of on every loop iteration.
    filenames = images['image_filename'].values

    for i in range(n_samples):
        image_location = filenames[begin_index + i]
        # The context manager closes the file handle once the pixels are read.
        with Image.open('Data/Images/Final_resized_selection_batch1/' + image_location) as im:
            # Force three channels so the result always fits (112, 112, 3).
            pixels = np.array(im.convert('RGB'))
        images_array[i] = scale_resize_image(pixels)

    # SCORES

    # Only the 'response' column is averaged; averaging every column fails on
    # non-numeric columns in recent pandas versions.
    # NOTE(review): scores are matched to images by position, i.e. this
    # assumes row k of an_art_images.csv is the k-th smallest image_id - confirm.
    mean_response = data.groupby('image_id')['response'].mean().values

    for i in range(n_samples):
        scores_array[i] = mean_response[begin_index + i]

    # NOTE(review): the scores are scaled by the maximum of this test slice,
    # which need not equal the scale factor used for the training chunks.
    abs_max = np.amax(np.abs(scores_array)) if n_samples else 0
    if abs_max != 0:  # avoid 0/0 when every score is zero
        scores_array = scores_array / abs_max

    return (scores_array, images_array)
    

In [8]:
def get_data_SOM_pretrain():
    """Build the normalised participant x image rating matrix.

    Reads ``Data/an_art_block_data.csv`` and writes each of the first 20160
    responses into a ``(34, 1990)`` matrix, using ``id / 3 - 1`` as the row
    (participant) and column (image) index.

    Returns:
        A ``(34, 1990)`` float array scaled by the largest absolute rating.
        Cells that never receive a response stay 0; cells whose response is
        NaN are set to 0.5.
    """
    # Local imports keep this cell runnable on its own.
    import numpy as np
    import pandas

    # LOAD DATA FROM CSV

    data = pandas.read_csv('Data/an_art_block_data.csv')

    # Materialise each column once; slicing the DataFrame inside the loop
    # rebuilt a full column array on every iteration.
    participant_ids = data['participant_id'].values
    image_ids = data['image_id'].values
    responses = data['response'].values

    # CREATE CLUSTER ARRAY

    cluster_array = np.zeros((34, 1990))

    for i in range(20160): # for each response, check participant and painting and add to cluster_array
        # presumably ids are multiples of 3 starting at 3 - verify against the CSVs
        participant_index = int(participant_ids[i] / 3 - 1)
        image_index = int(image_ids[i] / 3 - 1)
        cluster_array[participant_index][image_index] = responses[i]

    # nanmax ignores missing responses. A plain max would return NaN as soon
    # as one response is NaN, turning the whole matrix into NaN and then into
    # a constant 0.5 on the next line.
    abs_max = np.nanmax(np.abs(cluster_array))
    cluster_array = cluster_array / abs_max
    cluster_array[np.isnan(cluster_array)] = 0.5 # fill missing responses with a mid-scale placeholder

    return cluster_array

In [9]:
def unison_shuffled_copies(a, b, c):
    """Return shuffled copies of three arrays using one shared permutation.

    Args:
        a, b, c: NumPy arrays with the same length along the first axis.

    Returns:
        ``(a[p], b[p], c[p])`` where ``p`` is a single random permutation, so
        rows that were aligned before the shuffle stay aligned afterwards.

    Raises:
        AssertionError: If the three arrays do not have the same length.
    """
    # All three lengths must match; previously a longer `c` was silently
    # truncated because only `a` and `b` were compared.
    assert len(a) == len(b) == len(c)
    p = np.random.permutation(len(a))
    return a[p], b[p], c[p]

In [11]:
def get_data_SOM_network_final():
    """Build per-response inputs (rating profile, image, rating) for the SOM network.

    For each of the first 20160 rows of ``Data/an_art_block_data.csv`` this
    produces the rated image, the rating itself, and the full normalised
    rating row of the participant who gave it.

    Returns:
        Three lists ``[cluster, image, rating]`` for the train, validation
        and test splits, in that order. Rows ``[0, 15120)`` form the training
        split, ``[15120, 19910)`` the validation split and the last 250 rows
        the test split. Array shapes are ``(n, 1990)``, ``(n, 112, 112, 3)``
        and ``(n, 1)``.
    """
    # Local imports keep this cell runnable on its own.
    import numpy as np
    import pandas
    from PIL import Image

    batch_size = 20160

    middle_index = int((batch_size / 4) * 3)
    end_index = batch_size - 250

    # LOAD DATA FROM CSV

    images = pandas.read_csv('Data/an_art_images.csv')
    data = pandas.read_csv('Data/an_art_block_data.csv')

    # Materialise each column once; slicing the DataFrame inside the loops
    # rebuilt a full column array on every iteration.
    filenames = images['image_filename'].values
    responses = data['response'].values
    # presumably ids are multiples of 3 starting at 3 - verify against the CSVs
    participant_indices = [int(data_id / 3 - 1) for data_id in data['participant_id'].values[:batch_size]]
    image_indices = [int(data_id / 3 - 1) for data_id in data['image_id'].values[:batch_size]]

    # CREATE CLUSTER ARRAY

    cluster_array = np.zeros((34, 1990))

    for i in range(batch_size): # for each response, check participant and painting and add to cluster_array
        cluster_array[participant_indices[i]][image_indices[i]] = responses[i]

    # nanmax ignores missing responses. A plain max would return NaN as soon
    # as one response is NaN, turning the whole matrix into NaN and then into
    # a constant 0.1 on the next line.
    cluster_array = cluster_array / np.nanmax(cluster_array)
    cluster_array[np.isnan(cluster_array)] = 0.1 # fill missing responses with a small placeholder

    # CONVERT DATA TO NUMPY ARRAYS

    rating_array = np.zeros((batch_size, 1))
    # NOTE(review): float64 storage for 20160 images of 112x112x3 needs
    # roughly 6 GB; consider float32 if downstream code allows it.
    image_array = np.zeros((batch_size, 112, 112, 3))
    clusterinfo_array = np.zeros((batch_size, 1990))

    # Every image is rated by several participants, so decode and resize each
    # file once and reuse the result.
    decoded_images = {}

    for i in range(batch_size):

        #ratings

        rating_array[i] = responses[i]

        #images

        image_index = image_indices[i]
        if image_index not in decoded_images:
            image_location = filenames[image_index]
            # The context manager closes the file handle once the pixels are read.
            with Image.open('Data/Images/Final_resized_selection_batch1/' + image_location) as im:
                # Force three channels so the result always fits (112, 112, 3).
                pixels = np.array(im.convert('RGB'))
            resized = np.array(scale_resize_image(pixels))
            # Replace NaN per image rather than scanning the full image_array.
            resized[np.isnan(resized)] = 0.1
            decoded_images[image_index] = resized
        image_array[i] = decoded_images[image_index]

        #clusters

        # cluster_array holds no NaN at this point, so neither do these rows.
        clusterinfo_array[i] = cluster_array[participant_indices[i]]

    rating_array[np.isnan(rating_array)] = 0.1

    rating_array = rating_array / np.max(rating_array)

    # Only a 0/0 in the division above can reintroduce NaN.
    rating_array[np.isnan(rating_array)] = 0.5

    # SPLIT DATA INTO PARTS FOR TRAINING

    # NOTE(review): rows are not shuffled here, so the split follows the row
    # order of the CSV - confirm this is intended.
    rating_array_train = rating_array[:middle_index]
    image_array_train = image_array[:middle_index]
    cluster_array_train = clusterinfo_array[:middle_index]

    rating_array_val = rating_array[middle_index:end_index]
    image_array_val = image_array[middle_index:end_index]
    cluster_array_val = clusterinfo_array[middle_index:end_index]

    rating_array_test = rating_array[end_index:]
    image_array_test = image_array[end_index:]
    cluster_array_test = clusterinfo_array[end_index:]

    return [cluster_array_train,image_array_train,rating_array_train], [cluster_array_val,image_array_val,rating_array_val], [cluster_array_test, image_array_test, rating_array_test]

In [10]:
def get_data_manual_cluster_network_final():
    """Build per-response inputs (DBSCAN cluster label, image, rating).

    Participants are clustered with DBSCAN on their normalised rating rows.
    For each of the first 20160 rows of ``Data/an_art_block_data.csv`` this
    produces the rated image, the rating, and the scaled cluster label of the
    participant who gave it. Rows are shuffled before splitting; the shuffle
    uses NumPy's global random state, so seed it beforehand for a
    reproducible split.

    Returns:
        Three lists ``[cluster, image, rating]`` for the train, validation
        and test splits, in that order. After shuffling, rows ``[0, 15120)``
        form the training split, ``[15120, 19910)`` the validation split and
        the last 250 rows the test split. Array shapes are ``(n, 1)``,
        ``(n, 112, 112, 3)`` and ``(n, 1)``.
    """
    # Local imports keep this cell runnable on its own.
    import numpy as np
    import pandas
    from PIL import Image
    from sklearn.cluster import DBSCAN

    batch_size = 20160

    middle_index = int((batch_size / 4) * 3)
    end_index = batch_size - 250

    # LOAD DATA FROM CSV

    images = pandas.read_csv('Data/an_art_images.csv')
    data = pandas.read_csv('Data/an_art_block_data.csv')

    # Materialise each column once; slicing the DataFrame inside the loops
    # rebuilt a full column array on every iteration.
    filenames = images['image_filename'].values
    responses = data['response'].values
    # presumably ids are multiples of 3 starting at 3 - verify against the CSVs
    participant_indices = [int(data_id / 3 - 1) for data_id in data['participant_id'].values[:batch_size]]
    image_indices = [int(data_id / 3 - 1) for data_id in data['image_id'].values[:batch_size]]

    # CREATE CLUSTER ARRAY

    cluster_array = np.zeros((34, 1990))

    for i in range(batch_size): # for each response, check participant and painting and add to cluster_array
        cluster_array[participant_indices[i]][image_indices[i]] = responses[i]

    # Missing responses are zeroed before scaling so they cannot poison the max.
    cluster_array[np.isnan(cluster_array)] = 0.0

    abs_max = np.amax(np.abs(cluster_array))
    cluster_array = cluster_array / abs_max
    cluster_array[np.isnan(cluster_array)] = 0.5 # only reachable via 0/0 when every rating is zero

    clustering = DBSCAN(eps=12, min_samples=2).fit(cluster_array)

    # DBSCAN marks noise with -1 and clusters with 0..k-1. Shifting by one
    # keeps noise (0) distinct from every cluster (1..k); taking the absolute
    # value instead would merge noise with cluster 1.
    shifted_labels = clustering.labels_ + 1

    # CONVERT DATA TO NUMPY ARRAYS

    rating_array = np.zeros((batch_size, 1))
    # NOTE(review): float64 storage for 20160 images of 112x112x3 needs
    # roughly 6 GB; consider float32 if downstream code allows it.
    image_array = np.zeros((batch_size, 112, 112, 3))
    clusterinfo_array = np.zeros((batch_size, 1))

    # Every image is rated by several participants, so decode and resize each
    # file once and reuse the result.
    decoded_images = {}

    for i in range(batch_size):

        #ratings

        rating_array[i] = responses[i]

        #images

        image_index = image_indices[i]
        if image_index not in decoded_images:
            image_location = filenames[image_index]
            # The context manager closes the file handle once the pixels are read.
            with Image.open('Data/Images/Final_resized_selection_batch1/' + image_location) as im:
                # Force three channels so the result always fits (112, 112, 3).
                pixels = np.array(im.convert('RGB'))
            resized = np.array(scale_resize_image(pixels))
            # REPLACE ZEROES WITH SMALL NUMBERS (done per image so no second
            # full-size copy of image_array is allocated)
            resized[resized == 0] = 0.1
            resized[np.isnan(resized)] = 0.1
            decoded_images[image_index] = resized
        image_array[i] = decoded_images[image_index]

        #clusters

        clusterinfo_array[i] = shifted_labels[participant_indices[i]]

    # REPLACE ZEROES WITH SMALL NUMBERS

    rating_array[rating_array == 0] = 0.1
    rating_array[np.isnan(rating_array)] = 0.1

    # Scale cluster labels into [0, 1]. If every sample is noise there is
    # nothing to scale by, so fall back to the constant 0.5.
    max_label = np.amax(clusterinfo_array)
    if max_label > 0:
        clusterinfo_array = clusterinfo_array / max_label
    else:
        clusterinfo_array = np.full_like(clusterinfo_array, 0.5)

    rating_array = rating_array / np.max(rating_array)
    rating_array[np.isnan(rating_array)] = 0.5

    # SHUFFLE DATA

    [rating_array, image_array, clusterinfo_array] = unison_shuffled_copies(rating_array, image_array, clusterinfo_array)

    # SPLIT DATA INTO PARTS FOR TRAINING

    rating_array_train = rating_array[:middle_index]
    image_array_train = image_array[:middle_index]
    cluster_array_train = clusterinfo_array[:middle_index]

    rating_array_val = rating_array[middle_index:end_index]
    image_array_val = image_array[middle_index:end_index]
    cluster_array_val = clusterinfo_array[middle_index:end_index]

    rating_array_test = rating_array[end_index:]
    image_array_test = image_array[end_index:]
    cluster_array_test = clusterinfo_array[end_index:]

    return [cluster_array_train,image_array_train,rating_array_train], [cluster_array_val,image_array_val,rating_array_val], [cluster_array_test, image_array_test, rating_array_test]