In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
from tensorflow.keras.preprocessing import image
import tensorflow as tf
from scipy.stats import norm
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization, concatenate, Conv2D, MaxPooling2D
from tensorflow.keras.models import Model


In [2]:
# Read the metadata CSV into a DataFrame
metadata_path = "C:\\Users\\kevin\\OneDrive\\문서\\website\\intern\\dataset-master\\csv_files\\targets393_metadata.csv"
metadata = pd.read_csv(metadata_path, encoding='ISO-8859-1')

# Build the full path of every PNG file found in the image folder
image_folder = "C:\\Users\\kevin\\OneDrive\\문서\\website\\intern\\targets393\\targets"
image_files = [
    os.path.join(image_folder, entry)
    for entry in os.listdir(image_folder)
    if entry.endswith('.png')
]

In [3]:
# Keep only the metadata rows whose 'filename' matches an image found on disk
image_names = list(map(os.path.basename, image_files))
has_image = metadata['filename'].isin(image_names)
matched_metadata = metadata[has_image]

In [4]:
# List all image files in the folder
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.png')]

# Extract sizes of the images. Each file is opened inside a context manager so
# its handle is closed as soon as the size has been read, instead of leaving
# hundreds of open files for the garbage collector to clean up.
image_sizes = []
for img_path in image_files:
    with Image.open(img_path) as img:
        image_sizes.append(img.size)

# Convert sizes to a DataFrame for easy analysis
df = pd.DataFrame(image_sizes, columns=['Width', 'Height'])

# Count the occurrences of each unique size, most frequent first
size_counts = (
    df.groupby(['Width', 'Height'])
    .size()
    .reset_index(name='Counts')
    .sort_values(by='Counts', ascending=False)
)

print(size_counts.head())

     Width  Height  Counts
142   1000     563       7
311   1000    1000       6
240   1000     773       5
271   1000     868       5
177   1000     647       5


In [5]:
from PIL import Image
import os

# Path to your image folder
image_folder = "C:\\Users\\kevin\\OneDrive\\문서\\website\\intern\\targets393\\targets"

# Output folder to save resized images (created if it does not exist yet)
output_folder = "C:\\Users\\kevin\\OneDrive\\문서\\website\\intern\\targets393\\resized_targets"
os.makedirs(output_folder, exist_ok=True)

# List all image files in the folder
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.png')]

# Resize images to a fixed size and save them under the same file name.
# Image.ANTIALIAS was a deprecated alias of the Lanczos filter and has been
# removed from recent Pillow releases; Image.LANCZOS selects the same filter.
# NOTE(review): forcing every image to 1000x1000 does not preserve the aspect
# ratio of non-square originals - confirm this is intended.
target_size = (1000, 1000)
for img_path in image_files:
    with Image.open(img_path) as img:
        img_resized = img.resize(target_size, Image.LANCZOS)
        output_path = os.path.join(output_folder, os.path.basename(img_path))
        img_resized.save(output_path)

print(f"Resized images saved to {output_folder}")


  img_resized = img.resize(target_size, Image.ANTIALIAS)


Resized images saved to C:\Users\kevin\OneDrive\문서\website\intern\targets393\resized_targets


In [6]:
# Compute d prime scores for dataset
def d_prime(hit_rate, false_alarm_rate):
    """Return the sensitivity index d' = Z(hit_rate) - Z(false_alarm_rate).

    Z is the inverse CDF of the standard normal distribution (``norm.ppf``).

    Parameters
    ----------
    hit_rate : float
        Hit rate as a proportion (not a percentage).
    false_alarm_rate : float
        False-alarm rate as a proportion (not a percentage).

    Returns
    -------
    float
        The d' score. Both rates are clamped to [0.01, 0.99] first so that
        rates of exactly 0 or 1 do not produce infinite z-scores. If either
        rate is NaN the result is NaN.
    """
    # Without this guard a NaN rate would be silently clamped to 0.99
    # (min(0.99, nan) evaluates to 0.99), fabricating a score for missing data.
    if np.isnan(hit_rate) or np.isnan(false_alarm_rate):
        return float('nan')
    hit_rate = max(0.01, min(0.99, hit_rate))
    false_alarm_rate = max(0.01, min(0.99, false_alarm_rate))
    return norm.ppf(hit_rate) - norm.ppf(false_alarm_rate)

def compute_dprime_for_dataset(df, hit_rate_col, far_col):
    """Add a 'd_prime' column to ``df`` and return ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the two rate columns. It is modified in place; pass a
        copy if the original must stay untouched.
    hit_rate_col : str
        Name of the hit-rate column, expressed in percent (divided by 100 here).
    far_col : str
        Name of the false-alarm-rate column, expressed in percent.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new float 'd_prime' column.
    """
    hit_rates = df[hit_rate_col] / 100
    false_alarm_rates = df[far_col] / 100
    # Build the column positionally as a float array rather than through a
    # row-wise DataFrame.apply: apply(axis=1) does not return a Series for an
    # empty frame, which makes the column assignment fail, and it is slower.
    df['d_prime'] = np.array(
        [d_prime(hr, far) for hr, far in zip(hit_rates, false_alarm_rates)],
        dtype=float,
    )
    return df

In [7]:
# Attach d' scores (at-a-glance hit rate vs. false-alarm rate) to a copy of the matched metadata
df_with_dprime = compute_dprime_for_dataset(
    matched_metadata.copy(),
    'mem: at-a-glance HR (%)',
    'mem: at-a-glance FAR (%)',
)

# One-hot encode the categorical columns
categorical_columns = [
    'source',
    'category',
    'vistype',
    'title location',
    'attr: black&white',
    'attr: human recognizable object',
    'attr: human depiction',
]
encoded_data = pd.get_dummies(df_with_dprime, columns=categorical_columns)

# Filter metadata to retain relevant features
dropped_columns = [
    'mem: at-a-glance HR (%)',
    'mem: at-a-glance FAR (%)',
    'mem: prolonged HR (%)',
    'category_G',
    'category_I',
    'message redundancy',
    'title location_Top-right',
    'vistype_Diagrams',
    'vistype_Trees and Networks',
    'vistype_Grid / Matrix',
    'data redundancy',
    'title location_Top',
]
filtered_data = encoded_data.drop(columns=dropped_columns)

In [8]:
# Select columns with significant correlation to d_prime.
# numeric_only=True restricts the correlation matrix to numeric columns explicitly:
# relying on the implicit default is deprecated and fails on newer pandas when the
# frame still holds text columns.
correlations = filtered_data.corr(numeric_only=True)['d_prime'].drop('d_prime').sort_values()
# Keep features whose absolute correlation with d_prime exceeds 0.1
significant_correlations = correlations[correlations.abs() > 0.1]
# Exclude the one-hot 'source_*' columns from the selected features
filtered_significant_columns = [col for col in significant_correlations.index if not col.startswith('source_')]
filtered_dataframe = filtered_data[filtered_significant_columns]

  correlations = filtered_data.corr()['d_prime'].drop('d_prime').sort_values()


In [9]:
# Load and preprocess images
def load_and_preprocess_image(img_path, target_size=(256, 256)):
    """Load an image file, resize it, and scale its pixel values by 1/255.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    target_size : tuple of int, optional
        Size passed to ``image.load_img``; defaults to (256, 256), the size
        the notebook's model input expects.

    Returns
    -------
    numpy.ndarray
        The image array divided by 255.0.
        NOTE(review): presumably shape (height, width, 3) with values in
        [0, 1], given load_img's default colour mode - confirm.
    """
    img = image.load_img(img_path, target_size=target_size)
    return image.img_to_array(img) / 255.0

In [10]:
# Load the images in image_names order. image_files has been re-listed by later
# cells, whereas the metadata rows and labels are ordered by image_names, so the
# paths are rebuilt from image_names to guarantee that row i of the image tensor,
# the feature frame and the labels all refer to the same file.
ordered_image_paths = [os.path.join(image_folder, name) for name in image_names]
image_data = np.array([load_and_preprocess_image(img_path) for img_path in ordered_image_paths])

# Ensure dataframes and images are aligned
ordered_filtered_dataframe = filtered_dataframe.set_index(df_with_dprime['filename']).loc[image_names].reset_index(drop=True)

# Fail early if the two inputs cannot be paired row by row
assert len(image_data) == len(ordered_filtered_dataframe), "image / metadata row count mismatch"


In [11]:
# Split data into training and testing sets
# Labels: d' looked up by filename, in the same order as the images
y_values = df_with_dprime.set_index('filename').loc[image_names, 'd_prime'].to_numpy()

# Cast the metadata features to one float dtype. Taking .values from a frame that
# mixes one-hot and integer columns can give an object array (e.g. when the dummies
# are booleans), which Keras cannot convert to a tensor.
X_meta = ordered_filtered_dataframe.to_numpy(dtype='float32')

X_meta_train, X_meta_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    X_meta, image_data, y_values, test_size=0.2, random_state=42
)

In [12]:
# Display the selected metadata features, re-ordered to follow image_names
ordered_filtered_dataframe

Unnamed: 0,attr: human recognizable object_n,attr: human depiction_n,vistype_Lines,vistype_Bars,attr: visual density,attr: human depiction_y,attr: # distinct colors,attr: data-ink ratio,attr: human recognizable object_y
0,1,1,0,1,1,0,3,2,0
1,1,1,0,1,1,0,2,1,0
2,0,1,0,0,1,0,1,1,1
3,1,1,0,0,2,0,2,2,0
4,0,1,0,0,1,0,1,1,1
...,...,...,...,...,...,...,...,...,...
388,1,1,0,1,1,0,2,1,0
389,1,1,1,0,2,0,2,2,0
390,0,1,0,0,1,0,2,2,1
391,1,1,0,1,1,0,2,1,0


In [13]:
# --- Image branch: three Conv2D + MaxPooling2D stages, then flatten ---
input_image = Input(shape=(256, 256, 3), name="input_image")
x = Conv2D(64, (3, 3), activation='relu')(input_image)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
flatten_layer = Flatten()(x)

# --- Metadata branch: two Dense -> BatchNorm -> Dropout stages ---
# The number of metadata features depends on which columns pass the correlation
# filter, so the input width is taken from the training data instead of being
# hard-coded (it is 9 in the recorded run).
n_meta_features = X_meta_train.shape[1]
input_meta = Input(shape=(n_meta_features,), name="input_meta")
dense1 = Dense(16, activation='relu')(input_meta)
batch_norm1 = BatchNormalization()(dense1)
dropout1 = Dropout(0.5)(batch_norm1)
dense2 = Dense(8, activation='relu')(dropout1)
batch_norm2 = BatchNormalization()(dense2)
dropout2 = Dropout(0.5)(batch_norm2)

# --- Merge both branches and regress a single value ---
merged = concatenate([flatten_layer, dropout2])
merged_dense1 = Dense(16, activation='relu')(merged)
merged_batch_norm1 = BatchNormalization()(merged_dense1)
merged_dropout1 = Dropout(0.5)(merged_batch_norm1)
merged_dense2 = Dense(16, activation='relu')(merged_dropout1)
merged_batch_norm2 = BatchNormalization()(merged_dense2)
merged_dropout2 = Dropout(0.5)(merged_batch_norm2)

# The sigmoid output in (0, 1) is rescaled linearly to (min_dprime, max_dprime).
# NOTE(review): d_prime() clamps rates to [0.01, 0.99], so targets can be negative
# or as large as ~4.65; this head cannot predict outside [0, 3] - confirm the range.
output_layer = Dense(1, activation='sigmoid')(merged_dropout2)
min_dprime = 0.00
max_dprime = 3.00
range_dprime = max_dprime - min_dprime
output = tf.keras.layers.Lambda(lambda t: t * range_dprime + min_dprime)(output_layer)

model = Model(inputs=[input_image, input_meta], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [24]:
# Train on the training split; the held-out test split doubles as validation data
train_inputs = [X_img_train, X_meta_train]
validation_inputs = [X_img_test, X_meta_test]
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(validation_inputs, y_test),
    epochs=5,
    batch_size=32,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
# Save the trained weights to a file in the current working directory.
# NOTE(review): save_weights presumably stores weights only, so the model-building
# cell must be re-run before these weights can be loaded - confirm.
model.save_weights("visualization_prediction_model_weights.h5")