In [43]:
from PIL import Image
import os
import numpy as np
import pandas as pd
import csv
from sklearn.preprocessing import LabelEncoder

In [44]:
# Input locations, all relative to the notebook's working directory.
pics_root = "Alex_Kelly_Pics"

# One sub-folder per photographer, plus the test set.
alex_images_path = pics_root + "/Alex"
kelly_images_path = pics_root + "/Kelly"
test_images_path = pics_root + "/TestSet"

# CSV holding the per-image attribute labels.
image_labels_path = "Kelly_and_Alex_Image_Labels - Sheet1.csv"

In [45]:
def sorted_image_names(path):
    """Return the entries of ``path`` ordered by the number embedded in each name.

    The sort number is built from every decimal digit in the file name,
    concatenated in order (so ``Alex-Image10.png`` -> 10). Entries with the same
    number are ordered by name so the result is deterministic.

    Entries that contain no digits at all (for example ``Thumbs.db`` or
    ``.DS_Store``) are placed after all numbered entries, ordered by name,
    instead of raising ``ValueError`` from ``int('')``.

    Parameters
    ----------
    path : str
        Directory to list.

    Returns
    -------
    list of str
        The directory's entry names in sorted order.
    """
    def _numeric_key(name):
        digits = ''.join(ch for ch in name if ch.isdecimal())
        if digits:
            return (0, int(digits), name)
        # No number to sort by: push to the end, ordered by name.
        return (1, 0, name)

    return sorted(os.listdir(path), key=_numeric_key)

In [46]:
# List each folder's files in numeric order (same call order as the folders below).
alex_image_names, kelly_image_names, test_image_names = (
    sorted_image_names(folder)
    for folder in (alex_images_path, kelly_images_path, test_images_path)
)

# Training names: Alex's files first, then Kelly's.
image_names = [*alex_image_names, *kelly_image_names]

In [47]:
# Read the per-image attribute table.
labels = pd.read_csv(image_labels_path)

# Keep the Landscape encoder under its own name: `le` is rebound to the
# Photographer encoder further down, which would otherwise make it impossible
# to map the integer Landscape codes back to their original values
# (landscape_encoder.inverse_transform / landscape_encoder.classes_).
landscape_encoder = LabelEncoder().fit(labels["Landscape"])
le = landscape_encoder  # alias kept so existing references to `le` still work
labels["Landscape"] = landscape_encoder.transform(labels["Landscape"])
labels

Unnamed: 0,image_name,Human,Castle,Indoors,Landscape,Woman,Daytime,Children,Sunset,Flower,Animal,Building,Mask,Gray-Hair,Fire,Food_drink
0,Alex-Image01.png,1,0,0,3,0,1,1,0,0,1,0,0,0,0,0
1,Alex-Image02.png,1,0,0,3,0,1,1,0,0,0,1,0,0,0,0
2,Alex-Image03.png,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
3,Alex-Image04.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
4,Alex-Image05.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,Kelly-Image225.png,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0
481,Kelly-Image226.png,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0
482,Kelly-Image227.png,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0
483,Kelly-Image228.png,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0


In [48]:
# The photographer is the part of the file name before the first '-'
# (e.g. "Alex-Image01.png" -> "Alex").
name_parts = labels['image_name'].str.split('-', n = 1)
labels['Photographer'] = name_parts.str[0]

In [49]:
# Replace the photographer names with integer codes; `le` keeps the fitted
# encoder so the codes can be mapped back via `le.classes_`.
le = LabelEncoder()
labels["Photographer"] = le.fit_transform(labels["Photographer"])

In [50]:
# Display the label table again; it now carries the encoded Photographer column.
labels

Unnamed: 0,image_name,Human,Castle,Indoors,Landscape,Woman,Daytime,Children,Sunset,Flower,Animal,Building,Mask,Gray-Hair,Fire,Food_drink,Photographer
0,Alex-Image01.png,1,0,0,3,0,1,1,0,0,1,0,0,0,0,0,0
1,Alex-Image02.png,1,0,0,3,0,1,1,0,0,0,1,0,0,0,0,0
2,Alex-Image03.png,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0
3,Alex-Image04.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
4,Alex-Image05.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,Kelly-Image225.png,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,1
481,Kelly-Image226.png,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1
482,Kelly-Image227.png,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,1
483,Kelly-Image228.png,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1


In [51]:
def get_largest_size(folder):
    """Return the largest width and largest height found among a folder's images.

    The two maxima are tracked independently, so they may come from different
    files. Every entry of ``folder`` is opened with PIL.

    Parameters
    ----------
    folder : str
        Directory whose entries are all opened as images.

    Returns
    -------
    list of int
        ``[max_width, max_height]``; ``[0, 0]`` for an empty folder.
    """
    max_width, max_height = 0, 0
    for filename in os.listdir(folder):
        # Image.open keeps the underlying file open; the context manager closes
        # it as soon as the size has been read, so handles do not pile up.
        with Image.open(os.path.join(folder, filename)) as img:
            width, height = img.size
        max_width = max(max_width, width)
        max_height = max(max_height, height)
    return [max_width, max_height]

In [52]:
alex_im_size = get_largest_size(alex_images_path)
kelly_im_size = get_largest_size(kelly_images_path)
test_im_size = get_largest_size(test_images_path)

# max() applied to the three lists would compare them lexicographically and
# return the list with the largest width, even if another folder has a taller
# image. Take the maximum of each dimension separately instead.
max_im_size = [max(dims) for dims in zip(alex_im_size, kelly_im_size, test_im_size)]

print("Max Image Sizes:", max_im_size)

Max Image Sizes: [667, 400]


In [53]:
def load_images_from_folder(folder, resize_shape=(700, 440)):
    """Load every image in ``folder`` as a flat feature vector with its labels appended.

    Each file is opened with PIL, converted to RGB, resized to ``resize_shape``
    (passed unchanged to ``Image.resize``) and flattened to 1-D. The row(s) of
    the module-level ``labels`` frame whose ``image_name`` equals the file name
    are appended to the pixel values, without the ``image_name`` column. Files
    with no matching row get nothing appended, so their vectors hold pixels only.

    Files are processed in sorted file-name order so the result does not depend
    on the arbitrary order returned by ``os.listdir``. Progress is printed about
    every tenth of the folder.

    Parameters
    ----------
    folder : str
        Directory whose entries are all loaded as images.
    resize_shape : tuple of int, optional
        Target size handed to ``Image.resize``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per file; an empty list for an empty folder.
    """
    # List the directory once instead of on every loop iteration.
    filenames = sorted(os.listdir(folder))
    total = len(filenames)
    # With fewer than 10 files total // 10 is 0, which made the original
    # `count % 0` raise ZeroDivisionError; report after every file instead.
    report_every = max(1, total // 10)

    images = []
    for count, filename in enumerate(filenames, start=1):
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(os.path.join(folder, filename)) as raw:
            img = raw.convert('RGB').resize(resize_shape)

        pixels = np.array(img).flatten()
        attr = labels[labels["image_name"] == filename].drop("image_name", axis = 1).to_numpy()
        images.append(np.append(pixels, attr))

        if count % report_every == 0:
            print(f"{count / total*100}% Complete")
    return images

In [54]:
# Load the two training folders and the test folder, in that order.
alex_images, kelly_images, test_images = map(
    load_images_from_folder,
    (alex_images_path, kelly_images_path, test_images_path),
)

9.765625% Complete
19.53125% Complete
29.296875% Complete
39.0625% Complete
48.828125% Complete
58.59375% Complete
68.359375% Complete
78.125% Complete
87.890625% Complete
97.65625% Complete
9.606986899563319% Complete
19.213973799126638% Complete
28.82096069868996% Complete
38.427947598253276% Complete
48.03493449781659% Complete
57.64192139737992% Complete
67.24890829694323% Complete
76.85589519650655% Complete
86.46288209606988% Complete
96.06986899563319% Complete
10.0% Complete
20.0% Complete
30.0% Complete
40.0% Complete
50.0% Complete
60.0% Complete
70.0% Complete
80.0% Complete
90.0% Complete
100.0% Complete


In [55]:
# Stack Alex's rows on top of Kelly's to form the training matrix
# (one row per image: flattened pixels followed by the label attributes).
merged_dataset = np.concatenate((alex_images, kelly_images), axis=0)
merged_dataset

array([[153, 138, 137, ...,   0,   0,   0],
       [215, 206, 197, ...,   0,   0,   0],
       [103, 113, 121, ...,   0,   0,   0],
       ...,
       [ 82,  92,  78, ...,   0,   0,   1],
       [  5,   7,   6, ...,   0,   0,   1],
       [127, 124, 106, ...,   0,   0,   1]], dtype=int64)

In [56]:
# Every row is [pixel values..., label attributes...]; the attribute count is
# the number of label columns other than image_name.
num_features = merged_dataset.shape[1]
num_attributes = labels.shape[1] - 1

# Name the leading columns pixel_0..pixel_N and reuse the label column names
# for the trailing attribute columns.
pixel_columns = [f'pixel_{i}' for i in range(num_features - num_attributes)]
attribute_columns = labels.drop(columns="image_name").columns.tolist()
columns = pixel_columns + attribute_columns

merged_df = pd.DataFrame(merged_dataset, columns=columns)

merged_df

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,Children,Sunset,Flower,Animal,Building,Mask,Gray-Hair,Fire,Food_drink,Photographer
0,153,138,137,151,137,136,147,135,134,147,...,1,0,0,1,0,0,0,0,0,0
1,215,206,197,206,199,193,183,183,183,155,...,1,0,0,0,1,0,0,0,0,0
2,103,113,121,102,112,120,99,111,117,95,...,0,0,0,0,1,0,0,0,0,0
3,91,143,198,91,144,198,91,145,198,90,...,0,0,0,0,0,0,0,0,0,0
4,65,125,184,60,115,174,53,100,159,62,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,120,115,84,120,115,84,120,115,84,120,...,1,0,0,0,1,0,0,0,1,1
481,223,224,208,223,224,208,223,224,208,223,...,0,0,0,1,1,0,1,0,0,1
482,82,92,78,83,92,78,84,93,79,88,...,1,0,0,0,1,0,0,0,0,1
483,5,7,6,5,7,6,6,8,7,7,...,0,1,0,0,0,0,0,0,0,1


In [None]:
# Persist the training table; the row index is written as the first,
# unnamed column (the to_csv default, made explicit here).
merged_df.to_csv('Complete_training_data.csv', index=True)
