In [33]:
from PIL import Image
import os
import numpy as np
import pandas as pd
import csv
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt

In [34]:
alex_images_path = "Alex_Kelly_Pics/Alex"
kelly_images_path = "Alex_Kelly_Pics/Kelly"
test_images_path = "Alex_Kelly_Pics/TestSet"
image_labels_path = "Kelly_and_Alex_Image_Labels - Sheet1.csv"
neither_images_path = "Neither_pics"

In [35]:
def sorted_image_names(path):
    return sorted(os.listdir(path), key=lambda x: int(''.join(filter(str.isdigit, x))))

In [36]:
alex_image_names = sorted_image_names(alex_images_path)
kelly_image_names = sorted_image_names(kelly_images_path)
test_image_names = sorted_image_names(test_images_path)

image_names = alex_image_names + kelly_image_names

In [37]:
labels = pd.read_csv(image_labels_path)
le = LabelEncoder().fit(labels["Landscape"])
labels["Landscape"] = le.transform(labels["Landscape"])
labels

Unnamed: 0,image_name,Human,Castle,Indoors,Landscape,Woman,Daytime,Children,Sunset,Flower,Animal,Building,Mask,Gray-Hair,Fire,Food_drink
0,Alex-Image01.png,1,0,0,3,0,1,1,0,0,1,0,0,0,0,0
1,Alex-Image02.png,1,0,0,3,0,1,1,0,0,0,1,0,0,0,0
2,Alex-Image03.png,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
3,Alex-Image04.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
4,Alex-Image05.png,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,Kelly-Image225.png,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0
481,Kelly-Image226.png,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0
482,Kelly-Image227.png,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0
483,Kelly-Image228.png,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0


In [38]:
labels['Photographer'] = labels['image_name'].str.split('-', n = 1).str[0]

In [39]:
le = LabelEncoder().fit(labels["Photographer"])
labels["Photographer"] = le.transform(labels["Photographer"])

In [40]:
def get_largest_size(folder):
    biggest_image = [0, 0]
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename))
        if img.size[0] > biggest_image[0]: 
            biggest_image[0] = img.size[0]
        if img.size[1] > biggest_image[1]:
            biggest_image[1] = img.size[1]
    return biggest_image

In [41]:
alex_im_size = get_largest_size(alex_images_path)
kelly_im_size = get_largest_size(kelly_images_path)
test_im_size = get_largest_size(test_images_path)

print("Max Image Sizes:", max(alex_im_size, kelly_im_size, test_im_size))

Max Image Sizes: [667, 400]


In [42]:
def load_images_from_folder(folder, resize_shape=(700, 440)):
    images = []
    image_names = []
    count = 0
    for filename in os.listdir(folder):
        count += 1
        img = Image.open(os.path.join(folder, filename)).convert('RGB')
        img = img.resize(resize_shape)
        if img is not None:
            img_array = np.array(img)
            img_array = img_array.flatten()
            attr = labels[labels["image_name"] == filename].drop("image_name", axis = 1).to_numpy()
            img_array = np.append(img_array, attr)
            images.append(img_array)
            
        if count % int(len(os.listdir(folder)) / 10) == 0:
            print(f"{count / len(os.listdir(folder))*100}% Complete")
    return images

In [43]:
alex_images = load_images_from_folder(alex_images_path)
kelly_images = load_images_from_folder(kelly_images_path)
test_images = load_images_from_folder(test_images_path)


9.765625% Complete
19.53125% Complete
29.296875% Complete
39.0625% Complete
48.828125% Complete
58.59375% Complete
68.359375% Complete
78.125% Complete
87.890625% Complete
97.65625% Complete
9.606986899563319% Complete
19.213973799126638% Complete
28.82096069868996% Complete
38.427947598253276% Complete
48.03493449781659% Complete
57.64192139737992% Complete
67.24890829694323% Complete
76.85589519650655% Complete
86.46288209606988% Complete
96.06986899563319% Complete
10.0% Complete
20.0% Complete
30.0% Complete
40.0% Complete
50.0% Complete
60.0% Complete
70.0% Complete
80.0% Complete
90.0% Complete
100.0% Complete


In [46]:
len(alex_images[0])

924016

In [14]:
test_images_array = np.array(test_images)
del(test_images)
test_images = test_images_array[:, 0:924000]
test_images = test_images / 255.0
test_images = test_images.reshape(test_images.shape[0], 700, 440, 3)

In [15]:
merged_dataset = np.concatenate([alex_images, kelly_images])
del(alex_images, kelly_images) # Free up memory
merged_dataset

array([[153, 138, 137, ...,   0,   0,   0],
       [215, 206, 197, ...,   0,   0,   0],
       [103, 113, 121, ...,   0,   0,   0],
       ...,
       [ 82,  92,  78, ...,   0,   0,   1],
       [  5,   7,   6, ...,   0,   0,   1],
       [127, 124, 106, ...,   0,   0,   1]], dtype=int64)

In [18]:
num_attributes = labels.shape[1] - 1

attribute_index = list(range(924000, 924000+num_attributes))

attributes = [col for col in labels.columns if col != "image_name"]

attr_dict = dict(zip(attributes, attribute_index))

# CNN - Sunset

In [19]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Sunset"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

In [None]:
model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Sunset model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
Sunset_classifications = model.predict(test_images)

threshold = .5
Sunset_binary_classifications = (Sunset_classifications > threshold).astype(int)

Sunset_binary_classifications

# CNN - Flower

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Flower"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

In [None]:
model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Flower model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
Flower_classifications = model.predict(test_images)

threshold = .5
Flower_binary_classifications = (Flower_classifications > threshold).astype(int)

Flower_binary_classifications

# CNN - Animal

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Animal"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Animal model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
Animal_classifications = model.predict(test_images)

threshold = .5
Animal_binary_classifications = (Animal_classifications > threshold).astype(int)

Animal_binary_classifications

# CNN - Building

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Building"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Building model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
Building_classifications = model.predict(test_images)

threshold = .5
Building_binary_classifications = (Building_classifications > threshold).astype(int)

Building_binary_classifications

# CNN - Mask



In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Building"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Building model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
Mask_classifications = model.predict(test_images)

threshold = .5
Mask_binary_classifications = (Mask_classifications > threshold).astype(int)

Mask_binary_classifications

# CNN - Gray-Hair

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Gray-Hair"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Gray-hair model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
gray_classifications = model.predict(test_images)

threshold = .5
gray_binary_classifications = (gray_classifications > threshold).astype(int)

gray_binary_classifications

# CNN - Fire

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Fire"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Fire model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
fire_classifications = model.predict(test_images)

threshold = .5
fire_binary_classifications = (fire_classifications > threshold).astype(int)

fire_binary_classifications

# CNN - Food_drink

In [None]:
X = merged_dataset[:, 0:924000]
X = X / 255.0
y = merged_dataset[:, attr_dict["Food_drink"]]

X_train, X_test, y_train, y_test = train_test_split(X, y)
X_train = X_train.reshape(X_train.shape[0],700,440,3)
X_test = X_test.reshape(X_test.shape[0],700,440,3)

annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0)
styles=[':','-.','--','-',':','-.','--','-',':','-.','--','-']

model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 20, 
validation_data = (X_test, y_test), callbacks=[annealer], verbose=2)

In [None]:
plt.figure(figsize=(15,5))
plt.plot(history.history['val_accuracy'])
plt.title('Food_drink model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
axes = plt.gca()
axes.set_ylim([0.60,1])
plt.show()

In [None]:
food_classifications = model.predict(test_images)

threshold = .5
food_binary_classifications = (food_classifications > threshold).astype(int)

food_binary_classifications

# PART 2

In [1]:
from PIL import Image
import os
import numpy as np
import pandas as pd
import csv
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt

In [2]:
alex_images_path = "Alex_Kelly_Pics/Alex"
kelly_images_path = "Alex_Kelly_Pics/Kelly"
test_images_path = "Alex_Kelly_Pics/TestSet"
image_labels_path = "Kelly_and_Alex_Image_Labels - Sheet1.csv"
neither_images_path = "Neither_pics"

In [13]:
from tensorflow.keras.utils import to_categorical
def load_images_from_folder(folder, label, resize_shape=(700, 440)):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename)).convert('RGB')
        img = img.resize(resize_shape)
        if img is not None:
            img_array = np.array(img)
            images.append(img_array)
            labels.append(label)
    return images, labels

alex_images, alex_labels = load_images_from_folder(alex_images_path, label=0)
kelly_images, kelly_labels = load_images_from_folder(kelly_images_path, label=1)
neither_images, neither_labels = load_images_from_folder(neither_images_path, label=2)

all_images = np.array(alex_images + kelly_images + neither_images)
all_labels = np.array(alex_labels + kelly_labels + neither_labels)
del(alex_images, kelly_images, neither_images)

all_labels = to_categorical(all_labels, num_classes=3)

all_images = all_images / 255.0




In [21]:
X_train, X_test, y_train, y_test = train_test_split(all_images, all_labels, test_size=0.2, random_state=42)

MemoryError: Unable to allocate 3.78 GiB for an array with shape (549, 440, 700, 3) and data type float64

In [None]:
model = Sequential()
model.add(Conv2D(24,kernel_size=5,padding='same',activation='relu',
        input_shape=(700,440,3)))
model.add(MaxPool2D())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=32, epochs = 1, validation_data = (X_test, y_test), callbacks=LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x, verbose=0), verbose=2)
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

In [None]:
test_probs = model.predict(test_images)

test_preds = np.argmax(test_probs, axis=1)

test_image_labels = np.argmax(test_preds, axis=1)