In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Softmax
from tensorflow.keras.optimizers import SGD
# import tensorflow_datasets as tfds



In [2]:
# Load the attribute table; the first CSV column becomes the DataFrame index.
# (local copy, presumably: ./data/attr_formated.txt)
attrs = pd.read_csv(
    filepath_or_buffer="/kaggle/input/attributes-formated/attr_formated.txt",
    index_col=0,
)
# attrs

In [3]:
### pair every image file name with its attribute row in a single dataset
# The index of `attrs` supplies the file names; columns "0".."39" supply the labels.
images = tf.data.Dataset.from_tensor_slices(attrs.index)
attributes = tf.data.Dataset.from_tensor_slices(attrs[list(map(str, range(40)))])
# Zipping yields elements of the form (file_name, label_vector).
data = tf.data.Dataset.zip((images, attributes))

In [4]:
### verify correct format of dataset
# for i in data.take(1):
#     print(i)

In [5]:
### setting labels on images
def labeling(img_name, attributes):
    """Load one image from disk and pair it with its attribute labels.

    Args:
        img_name: scalar string tensor holding the image file name, relative
            to the CelebA image directory hard-coded below.
        attributes: label tensor for that image; passed through unchanged.

    Returns:
        Tuple ``(image, attributes)`` where ``image`` is a float tensor of
        shape (150, 150, 3) scaled to the [0, 1] range.
    """
    image = tf.io.read_file("/kaggle/input/celeb-a/img_align_celeba/img_align_celeba/" + img_name) # ./data/img_align_celeba/
    image = tf.image.decode_jpeg(image, channels=3)  # force 3 colour channels
    image = tf.image.resize(image, [150, 150])       # resize returns float32
    image = image / 255.0                            # 0..255 -> 0..1
    return image, attributes

# final corrected dataset with form: (image, attribute_labels)
# Decode images in parallel; the default deterministic mode keeps element order
# identical to a sequential map.
data = data.map(labeling, num_parallel_calls=tf.data.AUTOTUNE)

# for image, attribute in data.take(1):
#     plt.imshow(image)
#     plt.show()
#     print(attribute)

In [6]:
### Tried to make data partitions in a different way
# train_dataset, val_dataset, test_dataset = tfds.load("celeb_a", split=['train[:80%]','validation[80%:90%]','test[90%:]'], shuffle_files=True)

In [7]:
### shuffling data
data_size = int(data.cardinality())
train_size, val_size = int(data_size*0.8//1), int(data_size*0.1//1)

# Shuffle the lightweight (file name, labels) records BEFORE decoding, exactly once.
#  * reshuffle_each_iteration=False: with the default (True) the dataset is
#    re-permuted on every pass, so take()/skip() would hand different examples
#    to train/val/test each epoch and the splits would leak into each other.
#  * shuffling decoded images with buffer_size=data_size would require the
#    buffer to hold every 150x150x3 float image before the first batch is
#    produced; file names and label rows are tiny by comparison.
records = tf.data.Dataset.zip((images, attributes)).shuffle(
    data_size, seed=3, reshuffle_each_iteration=False
)
# `data` keeps its original meaning: the full shuffled (image, labels) dataset.
data = records.map(labeling, num_parallel_calls=tf.data.AUTOTUNE)

### partitioning data
# 80% train / 10% validation / 10% test, taken from the fixed permutation above.
train_dataset = (
    records.take(train_size)
    .shuffle(train_size)  # new order every epoch, but only within the training split
    .map(labeling, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = data.skip(train_size).take(val_size).batch(32).prefetch(tf.data.AUTOTUNE)
# NOTE: left unbatched as in the original; call .batch(...) on it before
# passing it to model.evaluate()/model.predict().
test_dataset = data.skip(train_size+val_size).take(val_size)

In [8]:
# Training hyperparameters: number of passes over the training set and the
# mini-batch size.
epochs, batch_size = 10, 32

In [9]:
# Small CNN for multi-label prediction: three conv/max-pool stages followed by
# a dense head with 40 outputs (one per attribute column).
model = Sequential()

model.add(Conv2D(10, (3, 3), input_shape=(150, 150, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(20, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(30, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
# A non-linearity is required here; without one this layer and the output
# layer collapse into a single linear map.
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# Sigmoid, not softmax: the 40 attributes are independent yes/no labels trained
# with binary_crossentropy. Softmax would force the 40 outputs to sum to 1,
# which cannot represent an image that has several attributes at once.
model.add(Dense(40, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 10)      280       
                                                                 
 max_pooling2d (MaxPooling2  (None, 74, 74, 10)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 20)        1820      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 36, 36, 20)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 30)        5430      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 17, 17, 30)        0

In [None]:
# sgd = SGD(learning_rate=0.001)
# Multi-label setup: one binary cross-entropy term per attribute output.
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])

# `train_dataset` / `val_dataset` are already-batched tf.data pipelines, so
# `batch_size` and `shuffle` are deliberately not passed to fit(): Keras
# documents that batch_size must not be specified for dataset inputs and that
# shuffle is ignored for them. Batching and shuffling are controlled where the
# datasets are built.
history = model.fit(train_dataset,
                    epochs=epochs,
                    validation_data=val_dataset,
                    verbose=1)

Epoch 1/10


In [None]:
# Persist the model to the working directory; the .hdf5 extension selects
# Keras' legacy HDF5 file format.
model.save(filepath="Try1.hdf5")