In [5]:
import numpy as np
import pandas as pd
import os
import PIL
import PIL.Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Input pipeline configuration: mini-batch size and the size every image is
# resized to when it is loaded from disk.
batch_size = 32
img_height = 48
img_width = 48

# Raw string so the Windows backslashes are taken literally; the previous
# literal relied on invalid escape sequences such as "\D" and "\K".
data_dir = r"D:\Documentos\TheBridge\bridge_datascience_JorgeGarcia\Kaggle_Feeling\train"

# Arguments shared by both loader calls. validation_split and seed must be
# identical in the two calls so that the training and validation subsets are
# complementary parts of the same split.
split_kwargs = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# 80% of the files in data_dir, used for fitting.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    subset="training",
    **split_kwargs)

# The remaining 20%, used for validation.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    subset="validation",
    **split_kwargs)

Found 6176 files belonging to 2 classes.
Using 4941 files for training.
Found 6176 files belonging to 2 classes.
Using 1235 files for validation.


In [3]:
# Sentinel that lets tf.data choose the prefetch buffer size itself.
AUTOTUNE = tf.data.AUTOTUNE

# Apply the same cache + prefetch stages to both splits.
train_ds, val_ds = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (train_ds, val_ds)
)

In [7]:
# Display the dataset's repr to check the element shapes and dtypes.
train_ds

<PrefetchDataset shapes: ((None, 48, 48, 3), (None,)), types: (tf.float32, tf.int32)>

## 1. Primer Modelo

In [48]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

# VGG16 convolutional base with ImageNet weights and without its original
# dense classifier (include_top=False).
base_model = VGG16(input_shape=(img_height, img_width, 3),
                   include_top=False,
                   weights='imagenet')

# Freeze the pre-trained base so that only the new dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# The model receives raw RGB images with values in [0, 255]. The ImageNet
# weights expect the VGG16-specific preprocessing, so it is applied inside the
# model: training and prediction can then both feed unprocessed images.
inputs = keras.Input(shape=(img_height, img_width, 3))
x = preprocess_input(inputs)
x = base_model(x)

##### FULLY CONNECTED LAYER #####
# Flatten the convolutional feature maps to one vector per image
x = layers.Flatten()(x)

# Two fully connected blocks: 512 ReLU units followed by dropout at rate 0.5
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)

# Single sigmoid unit: one score per image for the binary classification
x = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.models.Model(inputs, x)

# The metric is registered as 'acc', so the training history is keyed by
# 'acc' / 'val_acc'.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

In [49]:
# Fit the VGG16-based model for 10 epochs, evaluating on val_ds after each
# epoch; the object returned by fit is kept in vgghist.
vgghist = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 2. Segundo Modelo

In [20]:
# Small CNN trained from scratch: two conv -> max-pool -> dropout stages
# followed by a dense head with a single sigmoid output.
model_2 = keras.Sequential([
    # Conv2D has no activation by default, so ReLU is requested explicitly.
    keras.layers.Conv2D(filters=100,
                        kernel_size=(3, 3),
                        activation='relu',
                        # Same image size the datasets are loaded with.
                        input_shape=(img_height, img_width, 3),
                        padding='valid'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.2),
    keras.layers.Conv2D(filters=48,
                        kernel_size=(3, 3),
                        activation='relu',
                        padding='same'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(32, activation='relu'),
    # One sigmoid unit: a single score per image for the binary task.
    keras.layers.Dense(1, activation='sigmoid')
])
model_2.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 46, 46, 100)       2800      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 23, 23, 100)       0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 23, 23, 100)       0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 23, 23, 48)        43248     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 11, 11, 48)        0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 11, 11, 48)        0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 5808)             

In [21]:
# Configure training for the second model: Adam optimizer, binary
# cross-entropy loss, accuracy reported as the metric.
model_2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Train the second model for 5 epochs with validation on val_ds. The call is
# the cell's last expression, so its return value is displayed.
model_2.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2518f5ec808>

## 3. Predicciones

In [34]:
# Part 1: read the test manifest and collect the image file names.
test_df = pd.read_csv('test_set.csv')
# Drop the first five characters of every path (presumably a folder prefix
# such as "test/" — TODO confirm against test_set.csv).
test_df['path'] = test_df['path'].str[5:]
list_dir = list(test_df['path'])

# Part 2: load every test image at the size the model was built for.
# Raw string so the Windows backslashes are taken literally.
test_dir = r"D:\Documentos\TheBridge\bridge_datascience_JorgeGarcia\Kaggle_Feeling\test"

images = [
    tf.keras.preprocessing.image.load_img(os.path.join(test_dir, filename),
                                          target_size=(img_height, img_width))
    for filename in list_dir
]
test_images = np.array([tf.keras.preprocessing.image.img_to_array(img) for img in images])
# Slice per image and re-batch so prediction runs in mini-batches instead of
# pushing the whole test set through the network as one giant batch.
test_ds = tf.data.Dataset.from_tensor_slices(test_images).batch(batch_size)

# Part 3: predict and build the submission frame.
predictions = model.predict(test_ds)
# The model ends in a single sigmoid unit, so each prediction row holds one
# score; flatten to a 1-D vector.
results = predictions.ravel()

id_col = test_df['id_img']

# Scores >= 0.5 are class 1. NOTE(review): this assumes class 0 is 'happy' and
# class 1 is 'sadness' (the loader's alphabetical folder order) — confirm
# against the training folders.
labels = np.where(results >= 0.5, 'sadness', 'happy')
submission = pd.DataFrame({'id_img': id_col, 'label': labels}, index=range(len(id_col)))

submission.head()

In [None]:
# Part 1: read the test manifest and collect the image file names (the same
# steps as the first part of the prediction cell above).
test_df = pd.read_csv('test_set.csv')
# Strip the first five characters of each path (presumably a folder prefix —
# TODO confirm against test_set.csv).
test_df['path'] = test_df['path'].map(lambda p: p[5:])
list_dir = test_df['path'].tolist()

In [35]:
# Raw string so the Windows backslashes are taken literally; the previous
# literal relied on invalid escape sequences such as "\D" and "\K".
test_dir = r"D:\Documentos\TheBridge\bridge_datascience_JorgeGarcia\Kaggle_Feeling\test"

# Load every test image at the size the model was built for.
images = [
    tf.keras.preprocessing.image.load_img(os.path.join(test_dir, filename),
                                          target_size=(img_height, img_width))
    for filename in list_dir
]
test_images = np.array([tf.keras.preprocessing.image.img_to_array(img) for img in images])
# Slice per image and re-batch so prediction runs in mini-batches instead of
# pushing the whole test set through the network as one giant batch.
test_ds = tf.data.Dataset.from_tensor_slices(test_images).batch(batch_size)

In [50]:
predictions = model.predict(test_ds)
# The model ends in a single sigmoid unit, so each prediction row holds one
# score; flatten to a 1-D vector.
results = predictions.ravel()

sample = pd.read_csv('sample_submission.csv')
# NOTE(review): the predictions follow the row order of test_set.csv; taking
# the ids from the sample file assumes both files list the images in the same
# order — confirm, otherwise ids and labels will be misaligned.
id_col = sample['id_img']

# Scores >= 0.5 are class 1. NOTE(review): this assumes class 0 is 'happy' and
# class 1 is 'sadness' (the loader's alphabetical folder order) — confirm
# against the training folders.
labels = np.where(results >= 0.5, 'sadness', 'happy')
submission = pd.DataFrame({'id_img': id_col, 'label': labels}, index=range(len(id_col)))

submission.head()

Unnamed: 0,id_img,label
0,18341,happy
1,13176,sadness
2,23945,sadness
3,15968,happy
4,18382,happy


In [46]:
# Sanity check: list the distinct labels present in the submission.
submission['label'].unique()

array(['happy', 'sadness'], dtype=object)

In [70]:
# Save the VGG16-based model (`model`) to disk.
# NOTE(review): the file name says "Adamax", but the model is compiled with
# optimizer='adam', and the recorded cell output refers to
# "VGG16_20Epochs.tf" — confirm the name matches the model actually saved.
model.save('VGG16_Adamax_10Epochs.tf')

INFO:tensorflow:Assets written to: VGG16_20Epochs.tf\assets


In [39]:
import urllib.request
from PIL import Image

def chequeator(df_to_submit):
    """
    Check that a submission has the form Kaggle requires and, if so, save it.

    The frame is compared with `sample_submission.csv` (read from the current
    working directory): it must have the same shape, the same column names in
    the same order, and the same `id_img` values in the same order. When every
    check passes the frame is written to `submission.csv` and is ready to be
    uploaded to Kaggle.

    If a check fails, READ THE MESSAGE AND DO WHAT IT SAYS.

    If it still does not work:
    - turn off your computer,
    - go for a walk,
    - turn it on again,
    - open this notebook and
    - read it all again.
    We all deserve a second chance. So do you.

    Parameters
    ----------
    df_to_submit : pandas.DataFrame
        Candidate submission; expected to contain an `id_img` column.

    Returns
    -------
    None
        The outcome is reported with printed messages and, on success, the
        `submission.csv` file.
    """
    sample = pd.read_csv("sample_submission.csv")

    if df_to_submit.shape != sample.shape:
        print("Check the number of rows and/or columns and try again")
        print("\nMensaje secreto de Clara: No me puedo creer que después de todo este notebook hayas hecho algún cambio en las filas de `diamonds_test.csv`. Lloro.")
        return

    # Compare the actual labels. `columns.all()` only reduces the labels to a
    # single truthiness value, so comparing two of those never detects a
    # mismatch.
    if list(df_to_submit.columns) != list(sample.columns):
        print("Check the names of the columns and try again")
        return

    # Same reasoning for the ids: compare them value by value, in order.
    if df_to_submit["id_img"].tolist() != sample["id_img"].tolist():
        print("Check the ids and try again")
        return

    print("You're ready to submit!")
    df_to_submit.to_csv("submission.csv", index = False)  # index = False is essential

    # The picture is only a reward. The CSV is already saved, so a missing
    # network connection or an unreadable image must not raise.
    try:
        urllib.request.urlretrieve("https://i.kym-cdn.com/photos/images/facebook/000/747/556/27a.jpg", "gfg.png")
        img = Image.open("gfg.png")
        img.show()
    except OSError as err:
        print(f"(Could not show the celebration image: {err})")

In [51]:
# Validate the submission against the sample file; it is saved if every check passes.
chequeator(df_to_submit=submission)

You're ready to submit!
