In [0]:
# cd '/content/drive/My Drive/Colab Notebooks/Amazon/Data'

In [1]:
!curl -L https://www.dropbox.com/s/qsdq7sx946t39pa/amazon.tar?dl=1 -o amazon.tar

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  637M  100  637M    0     0  10.5M      0  0:01:00  0:01:00 --:--:-- 11.8M


In [2]:
!tar xvf amazon.tar

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
amazon/train-jpg/train_19718.jpg
amazon/train-jpg/train_38480.jpg
amazon/train-jpg/train_16787.jpg
amazon/train-jpg/train_621.jpg
amazon/train-jpg/train_28212.jpg
amazon/train-jpg/train_29928.jpg
amazon/train-jpg/train_30979.jpg
amazon/train-jpg/train_12790.jpg
amazon/train-jpg/train_37996.jpg
amazon/train-jpg/train_4564.jpg
amazon/train-jpg/train_423.jpg
amazon/train-jpg/train_15263.jpg
amazon/train-jpg/train_28557.jpg
amazon/train-jpg/train_17206.jpg
amazon/train-jpg/train_28792.jpg
amazon/train-jpg/train_12157.jpg
amazon/train-jpg/train_3899.jpg
amazon/train-jpg/train_34358.jpg
amazon/train-jpg/train_9458.jpg
amazon/train-jpg/train_15756.jpg
amazon/train-jpg/train_6353.jpg
amazon/train-jpg/train_2794.jpg
amazon/train-jpg/train_34451.jpg
amazon/train-jpg/train_11776.jpg
amazon/train-jpg/train_26449.jpg
amazon/train-jpg/train_15881.jpg
amazon/train-jpg/train_21669.jpg
amazon/train-jpg/train_31322.jpg
amazon/train-jpg/tra

In [0]:
import pandas as pd
import numpy as np
np.random.seed(0)

import cv2

from tqdm.notebook import tqdm
import os

from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.layers import Dense, Dropout, Flatten
# from tensorflow.keras.applications import ResNet50, VGG16

from sklearn.metrics import fbeta_score
from tensorflow.keras.optimizers import Adam

import gc

In [42]:
import tensorflow.keras.applications
dir(tensorflow.keras.applications)

['DenseNet121',
 'DenseNet169',
 'DenseNet201',
 'InceptionResNetV2',
 'InceptionV3',
 'MobileNet',
 'MobileNetV2',
 'NASNetLarge',
 'NASNetMobile',
 'ResNet101',
 'ResNet101V2',
 'ResNet152',
 'ResNet152V2',
 'ResNet50',
 'ResNet50V2',
 'VGG16',
 'VGG19',
 'Xception',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_sys',
 'densenet',
 'imagenet_utils',
 'inception_resnet_v2',
 'inception_v3',
 'mobilenet',
 'mobilenet_v2',
 'nasnet',
 'resnet',
 'resnet50',
 'resnet_v2',
 'vgg16',
 'vgg19',
 'xception']

In [0]:
base_models = ['DenseNet121', 'DenseNet169', 'DenseNet201', 'InceptionResNetV2', 'InceptionV3', 'MobileNet', 'MobileNetV2', 'NASNetLarge', 'NASNetMobile', 'ResNet101', 'ResNet101V2', 'ResNet152', 'ResNet152V2', 'ResNet50', 'ResNet50V2', 'VGG16', 'VGG19', 'Xception']

In [0]:
for model in base_models:
  exec('from tensorflow.keras.applications import {}'.format(model))

In [45]:
ls

[0m[01;34mamazon[0m/  [01;34msample_data[0m/


In [46]:
df_train = pd.read_csv('amazon/train_v2.csv')
df_train.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [0]:
def read_data(df_train, labels, resize=(32, 32)):
    X_train = []
    y_train = []

    label_map = {l: i for i, l in enumerate(labels)}
    inv_label_map = {i: l for l, i in label_map.items()}

    for f, tags in tqdm(df_train.values, miniters=1000):
        if False == os.path.exists('amazon/train-jpg/{}.jpg'.format(f)): continue

        img = cv2.imread('amazon/train-jpg/{}.jpg'.format(f))
        targets = np.zeros(len(label_map))
        for t in tags.split(' '):
            targets[label_map[t]] = 1 

        X_train.append(cv2.resize(img, resize))
        y_train.append(targets)
        
    y_train = np.array(y_train, np.uint8)
    X_train = np.array(X_train, np.float16) / 255.
    
    split = 35000
    
    # X_train, X_test, y_train, y_test
    return X_train[:split], X_train[split:], y_train[:split], y_train[split:]

def score_model(model):
  y_pred = model.predict(X_test, batch_size=128)
  score = fbeta_score(y_test, np.array(y_pred) > 0.2, beta=2, average='samples')

  print("Test score (f1): ", score)
  print("Error: %.2f%%" % (100-score*100))

def draw_learning_curve(history, key='accuracy', ylim=(0.8, 1.01)):
  plt.figure(figsize=(12,6))
  plt.plot(history.history[key])
  plt.plot(history.history['val_' + key])
  plt.title('Learning Curve')
  plt.ylabel(key.title())
  plt.xlabel('Epoch')
  plt.ylim(ylim)
  plt.legend(['train', 'test'], loc='best')
  plt.show()

In [48]:
all_labels = df_train['tags'].map(lambda x: x.split(' ')).values
labels = list(set([y for x in all_labels for y in x]))

print( len(labels), labels )

17 ['partly_cloudy', 'agriculture', 'selective_logging', 'cloudy', 'haze', 'bare_ground', 'blooming', 'water', 'clear', 'blow_down', 'cultivation', 'habitation', 'conventional_mine', 'artisinal_mine', 'primary', 'road', 'slash_burn']


In [49]:
X_train, X_test, y_train, y_test = read_data(df_train, labels, resize=(64, 64))

HBox(children=(IntProgress(value=0, max=40479), HTML(value='')))




In [50]:
X_train.shape

(35000, 64, 64, 3)

In [51]:
model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(64, 64, 3)),
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),

        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),

        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(128, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),

        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(256, kernel_size=(3, 3), activation='relu'),
        Conv2D(256, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2)),

        Flatten(), 

        Dense(256, activation='relu'),
        Dense(17, activation='sigmoid') 
        ])


model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 62, 62, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 31, 31, 64)        18496     
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 29, 29, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 14, 14, 128)      

In [0]:
history = model.fit(X_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=(X_test, y_test))

print()
score_model(model)

Epoch 1/5

In [0]:
history

##VGG16

In [16]:
del X_train, X_test, y_train, y_test
gc.collect()

X_train, X_test, y_train, y_test = read_data(df_train, labels, resize=(48, 48))

HBox(children=(IntProgress(value=0, max=40479), HTML(value='')))




In [0]:
def add_mlp_to_base_model(base_model):
  for layer in base_model.layers:
    layer.trainable = False
    
  model = Sequential([
      base_model,
      
      Flatten(), 
          
      Dense(128, activation='relu'),
      Dropout(0.2),
      Dense(17, activation='sigmoid')
      
  ])

  return model

In [36]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(48, 48, 3))
model = add_mlp_to_base_model(base_model)

optimizer = Adam(0.003, decay=0.0005)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 1, 1, 512)         14714688  
_________________________________________________________________
flatten_4 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               65664     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 17)                2193      
Total params: 14,782,545
Trainable params: 67,857
Non-trainable params: 14,714,688
_________________________________________________________________


In [37]:
history = model.fit(X_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [38]:
for layer in model.layers[0].layers[:-5]:
  layer.trainable = True

optimizer = Adam(0.0003, decay=0.000005)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

history = model.fit(X_train, y_train,
          batch_size=128,
          epochs=10,
          verbose=1,
          validation_data=(X_test, y_test))

print()
score_model(model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test score (f1):  0.9105096677075974
Error: 8.95%


In [28]:
base_models

['DenseNet121',
 'DenseNet169',
 'DenseNet201',
 'InceptionResNetV2',
 'InceptionV3',
 'MobileNet',
 'MobileNetV2',
 'NASNetLarge',
 'NASNetMobile',
 'ResNet101',
 'ResNet101V2',
 'ResNet152',
 'ResNet152V2',
 'ResNet50',
 'ResNet50V2',
 'VGG16',
 'VGG19',
 'Xception']

##ResNet50

In [32]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(48, 48, 3))
model = add_mlp_to_base_model(base_model)

optimizer = Adam(0.003, decay=0.0005)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2, 2, 2048)        23587712  
_________________________________________________________________
flatten_3 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               1048704   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 17)                2193      
Total params: 24,638,609
Trainable params: 1,050,897
Non-trainable params: 23,587,712
_______________________

In [33]:
history = model.fit(X_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [0]:
for layer in model.layers[0].layers[:-5]:
  layer.trainable = True

optimizer = Adam(0.0003, decay=0.000005)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [35]:
history = model.fit(X_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=(X_test, y_test))
print()
score_model(model)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Test score (f1):  0.8844726448166651
Error: 11.55%
