
# load dataframe

In [1]:
import pandas as pd

In [2]:
# Load the encoded label table: an image name plus one 0/1 column per tag.
data = pd.read_csv('../EncodedData.csv')

In [3]:
# Names of the 17 label columns used as targets (y_col) by the generators.
# The spellings ('artisinal mine', 'slashu burn') match the CSV column names
# shown in the preview below, so they must not be "corrected" here.
alltags = [
    # cloud-cover tags go first
    'clear', 'cloudy', 'haze', 'partly cloudy',
    # land-use / feature tags
    'agriculture', 'artisinal mine', 'bare ground', 'blooming', 'blow down',
    'cultivation', 'habitation', 'primary', 'road', 'selective logging',
    'conventional mine', 'slashu burn', 'water',
]

In [4]:
# Peek at the first row to check the column layout.
data.head(n=1)

Unnamed: 0,image_name,tags,taglist,clear,cloudy,haze,partly cloudy,agriculture,artisinal mine,bare ground,...,blow down,cultivation,habitation,primary,road,selective logging,conventional mine,slashu burn,water,result_id
0,train_0,haze primary,"['haze', 'primary']",0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,'00100000000100000'


## add the `.jpg` extension so the names match the image files on disk

In [5]:
# Append '.jpg' so the names match the image files on disk.
# Names that already end in '.jpg' are kept as they are, so re-running this
# cell does not produce 'train_0.jpg.jpg'.
data['image_name'] = data['image_name'].where(
    data['image_name'].str.endswith('.jpg', na=False),  # already has the extension: keep
    data['image_name'] + '.jpg',                         # otherwise: append it
)

In [6]:
# Confirm the file name now carries the extension.
data['image_name'].head(n=1)

0    train_0.jpg
Name: image_name, dtype: object

# load and preprocess data

## preprocessing

[Documentation for `ImageDataGenerator`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator#random_transform)

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Instantiate the data generators and declare the transformations to apply.

# Training generator: rescale pixels to [0, 1] and apply random augmentation.
datagen = ImageDataGenerator(rescale = 1./255,             # normalize pixel values
                             rotation_range = 45,          # random rotation, up to 45 degrees
                             zoom_range = 0.3,             # random zoom between 0.7x and 1.3x
                             brightness_range = [0.7, 1.3]
                            )

# Validation / test / prediction generator: rescale only.
# Random rotation, zoom and brightness shifts are training-time augmentation;
# applying them here would make evaluation scores and predictions change from
# run to run, so held-out images are only normalized.
test_datagen = ImageDataGenerator(rescale = 1./255)

## loading the images! 

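The cells below use 300 + 1 images: rows 0–199 for training, 200–249 for validation, 250–299 for testing, plus row 300 as a single sample for prediction. (This note originally said 1200 + 1 images, which does not match the slices used in the code.)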

In [46]:
# Training set: rows 0-199, with the 17 tag columns as a raw multi-hot target.
#
# Changes from the previous version:
# * `validation_split` was removed. It is an ImageDataGenerator constructor
#   argument, not a flow_from_dataframe one, so it was silently ignored (the
#   cell still reported all 200 images). Validation data comes from its own
#   slice of the dataframe instead.
# * `shuffle=True`, so batches are re-ordered every epoch; `seed` keeps the
#   order reproducible.
train_generator = datagen.flow_from_dataframe(
    dataframe=data.iloc[0:200],
    directory="../raw_data/train-jpg/",
    x_col="image_name",
    y_col=alltags,
    class_mode="raw",        # targets are the numeric values of the y_col columns
    target_size=(128, 128),  # must match the model's input_shape
    batch_size=32,
    shuffle=True,
    seed=42,
)

Found 200 validated image filenames.


In [47]:
# Validation set: rows 200-249, read in dataframe order (no shuffling).
valid_generator = test_datagen.flow_from_dataframe(
    dataframe=data.iloc[200:250],
    directory="../raw_data/train-jpg/",
    x_col="image_name",
    y_col=alltags,
    class_mode="raw",        # targets are the numeric values of the y_col columns
    target_size=(128, 128),
    batch_size=32,
    shuffle=False,
    seed=42,
)

Found 50 validated image filenames.


In [48]:
# Test set: rows 250-299, read in dataframe order (no shuffling).
test_generator = test_datagen.flow_from_dataframe(
    dataframe=data.iloc[250:300],
    directory="../raw_data/train-jpg/",
    x_col="image_name",
    y_col=alltags,
    class_mode="raw",        # targets are the numeric values of the y_col columns
    target_size=(128, 128),
    batch_size=32,
    shuffle=False,
    seed=42,
)

Found 50 validated image filenames.


In [11]:
# Row index of the single image used for the prediction spot check.
# Row 300 is the first row after the train / validation / test slices.
image_number = 300

In [12]:
# Build a one-image generator for a prediction spot check, and keep the
# untouched image around so it can be displayed next to the prediction.
from tensorflow.keras.preprocessing.image import load_img

# No y_col and class_mode=None: the generator yields images only, no targets.
sample_predict = test_datagen.flow_from_dataframe(
    dataframe=data[image_number:image_number + 1],
    directory="../raw_data/train-jpg/",
    x_col="image_name",
    class_mode=None,
    target_size=(128, 128),
    batch_size=32,
    shuffle=False,
    seed=42,
)

# Store the image for a later visual check.
sample = data.loc[image_number, 'image_name']
sample_image = load_img(f'../raw_data/train-jpg/{sample}')

Found 1 validated image filenames.


-----

# make the model

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

In [25]:
# VGG16

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np


#def VGG_16(weights_path=None):

model = Sequential()
model.add(Conv2D(input_shape=(128,128,3),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(Flatten())

model.add(Dense(units=4096,activation="relu"))
model.add(Dense(units=4096,activation="relu"))
model.add(Dense(units=17, activation="softmax"))

In [26]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [27]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_24 (Conv2D)           (None, 128, 128, 64)      1792      
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 128, 128, 64)      36928     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 64, 64, 128)       73856     
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 64, 64, 128)       147584    
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 32, 32, 128)       0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 32, 32, 256)      

# run the model

In [49]:
# Train for 5 epochs, evaluating on the validation generator after each epoch.
# `batch_size` is deliberately not passed: the generators already yield batches
# of 32 (set in flow_from_dataframe), and Keras documents that batch_size
# should not be specified when the input is a generator.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=5,
)

Epoch 1/5
1/7 [===>..........................] - ETA: 0s - loss: 90558638111850496.0000 - accuracy: 0.2500

KeyboardInterrupt: 

In [None]:
# List the metrics recorded per epoch during training.
history.history.keys()

# evaluate the model

In [30]:
# Score the model on the held-out test generator; returns [loss, accuracy].
model.evaluate(test_generator)



[183494793756672.0, 0.1599999964237213]

In [None]:
import matplotlib.pyplot as plt

def plot_loss_accuracy(history):
    """Plot training and validation curves from a Keras fit history.

    Draws two figures, one for loss and one for accuracy, each with one line
    for the training metric and one for its `val_` counterpart.

    Parameters
    ----------
    history : object returned by `model.fit`
        Its `.history` dict must contain the keys 'loss', 'val_loss',
        'accuracy' and 'val_accuracy'.

    Raises
    ------
    KeyError
        If any of the four keys is missing, e.g. when the model was fit
        without validation data.
    """
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    # The second curve comes from validation_data, so label it accordingly.
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()


# The call sits at module level. It used to be indented inside the function
# body, so the cell plotted nothing and the function would have recursed forever.
plot_loss_accuracy(history)

# predict with the model

In [None]:
# Run the model on the single sample image; `a` holds one score per tag.
a = model.predict(sample_predict)

In [None]:
# Show the raw prediction scores.
a

In [None]:
# Display the image that the prediction above refers to.
sample_image

In [None]:
# Its true labels: every column from position 3 up to, but not including, the last.
# NOTE(review): per the head() preview, position 3 is 'taglist', so the slice
# shows 'taglist' plus the tag columns and leaves out 'result_id' - confirm
# that including 'taglist' is intended.
data.iloc[image_number, 3:-1]

# save the model

In [None]:
from tensorflow.keras import models

# Write the trained model to disk.
# NOTE(review): the file path below is a placeholder - replace it with a real
# destination before running this cell.
models.save_model(
    model,
    'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',  # filepath
    overwrite=True,          # replace any existing file at that path
    include_optimizer=True,  # also save the optimizer state
)