In [37]:
import os
import numpy as np
import pandas as pd
import json
import cv2

import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub

from keras import layers, models, optimizers, regularizers
from keras.applications import EfficientNetB0
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# import matplotlib.pyplot as plt

In [54]:
# Check whether TensorFlow can see at least one GPU device.
# tf.test.is_gpu_available() is deprecated; listing the physical devices is the
# supported replacement. The comparison keeps the cell output a plain bool.
len(tf.config.list_physical_devices('GPU')) > 0

True

In [52]:
# Read the three split listings (train / validation / test) into DataFrames.
dir_train, dir_valid, dir_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'dataset/EuroSAT/train.csv',
        'dataset/EuroSAT/validation.csv',
        'dataset/EuroSAT/test.csv',
    )
)


Unnamed: 0,index,Filename,Label,ClassName
0,258,Pasture/Pasture_1481.jpg,5,Pasture
1,2134,Forest/Forest_2638.jpg,1,Forest
2,3518,Highway/Highway_875.jpg,3,Highway
3,3048,River/River_434.jpg,8,River
4,3996,SeaLake/SeaLake_2930.jpg,9,SeaLake
...,...,...,...,...
5395,3735,SeaLake/SeaLake_1943.jpg,9,SeaLake
5396,4451,AnnualCrop/AnnualCrop_211.jpg,0,AnnualCrop
5397,892,Industrial/Industrial_1428.jpg,4,Industrial
5398,4738,AnnualCrop/AnnualCrop_2571.jpg,0,AnnualCrop


In [40]:
# ## Verify that all 3 splits have the same class distribution across the 10 categories

# temp1 = dir_test['Label'].value_counts() / dir_test.shape[0]
# temp2 = dir_train['Label'].value_counts() / dir_train.shape[0]
# temp3 = dir_valid['Label'].value_counts() / dir_valid.shape[0]
# df_temp = pd.concat([temp1, temp2, temp3], axis=1)
# df_temp

# ## Result: the splits are evenly sliced by category

In [50]:
# Number of rows kept for quick experiments; set to None to use every image.
SAMPLE_LIMIT = 1000

# Stack the train and validation listings into one frame with a fresh 0..n-1 index.
img_dir_all = pd.concat([dir_train, dir_valid], ignore_index=True)

# Select the needed columns by name rather than by position, so an extra or
# missing index column in the CSVs cannot silently drop 'ClassName' (which the
# image-loading cell reads) or keep a stray column.
img_dir_all = img_dir_all[['Filename', 'Label', 'ClassName']]
print(img_dir_all.shape)

# Slicing with None as the stop keeps all rows.
img_dir = img_dir_all.iloc[:SAMPLE_LIMIT, :]  # limit sample size when testing
img_dir

(24300, 3)


Unnamed: 0,Filename,Label,ClassName
0,AnnualCrop/AnnualCrop_142.jpg,0,AnnualCrop
1,HerbaceousVegetation/HerbaceousVegetation_2835...,2,HerbaceousVegetation
2,PermanentCrop/PermanentCrop_1073.jpg,6,PermanentCrop
3,Industrial/Industrial_453.jpg,4,Industrial
4,HerbaceousVegetation/HerbaceousVegetation_1810...,2,HerbaceousVegetation
...,...,...,...
995,Residential/Residential_2931.jpg,7,Residential
996,HerbaceousVegetation/HerbaceousVegetation_2176...,2,HerbaceousVegetation
997,Forest/Forest_166.jpg,1,Forest
998,AnnualCrop/AnnualCrop_2425.jpg,0,AnnualCrop


In [42]:
# Load images and labels/classes
base_path = 'dataset/EuroSAT/'
IMG_SIZE = (224, 224)  # size passed to cv2.resize; matches get_model()'s default res=224

images = []
classes = []
# Iterate over the two needed columns directly instead of building a Series per row.
for filename, class_name in zip(img_dir['Filename'], img_dir['ClassName']):
    img_path = os.path.join(base_path, filename)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for a missing or unreadable file instead of
        # raising; fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.resize(img, IMG_SIZE)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
    images.append(img)
    classes.append(class_name)

# Normalize images to [0, 1]. Converting to float32 first avoids the float64
# array that dividing by 255.0 would otherwise produce, which doubles memory use.
images = np.asarray(images, dtype=np.float32) / 255.0

# One-hot encode the class names (one column per distinct name). The dtype is
# set explicitly because the pandas default (uint8 or bool) depends on the version.
labels = pd.get_dummies(classes, dtype=np.float32).values

# Split the data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Set up the data augmentation: random rotations, shifts and flips
data_generator = ImageDataGenerator(
    rotation_range=180,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

In [43]:
MODEL_PATH = "https://tfhub.dev/sayakpaul/convnext_base_21k_1k_224_fe/1"


def get_model(model_path=MODEL_PATH, res=224, num_classes=10):
    """Build a Sequential classifier on top of a frozen TF-Hub layer.

    Args:
        model_path: Handle passed to ``hub.KerasLayer``; loaded with
            ``trainable=False`` so its weights are not updated.
        res: Height and width of the square, 3-channel input.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    backbone = hub.KerasLayer(model_path, trainable=False)

    classifier = keras.Sequential()
    classifier.add(keras.layers.InputLayer((res, res, 3)))
    classifier.add(backbone)
    # Head: one L2-regularised 256-unit block followed by batch norm.
    # A Dense(512) + BatchNormalization + Dropout(0.5) block before it, and a
    # Dropout(0.5) after it, are currently disabled.
    classifier.add(
        layers.Dense(256, activation="relu", kernel_regularizer=regularizers.l2(0.001))
    )
    classifier.add(layers.BatchNormalization())
    classifier.add(keras.layers.Dense(num_classes, activation="softmax"))
    return classifier

# Build the classifier with get_model()'s defaults and print its layer table.
model_1 = get_model()
model_1.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_7 (KerasLayer)  (None, 1024)              87566464  
                                                                 
 dense_9 (Dense)             (None, 256)               262400    
                                                                 
 batch_normalization_3 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dense_10 (Dense)            (None, 10)                2570      
                                                                 
Total params: 87,832,458
Trainable params: 265,482
Non-trainable params: 87,566,976
_________________________________________________________________


In [44]:
# Training configuration
batch_size = 32
epochs = 10

# Compile the model: Adam optimizer, categorical cross-entropy on the one-hot
# labels, accuracy as the reported metric.
adam = optimizers.Adam(learning_rate=0.001)
model_1.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on augmented batches; the held-out split is passed
# un-augmented as validation data.
train_batches = data_generator.flow(X_train, y_train, batch_size=batch_size)
history = model_1.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=epochs,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [45]:
# Evaluate the model
y_pred = model_1.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print(classification_report(y_true, y_pred_classes))
print(confusion_matrix(y_true, y_pred_classes))

              precision    recall  f1-score   support

           0       0.95      0.80      0.87        25
           1       0.84      0.97      0.90        32
           2       0.85      0.71      0.77        24
           3       0.94      0.77      0.85        22
           4       0.87      0.93      0.90        14
           5       0.71      0.83      0.77        18
           6       0.75      0.75      0.75        16
           7       0.94      0.89      0.92        19
           8       0.86      0.90      0.88        21
           9       0.75      1.00      0.86         9

    accuracy                           0.85       200
   macro avg       0.85      0.86      0.85       200
weighted avg       0.86      0.85      0.85       200

[[20  0  0  0  0  2  2  0  0  1]
 [ 0 31  1  0  0  0  0  0  0  0]
 [ 0  4 17  0  0  2  0  0  0  1]
 [ 1  0  0 17  2  0  1  0  1  0]
 [ 0  0  0  0 13  0  0  1  0  0]
 [ 0  0  0  1  0 15  1  0  1  0]
 [ 0  0  1  0  0  1 12  0  1  1]
 [ 0  1  1

##### If you want to save the model: 

In [56]:
# Persist the trained model under the "model_1" path.
export_dir = "model_1"
model_1.save(export_dir)

# ## To load it again later:
# from tensorflow.keras.models import load_model
# loaded_model = load_model("model_1")



INFO:tensorflow:Assets written to: model_1\assets


INFO:tensorflow:Assets written to: model_1\assets


## Draft / Archive (please disregard)

In [46]:
# import matplotlib.pyplot as plt # somehow this just doesn't work

In [47]:
# Sanity check: show the first class name and the shape of the first image.
first_class = classes[0]
print(first_class)
images[0].shape

AnnualCrop


(224, 224, 3)

In [48]:
# ## Use this when running for 1st time
# MODEL_PATH = "https://tfhub.dev/sayakpaul/convnext_base_21k_1k_224_fe/1"
# model = hub.load(MODEL_PATH)
# # saved_model_path = 'model/'
# # tf.saved_model.save(model, saved_model_path)

# # ## Use this for subsequent runs when model already loaded locally
# # saved_model_path = 'model/'
# # model = tf.saved_model.load(saved_model_path)