In [37]:
# ENEL 525 Final Project
# Author: Tania Rizwan, UCID: 30115533
# Date: December 18, 2024

In [7]:
# Imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping



In [8]:
# Import data
data_dir = "UCMerced_LandUse/Images"
classes = sorted(os.listdir(data_dir))

X = [] # Features
Y = [] # Labels
for index, class_name in enumerate(classes):
    # print(index, class_name)
    class_path = os.path.join(data_dir, class_name) # Example: UCMerced_LandUse/Images/agricultural
    images = os.listdir(class_path)
    num_images = len(images) // 4  # Use only half of the images in this class

    for image in images[:num_images]:
        image_path = os.path.join(class_path, image) # Get image path
        image = Image.open(image_path).resize([256, 256]) # Load image
        image_arr = np.array(image) / 255.0 # Normalize vals to [0, 1]
        X.append(image_arr)
        Y.append(index)
    

# Convert X and Y to numpy arrays
X = np.array(X)
Y = np.array(Y)

print(f"Reduced dataset size: {X.shape[0]} images across {len(classes)} classes.")


Reduced dataset size: 525 images across 21 classes.


In [9]:
# Train, Test and Validation Split (70%, 15%, 15%)
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size = 0.3, random_state = 42) # Temp will be split into val and test
X_test, X_val, Y_test, Y_val = train_test_split(X_temp, Y_temp, test_size = 0.5, random_state = 42) # 15% of overall data for each

# Convert labels to one-hot
Y_train_onehot = to_categorical(Y_train, num_classes = 21)
Y_val_onehot = to_categorical(Y_val, num_classes = 21)
Y_test_onehot = to_categorical(Y_test, num_classes = 21)

# Print dataset shapes
print(f"Training data: {X_train.shape}, Labels: {Y_train.shape}")
print(f"Validation data: {X_val.shape}, Labels: {Y_val.shape}")
print(f"Testing data: {X_test.shape}, Labels: {Y_test.shape}")

Training data: (367, 256, 256, 3), Labels: (367,)
Validation data: (79, 256, 256, 3), Labels: (79,)
Testing data: (79, 256, 256, 3), Labels: (79,)


In [10]:
# Create model
# CNN structure: conv layer with ReLu, pooling layer, flatten (input), dense, output (softmax), use one-hot encoding

model = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (256, 256, 3)), # num filters (kernels), kernel size, ... , size = (h, w, c = 3 for RGB image)
    BatchNormalization(),
    MaxPool2D(pool_size = (2, 2)), # Reduce dimensions by half. Have a 2 x 2 kernel 

    Conv2D(64, (3, 3), activation = 'relu'), # More filters to extract intricate features
    BatchNormalization(),
    MaxPool2D(pool_size = (2, 2)),

    Conv2D(128, (3, 3), activation = 'relu'),
    BatchNormalization(),
    MaxPool2D(pool_size = (2, 2)),

    Flatten(), # Flatten to 1D before passing to connected layer
    Dense(128, activation = 'relu'),
    Dropout(0.7), # Drop 50% of neurons randomly during training,
    Dense(21, activation = 'softmax') # One hot encoding
])

# Compile model
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5), 
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits = True),
    metrics = ['accuracy']
    )

# Early Stopping

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)



reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
    min_lr=1e-6
)


# Train model
history = model.fit(
    X_train, 
    Y_train_onehot,
    validation_data = (X_val, Y_val_onehot),
    batch_size = 32, # Number of samples trained before updating weights
    callbacks=[reduce_lr, early_stopping],
    epochs = 40, # Iterations over the dataset
    verbose = 2 # Show progress for each epoch
)

# Print model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/40


  output, from_logits = _get_logits(


12/12 - 30s - 3s/step - accuracy: 0.0763 - loss: 5.5136 - val_accuracy: 0.0380 - val_loss: 3.0263 - learning_rate: 1.0000e-05
Epoch 2/40
12/12 - 28s - 2s/step - accuracy: 0.1335 - loss: 3.2536 - val_accuracy: 0.0506 - val_loss: 3.1034 - learning_rate: 1.0000e-05
Epoch 3/40
12/12 - 29s - 2s/step - accuracy: 0.2071 - loss: 2.7132 - val_accuracy: 0.0506 - val_loss: 3.2858 - learning_rate: 1.0000e-05
Epoch 4/40

Epoch 4: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.
12/12 - 30s - 3s/step - accuracy: 0.3025 - loss: 2.4313 - val_accuracy: 0.0506 - val_loss: 3.4960 - learning_rate: 1.0000e-05
Epoch 5/40
12/12 - 28s - 2s/step - accuracy: 0.3406 - loss: 2.1237 - val_accuracy: 0.0506 - val_loss: 3.6975 - learning_rate: 5.0000e-06
Epoch 6/40
12/12 - 27s - 2s/step - accuracy: 0.3978 - loss: 2.0321 - val_accuracy: 0.0506 - val_loss: 3.8817 - learning_rate: 5.0000e-06


In [11]:
# Evaluate model 
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, Y_test_onehot, verbose=1)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 396ms/step - accuracy: 0.0756 - loss: 3.0191
Test Loss: 3.0149848461151123
Test Accuracy: 0.08860759437084198
