### Import libraries

In [30]:
import os
import numpy as np
import pandas as pd
import glob as gb
import cv2
import matplotlib.pyplot as plt
import shutil
import tensorflow as tf
from tqdm import tqdm
from PIL import Image
from tensorflow import keras
from keras import layers
from keras import models
from keras import Input

In [3]:
# All three splits live under one local "Image Data" folder; spell the root once
# and append the split-specific tail to it.
_image_data_root = r"D:\Project\PG Project\Data\Image Data"

train_dataset_path = _image_data_root + r"\seg_train"      # labelled training split
test_dataset_path = _image_data_root + r"\seg_test"        # labelled test split
predict_dataset_path = _image_data_root + r"\seg_pred\seg_pred"  # flat folder of images to predict on

### Checking data information

In [5]:
def _count_jpgs(directory):
    """Return how many ``*.jpg`` files sit directly inside ``directory``."""
    return len(gb.glob(os.path.join(directory, '*.jpg')))


# Each labelled split nests a same-named folder that holds one sub-folder per class.
train_dataset_checking_path = os.path.join(train_dataset_path, 'seg_train')

for class_dir in os.listdir(train_dataset_checking_path):
    n_images = _count_jpgs(os.path.join(train_dataset_checking_path, class_dir))
    print(f"For training data, found {n_images} images in folder {class_dir}")
print()

test_dataset_checking_path = os.path.join(test_dataset_path, 'seg_test')

for class_dir in os.listdir(test_dataset_checking_path):
    n_images = _count_jpgs(os.path.join(test_dataset_checking_path, class_dir))
    print(f"For testing data, found {n_images} images in folder {class_dir}")
print()

# The prediction images are globbed straight from their folder, with no class sub-folders.
n_predict_images = _count_jpgs(predict_dataset_path)
print(f"For predicting data, found {n_predict_images} images")

For training data, found 2190 images in folder buildings
For training data, found 2263 images in folder forest
For training data, found 2387 images in folder glacier
For training data, found 2495 images in folder mountain
For training data, found 2270 images in folder sea
For training data, found 2381 images in folder street

For testing data, found 437 images in folder buildings
For testing data, found 473 images in folder forest
For testing data, found 549 images in folder glacier
For testing data, found 523 images in folder mountain
For testing data, found 510 images in folder sea
For testing data, found 501 images in folder street

For predicting data, found 7288 images


In [6]:
# Tally the array shape of every training JPEG to confirm the images are uniform.
size = [
    plt.imread(jpg_path).shape
    for class_dir in os.listdir(train_dataset_checking_path)
    for jpg_path in gb.glob(os.path.join(train_dataset_checking_path, class_dir, '*.jpg'))
]
print(' Image size\t Count')
pd.Series(size).value_counts()

 Image size	 Count


(150, 150, 3)    13986
Name: count, dtype: int64

In [7]:
# Tally the array shape of every test JPEG to confirm the images are uniform.
size = [
    plt.imread(jpg_path).shape
    for class_dir in os.listdir(test_dataset_checking_path)
    for jpg_path in gb.glob(os.path.join(test_dataset_checking_path, class_dir, '*.jpg'))
]
print(' Image size\t Count')
pd.Series(size).value_counts()

 Image size	 Count


(150, 150, 3)    2993
Name: count, dtype: int64

In [8]:
# Tally the array shape of every JPEG in the prediction folder.
predict_jpg_pattern = os.path.join(predict_dataset_path, '*.jpg')
size = [plt.imread(jpg_path).shape for jpg_path in gb.glob(predict_jpg_pattern)]
print(' Image size\t Count')
pd.Series(size).value_counts()

 Image size	 Count


(150, 150, 3)    7288
Name: count, dtype: int64

### Retain (150,150,3)-sized images

In [10]:
# Quarantine folder for images whose dimensions are not 150x150.
# NOTE(review): this root differs from the one the *_dataset_path variables point at —
# confirm the location is intended before running on a new machine.
Removed_image_dir = os.path.join(r'D:\项目\Comp702\Data\Image Data', 'Removed_train_images')
os.makedirs(Removed_image_dir, exist_ok=True)


def move_wrong_sized_images(image_dir, prefix, expected_size=(150, 150), target_dir=Removed_image_dir):
    """Move wrongly sized JPEGs out of ``image_dir``.

    Parameters
    ----------
    image_dir : str
        Folder whose direct ``*.jpg`` children are inspected.
    prefix : str
        Text prepended (with an underscore) to the file name of every moved
        image, so files coming from different folders do not overwrite each
        other in ``target_dir``.
    expected_size : tuple of int
        ``(width, height)`` as reported by ``PIL.Image.size``.
    target_dir : str
        Destination folder for rejected images.

    Returns
    -------
    int
        Number of images moved.
    """
    moved = 0
    for file in gb.glob(os.path.join(image_dir, '*.jpg')):
        # Read the size and close the file *before* moving it: moving a file
        # that is still open fails on Windows. Converting to RGB is not needed
        # just to read the dimensions.
        with Image.open(file) as img:
            actual_size = img.size
        if actual_size != expected_size:
            # Build the new file name, including the class/split prefix.
            new_name = f"{prefix}_{os.path.basename(file)}"
            shutil.move(file, os.path.join(target_dir, new_name))
            moved += 1
    return moved


# Labelled splits: one sub-folder per class, and the class name is the prefix.
moved_count1 = sum(
    move_wrong_sized_images(os.path.join(train_dataset_checking_path, folder), folder)
    for folder in os.listdir(train_dataset_checking_path)
)
moved_count2 = sum(
    move_wrong_sized_images(os.path.join(test_dataset_checking_path, folder), folder)
    for folder in os.listdir(test_dataset_checking_path)
)
# The prediction images have no class folder, so use a fixed prefix rather than
# whatever class name the previous loop happened to leave behind.
moved_count3 = move_wrong_sized_images(predict_dataset_path, 'predict')

print(f"moving {moved_count1} train_images to {Removed_image_dir}")
print(f"moving {moved_count2} test_images to {Removed_image_dir}")
print(f"moving {moved_count3} predict_images to {Removed_image_dir}")

moving 0 train_images to D:\项目\Comp702\Data\Image Data\Removed_train_images
moving 0 test_images to D:\项目\Comp702\Data\Image Data\Removed_train_images
moving 0 predict_images to D:\项目\Comp702\Data\Image Data\Removed_train_images


### Label Mappings

In [7]:
# Class folder names; their position in this list is the integer label.
class_names = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
number_classes = len(class_names)

# name -> integer label lookup
class_labels = dict(zip(class_names, range(number_classes)))
print(class_labels)

IMAGE_SIZE = (150, 150)

{'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4, 'street': 5}


### Loading the data

In [10]:
train_dir = r"D:\Project\PG Project\Data\Image Data\seg_train\seg_train"
test_dir = r"D:\Project\PG Project\Data\Image Data\seg_test\seg_test"

def load_dataset(datasets=None, image_size=None):
    """Load the class-labelled images found under each dataset directory.

    Parameters
    ----------
    datasets : list of str, optional
        Directories whose immediate sub-folders are named after the keys of
        ``class_labels``. Defaults to ``[train_dir, test_dir]``.
    image_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to ``IMAGE_SIZE``.

    Returns
    -------
    list of (images, labels)
        One tuple per dataset, in the order given. ``images`` is a float32
        array of RGB images (pixel values are not rescaled here) and
        ``labels`` is the matching int32 array of class indices.
    """
    if datasets is None:
        datasets = [train_dir, test_dir]
    if image_size is None:
        image_size = IMAGE_SIZE
    output = []

    for dataset in datasets:

        images1 = []
        labels1 = []
        unreadable = []
        print(f"loading {dataset}")

        # Sorted listings make the sample order independent of the filesystem.
        for folder in sorted(os.listdir(dataset)):
            folder_path = os.path.join(dataset, folder)
            # Ignore stray files and folders that are not one of the known classes
            # instead of failing with a KeyError on the label lookup.
            if folder not in class_labels or not os.path.isdir(folder_path):
                print(f"skipping {folder_path}: not a known class folder")
                continue
            # every image in this folder gets the folder's class index
            label = class_labels[folder]
            for file in tqdm(sorted(os.listdir(folder_path))):
                image_path = os.path.join(folder_path, file)
                # cv2.imread signals failure by returning None rather than raising,
                # so check before handing the result to cvtColor.
                image_file = cv2.imread(image_path)
                if image_file is None:
                    unreadable.append(image_path)
                    continue
                # OpenCV decodes to BGR; convert to RGB channel order
                image_file = cv2.cvtColor(image_file, cv2.COLOR_BGR2RGB)
                image_file = cv2.resize(image_file, image_size)

                images1.append(image_file)
                labels1.append(label)

        if unreadable:
            print(f"skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

        # convert the images and labels list to numpy array
        images1 = np.array(images1, dtype = 'float32')
        labels1 = np.array(labels1, dtype = 'int32')

        output.append((images1, labels1))
        print("Images file have been loaded")

    return output

In [12]:
# load_dataset() yields one (images, labels) pair per split: training first, then test.
train_split, test_split = load_dataset()
train_images, train_labels = train_split
test_images, test_labels = test_split

loading D:\Project\PG Project\Data\Image Data\seg_train\seg_train


100%|██████████| 2190/2190 [00:09<00:00, 241.79it/s]
100%|██████████| 2263/2263 [00:09<00:00, 232.38it/s]
100%|██████████| 2387/2387 [00:09<00:00, 252.33it/s]
100%|██████████| 2495/2495 [00:09<00:00, 257.42it/s]
100%|██████████| 2270/2270 [00:08<00:00, 260.62it/s]
100%|██████████| 2381/2381 [00:09<00:00, 246.23it/s]


Images file have been loaded
loading D:\Project\PG Project\Data\Image Data\seg_test\seg_test


100%|██████████| 437/437 [00:01<00:00, 245.09it/s]
100%|██████████| 473/473 [00:01<00:00, 238.29it/s]
100%|██████████| 549/549 [00:02<00:00, 243.26it/s]
100%|██████████| 523/523 [00:02<00:00, 250.67it/s]
100%|██████████| 510/510 [00:02<00:00, 245.75it/s]
100%|██████████| 501/501 [00:02<00:00, 240.29it/s]


Images file have been loaded


In [14]:
# Image and label counts must agree within each split.
n_train_images, n_train_labels = len(train_images), len(train_labels)
n_test_images, n_test_labels = len(test_images), len(test_labels)
print("train dataset size", n_train_images, n_train_labels)
print("test dataset size", n_test_images, n_test_labels)

train dataset size 13986 13986
test dataset size 2993 2993


### Normalisation

In [17]:
# Scale pixel intensities from the 0-255 range down to 0-1.
# This cell overwrites its own inputs, so re-running it would divide by 255 a
# second time. Only rescale while values above 1 are still present; the `.size`
# check keeps an empty array from raising inside `.max()`.
if train_images.size and train_images.max() > 1.0:
    train_images = train_images / 255.0
if test_images.size and test_images.max() > 1.0:
    test_images = test_images / 255.0

### Build the model

In [32]:
# Derive the network's input and output sizes from the notebook's constants so
# they cannot drift apart from the data loader and the class list.
# IMAGE_SIZE is the (width, height) pair given to cv2.resize, whereas the
# input layer wants (height, width, channels).
input_shape = (IMAGE_SIZE[1], IMAGE_SIZE[0], 3)

model = models.Sequential()
model.add(Input(shape=input_shape))
# Five identical stages: 3x3 convolution with ReLU followed by 2x2 max pooling.
for n_filters in (32, 32, 64, 64, 64):
    model.add(layers.Conv2D(n_filters, kernel_size=(3,3), activation='relu'))
    model.add(layers.MaxPool2D((2,2)))
# Classifier head: flatten, one hidden dense layer, then one softmax unit per class.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(number_classes, activation='softmax'))
model.summary()