In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_485.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_348.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_154.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_257.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_317.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_593.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_223.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_133.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_430.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_415.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_108.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_019.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_091.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_021.jpg
/kaggle/input/nwpu-resis-small/dataset/mountain/mountain_087.jpg
/kaggle/input/nwpu-resis-

In [12]:
import os

# List the contents of the /kaggle/input/ directory
input_dir = '/kaggle/input/'
for item in os.listdir(input_dir):
    print(item)


nwpu-resis-small


## Import packages

In [None]:
import numpy as np
import os
import cv2
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from skimage.feature import hog
from sklearn.decomposition import PCA
from keras.utils import img_to_array
from sklearn.model_selection import train_test_split

### Loading and Preprocessing Images from Dataset Directory

In [13]:
# Path to the dataset
dataset_path = '/kaggle/input/nwpu-resis-small/dataset'

def load_images_from_folder(folder_path, img_size=(224, 224)):
    images = []
    labels = []
    for category in os.listdir(folder_path):
        category_path = os.path.join(folder_path, category)
        if os.path.isdir(category_path):  # Ensure it's a directory
            for img_name in os.listdir(category_path):
                img_path = os.path.join(category_path, img_name)
                try:
                    img = image.load_img(img_path, target_size=img_size)
                    img_array = img_to_array(img)
                    img_array = preprocess_input(img_array)
                    images.append(img_array)
                    labels.append(category)
                except Exception as e:
                    print(f"Error loading image {img_path}: {e}")
    return np.array(images), np.array(labels)



### Splitting Dataset and Encoding Labels for Training and Testing

In [14]:

def split_data(images, labels, test_size=0.2, random_state=42):
    return train_test_split(images, labels, test_size=test_size, random_state=random_state, stratify=labels)

# Load dataset
X, y = load_images_from_folder(dataset_path)

# Split into train and test sets
X_train, X_test, y_train, y_test = split_data(X, y)

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)



### Feature Extraction Using VGG16 Fully Connected Layers

In [15]:
# Feature Extraction using VGG
def extract_vgg_features(images, batch_size=100):
    vgg_model = VGG16(weights='imagenet', include_top=True)
    fc_model = Model(inputs=vgg_model.input, outputs=vgg_model.get_layer('fc2').output)
    features = []
    for batch_start in range(0, len(images), batch_size):
        batch_end = min(batch_start + batch_size, len(images))
        batch_images = images[batch_start:batch_end]
        batch_features = fc_model.predict(batch_images)
        features.append(batch_features)
    return np.vstack(features)

# Extract features for training and testing data
X_train_fc_features = extract_vgg_features(X_train)
X_test_fc_features = extract_vgg_features(X_test)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 0us/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 5s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 6s/ste

### Convolutional Layer Feature Extraction Using Pre-trained VGG16

In [16]:
# Convolutional Layers Feature Extraction
def extract_conv_features(model, images, batch_size=5):
    features = []
    for batch_start in range(0, len(images), batch_size):
        batch_end = min(batch_start + batch_size, len(images))
        batch_images = images[batch_start:batch_end]
        batch_features = model.predict(batch_images)
        batch_features = batch_features.reshape(batch_features.shape[0], -1)
        features.append(batch_features)
    return np.vstack(features)

vgg_model_conv = VGG16(weights='imagenet', include_top=False, pooling='avg')
conv_model = Model(inputs=vgg_model_conv.input, outputs=vgg_model_conv.output)

X_train_conv_features = extract_conv_features(conv_model, X_train)
X_test_conv_features = extract_conv_features(conv_model, X_test)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/

### Classical Feature Extraction (HOG & SIFT) and Classification with Random Forest

In [17]:
# Classical Feature Extraction Methods
def extract_hog_features(images):
    hog_features = []
    for img in images:
        img_gray = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_RGB2GRAY)
        feature = hog(img_gray, block_norm='L2-Hys')
        hog_features.append(feature)
    return np.array(hog_features)

X_train_hog_features = extract_hog_features(X_train)
X_test_hog_features = extract_hog_features(X_test)

def extract_sift_features(images):
    sift = cv2.SIFT_create()
    sift_features = []
    for img in images:
        img_gray = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_RGB2GRAY)
        kp, des = sift.detectAndCompute(img_gray, None)
        if des is not None:
            des = des.flatten()
            if des.shape[0] < 128:
                des = np.pad(des, (0, 128 - des.shape[0]), 'constant')
            sift_features.append(des[:128])
        else:
            sift_features.append(np.zeros(128))
    return np.array(sift_features)

X_train_sift_features = extract_sift_features(X_train)
X_test_sift_features = extract_sift_features(X_test)

# Combine Features and Train Classifier
def train_and_evaluate(X_train_features, X_test_features, y_train, y_test):
    # Reduce dimensionality if necessary
    pca = PCA(n_components=100)  # Reduced PCA components to save memory
    X_train_features_pca = pca.fit_transform(X_train_features)
    X_test_features_pca = pca.transform(X_test_features)

    # Train a Random Forest classifier
    clf = RandomForestClassifier(n_estimators=50, random_state=42)  # Reduced number of trees to save memory
    clf.fit(X_train_features_pca, y_train)

    # Predictions
    y_pred = clf.predict(X_test_features_pca)

    # Determine unique classes and ensure target_names are consistent
    unique_classes = np.unique(y_test)
    target_names = label_encoder.classes_

    if len(unique_classes) == 1:
        print(f"Only one class detected in y_test. Cannot evaluate properly.")
        return y_pred, None, None

    # Check if target_names matches unique classes
    if len(unique_classes) != len(target_names):
        print(f"Warning: Number of classes in target_names does not match the number of unique classes.")

    # Evaluation metrics
    try:
        report = classification_report(y_test, y_pred, target_names=target_names, output_dict=True)
    except ValueError as e:
        print(f"Error in classification_report: {e}")
        report = None

    accuracy = accuracy_score(y_test, y_pred)
    
    return y_pred, report, accuracy



### Evaluating Individual Features (VGG, Convolutional, HOG, SIFT) with Random Forest Classifier

In [19]:
# Evaluate features separately
print("Evaluating VGG Features:")
y_pred_vgg, report_vgg, accuracy_vgg = train_and_evaluate(X_train_fc_features, X_test_fc_features, y_train_encoded, y_test_encoded)
if report_vgg is not None:
    print("VGG Features Classification Report:")
    print(report_vgg)
print("VGG Features Accuracy:", accuracy_vgg)

print("Evaluating Convolutional Features:")
y_pred_conv, report_conv, accuracy_conv = train_and_evaluate(X_train_conv_features, X_test_conv_features, y_train_encoded, y_test_encoded)
if report_conv is not None:
    print("Convolutional Features Classification Report:")
    print(report_conv)
print("Convolutional Features Accuracy:", accuracy_conv)

print("Evaluating HOG Features:")
y_pred_hog, report_hog, accuracy_hog = train_and_evaluate(X_train_hog_features, X_test_hog_features, y_train_encoded, y_test_encoded)
if report_hog is not None:
    print("HOG Features Classification Report:")
    print(report_hog)
print("HOG Features Accuracy:", accuracy_hog)

print("Evaluating SIFT Features:")
y_pred_sift, report_sift, accuracy_sift = train_and_evaluate(X_train_sift_features, X_test_sift_features, y_train_encoded, y_test_encoded)
if report_sift is not None:
    print("SIFT Features Classification Report:")
    print(report_sift)
print("SIFT Features Accuracy:", accuracy_sift)

Evaluating VGG Features:
VGG Features Classification Report:
{'beach': {'precision': 0.9251700680272109, 'recall': 0.9714285714285714, 'f1-score': 0.9477351916376306, 'support': 140}, 'desert': {'precision': 0.8936170212765957, 'recall': 0.9, 'f1-score': 0.8967971530249109, 'support': 140}, 'forest': {'precision': 0.9856115107913669, 'recall': 0.9785714285714285, 'f1-score': 0.9820788530465949, 'support': 140}, 'industrial': {'precision': 0.9928571428571429, 'recall': 0.9928571428571429, 'f1-score': 0.9928571428571429, 'support': 140}, 'mountain': {'precision': 0.927007299270073, 'recall': 0.9071428571428571, 'f1-score': 0.9169675090252708, 'support': 140}, 'sea_ice': {'precision': 0.9779411764705882, 'recall': 0.95, 'f1-score': 0.963768115942029, 'support': 140}, 'accuracy': 0.95, 'macro avg': {'precision': 0.950367369782163, 'recall': 0.9500000000000001, 'f1-score': 0.9500339942555965, 'support': 840}, 'weighted avg': {'precision': 0.9503673697821629, 'recall': 0.95, 'f1-score': 0.95


| **Feature Type**        | **Accuracy** | **Best Class** | **Best Class F1-score** | **Worst Class** | **Worst Class F1-score** |
|-------------------------|--------------|----------------|-------------------------|----------------|--------------------------|
| **VGG Features**         | 0.9500       | Industrial      | 0.9929                  | Mountain        | 0.9169                   |
| **Convolutional Features**| 0.9631      | Industrial      | 0.9894                  | Mountain        | 0.9319                   |
| **HOG Features**         | 0.6298       | Forest          | 0.9139                  | Mountain        | 0.4528                   |
| **SIFT Features**        | 0.2429       | Sea Ice         | 0.3203                  | Beach           | 0.1812                   |
