In [17]:
import sys
import os

# The notebook is executed from the notebooks/ folder, so the project root is
# the parent of the current working directory. Put it on sys.path so the
# `src.*` imports in the next cell resolve.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
already_importable = project_root in sys.path
if not already_importable:
    sys.path.insert(0, project_root)
print("Project root added to sys.path:", project_root)


Project root added to sys.path: c:\Users\rober\Desktop\516FP\EuroSat_Segmentation_Project


In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Import classifier and feature extraction functions.
from src.classification.classifier import (
    extract_combined_features, 
    extract_segmented_features,
    LandUseClassifier,
    RobustLandUseClassifier
)

print("Libraries imported successfully.")


Libraries imported successfully.


In [None]:
# Tunable experiment settings used by the cells below.

# Number of segments/clusters requested from the segmentation step.
K = 2
# Fuzziness parameter for fuzzy C-means.
M = 2
# Number of training images to sample for feature extraction (adjust as needed).
SAMPLE_SIZE = 10_000

In [28]:
# The EuroSAT split manifests live under data/raw/EuroSAT, addressed relative
# to the notebooks folder.
csv_dir = os.path.join("..", "data", "raw", "EuroSAT")
train_csv_path, val_csv_path, test_csv_path = (
    os.path.join(csv_dir, split_file)
    for split_file in ("train.csv", "validation.csv", "test.csv")
)

# index_col=0: the first CSV column is used as the DataFrame index.
train_df, val_df, test_df = (
    pd.read_csv(split_path, index_col=0)
    for split_path in (train_csv_path, val_csv_path, test_csv_path)
)

# Quick look at the training manifest's schema and first rows.
print("Train CSV columns:", train_df.columns)
print("Train CSV head:", train_df.head(), sep="\n")

Train CSV columns: Index(['Filename', 'Label', 'ClassName'], dtype='object')
Train CSV head:
                                                Filename  Label  \
16257                      AnnualCrop/AnnualCrop_142.jpg      0   
3297   HerbaceousVegetation/HerbaceousVegetation_2835...      2   
17881               PermanentCrop/PermanentCrop_1073.jpg      6   
2223                       Industrial/Industrial_453.jpg      4   
4887   HerbaceousVegetation/HerbaceousVegetation_1810...      2   

                  ClassName  
16257            AnnualCrop  
3297   HerbaceousVegetation  
17881         PermanentCrop  
2223             Industrial  
4887   HerbaceousVegetation  


In [29]:
def load_image(image_path):
    """Read the image at ``image_path`` and return it converted from BGR to RGB.

    Returns ``None`` (after printing an error message) when ``cv2.imread``
    does not return an image.
    """
    bgr = cv2.imread(image_path)
    if bgr is not None:
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    print("Error loading image:", image_path)
    return None

def extract_features_raw(img_path):
    """Compute combined features over the raw (full, unsegmented) image.

    Returns ``None`` when ``load_image`` cannot load ``img_path``.
    """
    rgb = load_image(img_path)
    return None if rgb is None else extract_combined_features(rgb)

def extract_features_kmeans(img_path, k=K):
    """Compute segmented features for an image using k-means segmentation.

    ``k`` is forwarded to ``extract_segmented_features`` together with the
    module-level ``M`` (as ``m``) and ``method='kmeans'``.
    Returns ``None`` when ``load_image`` cannot load ``img_path``.
    """
    rgb = load_image(img_path)
    if rgb is not None:
        return extract_segmented_features(rgb, k=k, m=M, method='kmeans')
    return None

def extract_features_fuzzy(img_path, k=K, m=M):
    """Compute segmented features for an image using fuzzy C-means segmentation.

    ``k`` and ``m`` are forwarded to ``extract_segmented_features`` with
    ``method='fuzzy'``. Returns ``None`` when ``load_image`` cannot load
    ``img_path``.
    """
    rgb = load_image(img_path)
    segmented = None
    if rgb is not None:
        segmented = extract_segmented_features(rgb, k=k, m=m, method='fuzzy')
    return segmented

In [30]:
# Sample training data (adjust SAMPLE_SIZE in the configuration cell).
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so clamp the request to the manifest size.
n_samples = min(SAMPLE_SIZE, len(train_df))
sample_train = train_df.sample(n=n_samples, random_state=42)

# "Filename" already contains a relative path, e.g. "AnnualCrop/AnnualCrop_142.jpg",
# so it only needs to be joined onto the dataset directory.
image_dir = os.path.join("..", "data", "raw", "EuroSAT")

# Accumulate rows in plain lists; they are converted to arrays once at the end.
raw_rows, kmeans_rows, fuzzy_rows, labels = [], [], [], []
n_skipped = 0

for img_filename, class_name in zip(sample_train["Filename"], sample_train["ClassName"]):
    img_path = os.path.join(image_dir, img_filename)

    features_raw = extract_features_raw(img_path)
    features_km = extract_features_kmeans(img_path, k=K)
    features_fuzzy = extract_features_fuzzy(img_path, k=K, m=M)

    # Keep a sample only when all three extractors succeeded, so the three
    # feature matrices and the label vector stay row-aligned.
    if any(f is None for f in (features_raw, features_km, features_fuzzy)):
        n_skipped += 1
        continue

    raw_rows.append(features_raw)
    kmeans_rows.append(features_km)
    fuzzy_rows.append(features_fuzzy)
    labels.append(class_name)

# Surface dropped samples instead of silently shrinking the dataset.
if n_skipped:
    print(f"Skipped {n_skipped} of {n_samples} sampled images (feature extraction returned None).")
if not labels:
    raise RuntimeError(
        "No features were extracted; check that the images exist under " + image_dir
    )

X_raw = np.array(raw_rows)
X_kmeans = np.array(kmeans_rows)
X_fuzzy = np.array(fuzzy_rows)
y = np.array(labels)

print("Raw features shape:", X_raw.shape)
print("K-Means features shape:", X_kmeans.shape)
print("Fuzzy C-Means features shape:", X_fuzzy.shape)
print("Labels shape:", y.shape)

Raw features shape: (1000, 16)
K-Means features shape: (1000, 16)
Fuzzy C-Means features shape: (1000, 16)
Labels shape: (1000,)


In [31]:
from sklearn.model_selection import train_test_split

# Split all three feature matrices and the labels in ONE call so every array
# is partitioned with the same shuffled, stratified indices. Row i of each
# X_train_* / X_val_* then refers to the same image, and the shared
# y_train / y_val are valid for all three feature sets. This also avoids
# binding the throwaway name `_`, which IPython uses for the last cell output.
(
    X_train_raw, X_val_raw,
    X_train_km, X_val_km,
    X_train_fuzzy, X_val_fuzzy,
    y_train, y_val,
) = train_test_split(
    X_raw, X_kmeans, X_fuzzy, y,
    test_size=0.3, random_state=42, stratify=y,
)

print("Training set size (raw):", X_train_raw.shape)
print("Validation set size (raw):", X_val_raw.shape)

Training set size (raw): (700, 16)
Validation set size (raw): (300, 16)


In [32]:
# Train classifier on raw features:
classifier_raw = LandUseClassifier()
classifier_raw.train(X_train_raw, y_train)
print("Raw model evaluation:")
classifier_raw.evaluate(X_val_raw, y_val)

# Directory for the saved models; model_dir is reused by the k-means and fuzzy
# training cells below. exist_ok=True creates the directory when missing and
# is a no-op when it already exists, avoiding the check-then-create race.
model_dir = os.path.join("..", "models")
os.makedirs(model_dir, exist_ok=True)
classifier_raw.save(os.path.join(model_dir, "landuse_classifier_raw.pkl"))

Raw model evaluation:
Classification Report:
                      precision    recall  f1-score   support

          AnnualCrop       0.47      0.70      0.56        40
              Forest       0.43      0.97      0.60        33
HerbaceousVegetation       0.41      0.47      0.43        32
             Highway       0.00      0.00      0.00        25
          Industrial       0.85      0.94      0.89        31
             Pasture       0.75      0.14      0.23        22
       PermanentCrop       0.00      0.00      0.00        25
         Residential       0.42      0.37      0.39        30
               River       0.31      0.55      0.40        29
             SeaLake       0.36      0.15      0.21        33

            accuracy                           0.46       300
           macro avg       0.40      0.43      0.37       300
        weighted avg       0.41      0.46      0.40       300

Confusion Matrix:
[[28  1  2  0  1  0  0  3  5  0]
 [ 0 32  0  0  0  0  0  0  0  1]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [33]:
# Fit a LandUseClassifier on the k-means segmented feature split, report its
# validation metrics, and save it next to the other models in model_dir.
classifier_km = LandUseClassifier()
classifier_km.train(X_train_km, y_train)

print("K-Means segmented model evaluation:")
classifier_km.evaluate(X_val_km, y_val)

kmeans_model_path = os.path.join(model_dir, "landuse_classifier_kmeans.pkl")
classifier_km.save(kmeans_model_path)

K-Means segmented model evaluation:
Classification Report:
                      precision    recall  f1-score   support

          AnnualCrop       0.41      0.60      0.49        40
              Forest       0.38      0.94      0.54        33
HerbaceousVegetation       0.32      0.34      0.33        32
             Highway       0.00      0.00      0.00        25
          Industrial       0.55      0.90      0.68        31
             Pasture       0.00      0.00      0.00        22
       PermanentCrop       0.00      0.00      0.00        25
         Residential       0.36      0.27      0.31        30
               River       0.19      0.31      0.24        29
             SeaLake       0.33      0.06      0.10        33

            accuracy                           0.38       300
           macro avg       0.26      0.34      0.27       300
        weighted avg       0.28      0.38      0.30       300

Confusion Matrix:
[[24  2  5  0  5  0  0  0  4  0]
 [ 0 31  0  0  0  0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [34]:
# Fit a LandUseClassifier on the fuzzy C-means segmented feature split, report
# its validation metrics, and save it next to the other models in model_dir.
classifier_fuzzy = LandUseClassifier()
classifier_fuzzy.train(X_train_fuzzy, y_train)

print("Fuzzy C-Means segmented model evaluation:")
classifier_fuzzy.evaluate(X_val_fuzzy, y_val)

fuzzy_model_path = os.path.join(model_dir, "landuse_classifier_fuzzy.pkl")
classifier_fuzzy.save(fuzzy_model_path)

Fuzzy C-Means segmented model evaluation:
Classification Report:
                      precision    recall  f1-score   support

          AnnualCrop       0.44      0.60      0.51        40
              Forest       0.39      0.94      0.55        33
HerbaceousVegetation       0.34      0.34      0.34        32
             Highway       0.00      0.00      0.00        25
          Industrial       0.54      0.94      0.68        31
             Pasture       0.00      0.00      0.00        22
       PermanentCrop       0.00      0.00      0.00        25
         Residential       0.36      0.30      0.33        30
               River       0.21      0.34      0.26        29
             SeaLake       0.33      0.06      0.10        33

            accuracy                           0.39       300
           macro avg       0.26      0.35      0.28       300
        weighted avg       0.29      0.39      0.30       300

Confusion Matrix:
[[24  2  5  0  5  0  0  0  4  0]
 [ 0 31  0  0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
# Print the interpreter path and the scikit-learn version in use, so the
# environment behind the saved models and reports above is recorded.
import sys
import sklearn
print("Python executable:", sys.executable)
print("scikit-learn version:", sklearn.__version__)

Python executable: C:\Users\rober\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe
scikit-learn version: 1.5.2


## Observations

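- Each feature extraction path (raw, k-means, fuzzy C-means) produced a feature vector of length 16 in the recorded run (see the printed shapes above), not 6; the earlier description of the features as only the mean and standard deviation of the color channels should be re-checked against `src/classification/classifier.py`.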
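- The classifier (SVM) was trained on a subset of the data and evaluated on a validation split: the recorded outputs come from a run with 1,000 sampled images (700 for training, 300 for validation), even though `SAMPLE_SIZE` is currently set to 10000.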
- The classification report and confusion matrix indicate areas for improvement—particularly by increasing the sample size and possibly refining feature extraction.