# Task 2: DMG

> REFS:

> [SVM](https://www.datasciencebase.com/supervised-ml/algorithms/support-vector-machines/pytorch-example/)

> [SLP](https://medium.com/@shashankshankar10/introduction-to-neural-networks-build-a-single-layer-perceptron-in-pytorch-c22d9b412ccf)

In [1]:
import os

## Data: samples and labels

In [2]:
from google.colab import drive

drive.mount('/content/drive')
data_directory = os.path.abspath("/content/drive/MyDrive/ResearchPanda/Data")
if not os.path.exists(data_directory):
    print("Data directory does not exist.")
else:
    print("Data directory exists.")

Mounted at /content/drive
Data directory exists.


In [3]:
for root, dirs, files in os.walk(data_directory):
    level = root.replace(data_directory, '').count(os.sep)
    indent = ' ' * 2 * level
    print('{}{}/'.format(indent, os.path.basename(root)))

Data/
  samples/
  labels/


In [15]:
# Get the paths of the first 8 labels and 8 images
sample_dir = os.path.join(data_directory, 'samples')
label_dir = os.path.join(data_directory, 'labels')

sample_files = sorted([os.path.join(sample_dir, f) for f in os.listdir(sample_dir) if os.path.join(sample_dir, f).endswith(".tiff") or os.path.join(sample_dir, f).endswith(".tif")])
label_files = sorted([os.path.join(label_dir, f) for f in os.listdir(label_dir) if os.path.join(label_dir, f).endswith(".tiff") or os.path.join(sample_dir, f).endswith(".tif")])

print("Sample files:", sample_files[:8])
print("Label files:", label_files[:8])

Sample files: ['/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_0.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_1.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_10.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_100.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_101.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_102.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_20171025T150721_N0500_R082_T19QGA_20230910T203157.SAFE_img_103.tiff', '/content/drive/MyDrive/ResearchPanda/Data/samples/S2A_MSIL2A_2017

## Helper functions (ref: carlos_escobar)

In [16]:
import numpy as np
import tifffile as tiff

def getMinMaxValues(label_files):
    min_value = np.inf
    max_value = -np.inf

    for label_path in label_files:
        image_data = tiff.imread(label_path)
        min_value = min(min_value, np.min(image_data))
        max_value = max(max_value, np.max(image_data))

    return min_value, max_value


In [17]:
import numpy as np
import tifffile as tiff

def getNDVIvalues(label_files):
    ndvi_values = []
    ndvi_min, ndvi_max = getMinMaxValues(label_files)

    for label_path in label_files:
        image_data = tiff.imread(label_path).astype(float)

        # Avoid division by zero if min and max are the same
        if (ndvi_max - ndvi_min) == 0:
            ndvi_normalized = np.zeros_like(image_data, dtype=np.float32)
        else:
            ndvi_normalized = ((image_data - ndvi_min) / (ndvi_max - ndvi_min)) * 2 - 1

        # Ensure values are within the -1 to 1 range
        ndvi_scaled = np.clip(ndvi_normalized, -1, 1)

        # Append values
        ndvi_values.append(ndvi_scaled)

    return ndvi_values

In [21]:
import numpy as np
def getFeatures(ndvi_values):
    X = []
    y = []
    for ndvi_array in ndvi_values:

        # Mean and Std to x features
        mean_value = np.mean(ndvi_array)
        std_value = np.std(ndvi_array)
        X.append([mean_value, std_value])

        # Classification to y feature
        # If mean tiff value > 0.5 (top 25%) => good condition land
        if mean_value > 0.5 and std_value < mean_value:
            y.append(1)
        else:
            y.append(0)

    return X, y


## Features and data split

In [22]:
ndvi_values = getNDVIvalues(label_files)
X, y = getFeatures(ndvi_values)

In [23]:
from sklearn.model_selection import train_test_split
# Split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=17,
                                                    shuffle=True)

## SVM

In [26]:
from sklearn.svm import SVC
# Implement and train the SVM classifier
# Using a linear kernel as a starting point
svm_model = SVC(kernel='linear', random_state=42)
print("Training SVM model...")
svm_model.fit(X_train, y_train)
print("SVM model training complete.", end="\n\n")

# Predict on the test set
print("Making predictions on the test set...")
y_pred = svm_model.predict(X_test)
print("Predictions complete.")

Training SVM model...
SVM model training complete.

Making predictions on the test set...
Predictions complete.


In [28]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report, confusion_matrix
# Evaluate the model
print("Evaluating SVM model...")

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"SVM Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}", end="\n\n")
print("Evaluation reports:")
print(classification_report(y_test, y_pred))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred))

Evaluating SVM model...
SVM Model Performance:
Accuracy: 0.9919
Precision: 0.9796
Recall: 1.0000

Evaluation reports:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99        75
           1       0.98      1.00      0.99        48

    accuracy                           0.99       123
   macro avg       0.99      0.99      0.99       123
weighted avg       0.99      0.99      0.99       123

Confusion matrix:
[[74  1]
 [ 0 48]]


## Single layer perceptron
> ref: [Youtube video](https://www.youtube.com/watch?v=hSrFA-l5k3U)

In [29]:
import torch
import torch.nn as nn
import torch.optim as optim

In [68]:
import torch
import torch.nn as nn
import torch.optim as optim

class SingleLayerPerceptron(nn.Module):
    def __init__(self, input_size):
        super(SingleLayerPerceptron, self).__init__()
        self.fc = nn.Linear(input_size, 1)  # Output size is 1 for binary classification

    def forward(self, x):
        out = torch.sigmoid(self.fc(x)) # binary classification
        return out

In [69]:
# Model Iniciation, loss function and optimizer
input_size = 2
model = SingleLayerPerceptron(input_size)
criterion = nn.BCELoss() # Binary-cross-entropy for classification
optimizer = optim.SGD(model.parameters(), lr=0.1) # Stochastic Gradiant desent optimizer

In [62]:
# Data
X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)

y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: torch.Size([491, 1, 1, 2])
Shape of X_test: torch.Size([123, 1, 1, 2])
Shape of y_train: torch.Size([491, 1, 1, 1])
Shape of y_test: torch.Size([123, 1, 1, 1])


  X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
  X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)
  y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
  y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)


In [74]:
# Training
epochs = 10000

print("Training Single Layer Perceptron...")
for epoch in range(epochs):
    model.train() # Training mode

    # Foward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 1000 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

print("Training complete.")

Training Single Layer Perceptron...
Epoch [1000/10000], Loss: 0.2153
Epoch [2000/10000], Loss: 0.2098
Epoch [3000/10000], Loss: 0.2048
Epoch [4000/10000], Loss: 0.2002
Epoch [5000/10000], Loss: 0.1960
Epoch [6000/10000], Loss: 0.1922
Epoch [7000/10000], Loss: 0.1886
Epoch [8000/10000], Loss: 0.1853
Epoch [9000/10000], Loss: 0.1822
Epoch [10000/10000], Loss: 0.1793
Training complete.


In [75]:
# Evaluation
model.eval()
with torch.no_grad():
    print("Evaluating Single Layer Perceptron...")

    test_output = model(X_test)
    test_loss = criterion(test_output, y_test)
    print(f'Test Loss: {test_loss.item():.4f}')

    predicted = test_output.round() # Round for binary

    # Convert tensors to numpy arrays and flatten for sklearn metrics
    y_test_np = y_test.cpu().numpy().flatten()
    predicted_np = predicted.cpu().numpy().flatten()

    accuracy = (predicted == y_test).float().mean()
    print(f'Test Accuracy: {accuracy.item():.4f}')

    precision = precision_score(y_test_np, predicted_np, zero_division=0.0)
    print(f'Test Precision: {precision:.4f}')

    recall = recall_score(y_test_np, predicted_np)
    print(f'Test Recall: {recall:.4f}')

    print("Evaluation reports.")
    print(classification_report(y_test_np, predicted_np))
    print("Confusion matrix.")
    print(confusion_matrix(y_test_np, predicted_np))

Evaluating Single Layer Perceptron...
Test Loss: 0.1398
Test Accuracy: 0.9675
Test Precision: 0.9231
Test Recall: 1.0000
Evaluation reports.
              precision    recall  f1-score   support

         0.0       1.00      0.95      0.97        75
         1.0       0.92      1.00      0.96        48

    accuracy                           0.97       123
   macro avg       0.96      0.97      0.97       123
weighted avg       0.97      0.97      0.97       123

Confusion matrix.
[[71  4]
 [ 0 48]]
