<a href="https://colab.research.google.com/github/Apoak/Deep-Learning-Projects/blob/main/Hyperspectral_Tree_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import sklearn
import torch
import os

In [None]:
# Fetch the dataset archive only when no local copy exists, so re-running the
# cell does not download it again.
if not os.path.exists('tree_species_classifier_data.npz'):
  # NOTE: `!wget` is IPython/Colab shell syntax; this line is not valid in a plain Python script.
  !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

In [None]:
# Read the four arrays out of the .npz archive. `np.load` returns a lazy
# NpzFile that holds the file open, so use it as a context manager to make sure
# the handle is released. Indexing materialises each array in memory, so the
# arrays stay valid after the archive is closed (but `data` itself must not be
# indexed again once the `with` block has exited).
with np.load('tree_species_classifier_data.npz') as data:
    train_features = data['train_features']
    train_labels = data['train_labels']
    test_features = data['test_features']
    test_labels = data['test_labels']

# Report the shape of every split: features first, then labels, train before test.
print(train_features.shape)
print(train_labels.shape)
print(test_features.shape)
print(test_labels.shape)


# original raw hyperspectral data.

What is the shape and data type of each provided matrix? What are the rows
and columns of the matrices? What are the ranges? How many classes are
there and what are the classes (the answer is in the paper linked above).
How many examples are provided of each class in the train and test splits?



This code fits the PCA model to the training features and applies it to
both the train and test features.


In [None]:
# sklearn PCA stuff
from sklearn.decomposition import PCA

# Project onto 32 whitened principal components. `random_state` is pinned so
# that the projection is reproducible if scikit-learn selects a randomized SVD
# solver; the other estimators in this notebook are seeded the same way.
pca = PCA(n_components=32, copy=True, whiten=True, random_state=0)

# Fit on the training split only, then apply the same projection to both
# splits so no test-set statistics leak into the transform.
pca.fit(train_features)
X_pca = pca.transform(train_features)
X_pca_test = pca.transform(test_features)

print(X_pca.shape)
print(train_labels.shape)
print(X_pca_test.shape)


This code creates a linear classifier and a neural network (NN) using scikit-learn. It also calculates accuracy.

In [None]:
# sklearn results
# Import the estimator classes explicitly: a bare `import sklearn` does not
# guarantee that the `linear_model` / `neural_network` submodules are loaded,
# so attribute access such as `sklearn.neural_network` can fail.
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Baseline 1: multinomial logistic regression on the PCA features.
linear_classifier = LogisticRegression(random_state=0, max_iter=10000).fit(X_pca, train_labels)
# Baseline 2: MLP with three hidden layers of 100 units each.
NN = MLPClassifier(hidden_layer_sizes=(100, 100, 100), random_state=1, max_iter=1000).fit(X_pca, train_labels)

# `score` returns mean accuracy on the given (features, labels) pair.
lc_score = linear_classifier.score(X_pca_test, test_labels)
nn_score = NN.score(X_pca_test, test_labels)
print(f'Linear Classifier Accuracy: {lc_score}')
print(f'Neural Network Accuracy: {nn_score}')


In [None]:
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

This creates tensors of the datasets.

In [None]:
# PCA for TORCH
# Centre BOTH splits with the training mean so that train and test features
# live in the same coordinate frame. (Previously only the training split was
# shifted, and it was modified in place.) The PCA projection of its own
# training data should already be close to zero-mean, so this shift is small.
train_mean = X_pca.mean(axis=0)
X = torch.tensor(X_pca - train_mean, dtype=torch.float32)
X_test = torch.tensor(X_pca_test - train_mean, dtype=torch.float32)

# Class labels are converted to int64 ("long"), the dtype CrossEntropyLoss
# expects for class-index targets.
y = torch.tensor(train_labels).long()
y_test = torch.tensor(test_labels).long()



Count the number of examples of each tree species (class label) in the training and test splits.

In [None]:
def count_numbers_tensor(tensor):
    """Tally how often each distinct value occurs in ``tensor``.

    Returns a dict mapping every distinct value (cast to ``int``) to the
    number of times it appears (also an ``int``).
    """
    values, frequencies = torch.unique(tensor, return_counts=True)
    tally = {}
    for value, frequency in zip(values, frequencies):
        tally[int(value)] = int(frequency)
    return tally

In [None]:
# Per-class example counts for the training labels, printed as {label: count}.
result = count_numbers_tensor(y)
print(result)

In [None]:
# Per-class example counts for the test labels, printed as {label: count}.
# Note: this rebinds `result`, replacing the training counts from the cell above.
result = count_numbers_tensor(y_test)
print(result)

In [None]:
# Linear classifier
# A single affine layer: 32 input features in, 8 class scores (logits) out.
linear_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=32, out_features=8),
)

In [None]:
# Cross-entropy loss shared by both torch models; it is passed to train_model
# as the loss function and takes (model outputs, integer class labels).
loss_fn = torch.nn.CrossEntropyLoss()

This code creates the dataloader for the training split.

In [None]:
# Dataloader for training data
# Serves (features, label) mini-batches of 32, reshuffled on every pass.
batch_size = 32
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Multilayer Neural network
# Two hidden layers of 100 units with SiLU activations between the affine
# layers: 32 input features in, 8 class scores (logits) out.
multilayer_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=32, out_features=100),
    torch.nn.SiLU(),
    torch.nn.Linear(in_features=100, out_features=100),
    torch.nn.SiLU(),
    torch.nn.Linear(in_features=100, out_features=8),
)

This code creates the dataloader for the test split.

In [None]:
# Dataloader for test data
# Serves (features, label) mini-batches of 32 in a fixed order (no shuffling).
batch_size = 32
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

This function calculates model accuracy, given the model and a data
loader. It iterates through the batches in the loader, adds up
the number of correct predictions in each batch, divides the sum by the total
number of predictions, and returns the result.

In [None]:
# FOR TESTING MODELS with test data
def model_accuracy(model, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``model`` gets right.

    Args:
        model: Module mapping a batch of features to per-class scores, with
            the class axis at dimension 1.
        dataloader: Iterable yielding ``(features, labels)`` batches. This is
            the loader that is evaluated; no global loader is consulted.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` if the loader yields no
        samples.

    Side effects:
        Switches ``model`` to eval mode (and leaves it there) and prints the
        running accuracy after every batch.
    """
    model.eval()
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_idx, (batch_X, batch_y) in enumerate(dataloader):
            # Predicted class = index of the highest score for each sample.
            predicted = model(batch_X).argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
            # Running accuracy over the batches seen so far.
            print(f'batch {batch_idx}: Accuracy: {correct / total}')

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return correct / total

This code trains the models. It uses the SGD optimizer with learning rate
1e-2 and weight decay of 0.001. It is trained for 100 epochs and uses the cross
entropy loss function. In each epoch, it loops through the training data loader
and zeros out the gradients; calculates model outputs and
loss; runs the backward step to calculate gradients; and runs the optimizer
step.

In [None]:
# FOR TRAINING MODELS
def train_model(model, train_loader, loss_func, epochs=100, lr=1e-2, weight_decay=0.001):
    """Train ``model`` in place with mini-batch SGD and return it.

    Args:
        model: Module to optimise; its parameters are updated in place.
        train_loader: Iterable yielding ``(features, labels)`` batches. It is
            iterated once per epoch.
        loss_func: Callable ``loss_func(outputs, labels)`` returning a scalar
            tensor that supports ``backward()``.
        epochs: Number of full passes over ``train_loader``.
        lr: SGD learning rate.
        weight_decay: L2 penalty coefficient passed to the optimizer.

    Returns:
        The same ``model`` object, after training.

    Side effects:
        Puts ``model`` in training mode and prints the loss of the last batch
        of every epoch.
    """
    opt = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Make sure train-time behaviour is active even if the model was
    # previously switched to eval mode.
    model.train()

    for epoch in range(epochs):
        loss = None
        for batch_X, batch_y in train_loader:
            opt.zero_grad()                        # clear gradients from the previous step
            loss = loss_func(model(batch_X), batch_y)
            loss.backward()                        # compute gradients
            opt.step()                             # apply the update

        # `loss` stays None when the loader yielded no batches this epoch.
        if loss is not None:
            print(f'epoch {epoch}: loss is {loss.item()}')

    return model

In [None]:
# Train linear model: 100 epochs of SGD at learning rate 1e-2, using the
# training dataloader and the shared cross-entropy loss.
train_model(linear_model, dataloader, loss_fn, epochs=100, lr=1e-2)

In [None]:
# Train Multilayer model with the same settings as the linear model
# (100 epochs, learning rate 1e-2, same dataloader and loss).
train_model(multilayer_model, dataloader, loss_fn, epochs=100, lr=1e-2)

In [None]:
# Test linear model: score the trained linear classifier on the test loader
# and report the accuracy to four decimal places.
linear_accuracy = model_accuracy(linear_model, test_loader)
print(f'Accuracy of Torch Linear Classifier: {linear_accuracy:.4f}')

In [None]:
# Test multilayer model: score the trained network on the test loader and
# report the accuracy to four decimal places.
nn_accuracy = model_accuracy(multilayer_model, test_loader)
print(f'Accuracy of Torch NN: {nn_accuracy:.4f}')