# Mounting dataset from gdrive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

dataset_path = '/content/drive/My Drive/input/forestnet'

ValueError: mount failed

# Importing packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import skimage.draw as sk
from fastai.vision.all import *
from fastai.imports import *

# Read data and create DataFrames

In [None]:
train = pd.read_csv('/content/drive/My Drive/input/forestnet/ForestNetDataset/train.csv')
val = pd.read_csv('/content/drive/My Drive/input/forestnet/ForestNetDataset/val.csv')

In [None]:
# Keep only the columns the rest of the notebook reads. drop() returns a new
# frame here (no inplace mutation) and errors='ignore' lets the cell be re-run
# after the columns are already gone instead of failing with a KeyError.
unused_columns = ['label','latitude','longitude','year']
train = train.drop(columns = unused_columns, errors = 'ignore')
val = val.drop(columns = unused_columns, errors = 'ignore')
# Flag the rows so that a single concatenated frame still records which
# samples belong to the validation split.
train['is_valid'] = False
val['is_valid'] = True
imgset = pd.concat([train,val],ignore_index = True)
imgset.head()



Create a dictionary that maps each label to its corresponding pixel value.


In [None]:
# Label name -> integer pixel value. createPILMask writes codes[label] into the
# pixels of the forest-loss region and 0 everywhere else, so 0 ("Undefined")
# doubles as the background value.
codes_dict = {"Undefined": 0, "Plantation": 1, "Smallholder agriculture": 2,"Other": 3,"Grassland shrubland": 4}
codes_dict.values()

# Semantic segmentation data prep

In [None]:
def getEdges(polygon):
    """Return the exterior ring of `polygon` as a list of integer (x, y) tuples.

    Every coordinate pair of `polygon.exterior.coords` is rounded with
    `np.round` and cast to int; the order of the pairs is preserved.
    """
    # NOTE(review): the pairs keep the order in which the geometry stores them;
    # confirm that this matches the (row, column) order expected by
    # skimage's polygon2mask, which consumes this list.
    return [
        (np.round(px).astype(int), np.round(py).astype(int))
        for px, py in polygon.exterior.coords
    ]

In [None]:
def createPILMask(poly, label, codes, size=(332, 332)):
    """Rasterise a forest-loss geometry into a single-channel PIL mask.

    Args:
        poly: geometry exposing `geom_type`; 'Polygon' and 'MultiPolygon'
            (iterated through `poly.geoms`) are supported.
        label: key into `codes`; its value is written inside the region.
        codes: mapping from label name to integer pixel value.
        size: (height, width) of the mask. Defaults to (332, 332), the value
            that was previously hard-coded.

    Returns:
        A PIL image built from a uint8 array holding `codes[label]` inside the
        rasterised region and 0 elsewhere. For any other geometry type a
        message is printed and an all-zero mask is returned.

    Raises:
        KeyError: if `label` is not a key of `codes`.
    """
    if poly.geom_type == 'Polygon':
        polygons = [poly]
    elif poly.geom_type == 'MultiPolygon':
        polygons = list(poly.geoms)
    else:
        # Best effort: keep going with an empty mask rather than aborting.
        polygons = []
        print("Invalid geometry type encountered.")

    # Union of the rasterised exterior rings of every polygon.
    region = np.zeros(size, dtype=bool)
    for polygon in polygons:
        region |= sk.polygon2mask(size, getEdges(polygon))

    mask = np.where(region, codes[label], 0).astype(np.uint8)
    return Image.fromarray(mask)

In [None]:
def labelToInt(label,codes):
    """Return the position of the first entry of `codes` equal to `label`.

    Iterating a dict yields its keys, so for a mapping this is the index of
    the key. Returns None when no entry matches.
    """
    matches = (position for position, name in enumerate(codes) if name == label)
    return next(matches, None)

In [None]:
def getForestLoss(path):
    """Load and return the object pickled in `<dataset>/<path>/forest_loss_region.pkl`.

    `path` is the example folder, relative to the ForestNetDataset directory.
    """
    pkl_file = '/content/drive/My Drive/input/forestnet/ForestNetDataset/' + path + '/forest_loss_region.pkl'
    # NOTE: pickle.load executes arbitrary code from the file; only use this on
    # dataset files from a trusted source.
    with open(pkl_file, 'rb') as handle:
        region = pickle.load(handle)
    return region

# Find suitable weights for the labels

In [None]:
train_count = train.merged_label.value_counts()
train_count

In [None]:
val_count = val.merged_label.value_counts()
val_count

In [None]:
norm_train = np.linalg.norm(np.array(train_count.values))
norm_val = np.linalg.norm(np.array(val_count.values))

In [None]:
# Mean of the L2-normalised train and validation label frequencies, aligned by
# label NAME. value_counts() sorts each Series by its own counts, so adding the
# raw .values arrays would mix up labels whenever the two orders differ.
# (norm_train / norm_val come from the previous cell.)
n_1 = train_count / norm_train
n_2 = val_count / norm_val
mean_freq = n_1.add(n_2, fill_value=0) / 2

# Swap the frequencies across the ranking: the most frequent label receives the
# smallest value and the rarest label the largest, so rare classes weigh more.
# This is what reversing the count-sorted array did, without relying on the
# frequency order happening to match the order of codes_dict.
ranked = mean_freq.sort_values(ascending=False)
label_weight = dict(zip(ranked.index, ranked.values[::-1]))

# One weight per entry of codes_dict, in code order.
# Assign a weight of zero to the "Undefined" label (and to any label that does
# not occur in the data).
weights = np.array([0.0 if name == "Undefined" else label_weight.get(name, 0.0)
                    for name in codes_dict])
print(codes_dict.keys())
weights



# Put the data in a DataBlock

In [None]:
# Named getters instead of lambdas. Each item is a row of imgset (a pandas
# Series), so fields are read with .iloc: plain integer [] on a Series is
# deprecated positional access and emits a warning for every sample.
def get_image_path(row):
    """Return the path of the composite image for one row (column 1 is the example folder)."""
    return f"/content/drive/My Drive/input/forestnet/ForestNetDataset/{row.iloc[1]}/images/visible/composite.png"

def get_mask(row):
    """Return the PIL mask of the row's forest-loss region, filled with the code of its label (column 0)."""
    return createPILMask(getForestLoss(row.iloc[1]), row.iloc[0], codes_dict)

db = DataBlock(blocks = (ImageBlock, MaskBlock(codes = codes_dict)),
        splitter = ColSplitter(),
        get_x = get_image_path,
        get_y = get_mask,
        item_tfms=Resize(160,method = 'crop'),
        batch_tfms=[Normalize.from_stats(*imagenet_stats)])

In [None]:
dls = db.dataloaders(imgset, bs = 4)
dls.show_batch(vmin = 0,vmax = 5)

# Learning and training

In [None]:
import torch
from fastai.vision.all import *

# Load the FPN-based EfficientNet model from torch.hub
# (entry point 'make_fpn_efficientnet' of the 'AdeelH/pytorch-fpn' repository).
model = torch.hub.load(
    'AdeelH/pytorch-fpn',
    'make_fpn_efficientnet',
    name='efficientnet-b8',
    pretrained=True,
    fpn_type='fpn',
    num_classes=5,  # presumably one output channel per entry of codes_dict (5 labels) - confirm
    fpn_channels=256,
    in_channels=3,
    out_size=(160, 160)  # same 160x160 size as the Resize(160) item transform of the DataBlock
)

In [None]:
# Accuracy metric
def segmentation_accuracy(preds, targets):
    """Pixel accuracy restricted to the labelled (non-zero) target pixels.

    Args:
        preds: scores with the class axis at dim 1, (batch, classes, height, width).
        targets: integer class mask, (batch, height, width).

    Returns:
        Scalar tensor: the fraction of non-zero target pixels whose argmax
        class equals the target. NaN when the batch has no non-zero pixel.
    """
    labelled = targets != 0
    predicted = preds.argmax(dim=1)
    hits = predicted[labelled] == targets[labelled]
    return hits.float().mean()

# Pick the device that will hold the class-weight tensor of the loss.
if torch.cuda.is_available():
    print("GPU is available!")
    device = torch.device("cuda")
else:
    print("GPU is not available. Using CPU.")
    device = torch.device("cpu")

# The metric function is passed directly instead of being wrapped in a lambda,
# so the results table can label the column with the function's name (a lambda
# carries no usable name).
learn = Learner(
    dls,
    model,
    metrics=segmentation_accuracy,
    # Per-class weights; index 0 ("Undefined") is weighted zero in `weights`.
    loss_func=LabelSmoothingCrossEntropyFlat(axis=1, weight=tensor(weights).to(device))
)

In [None]:
learn.fine_tune(8)

epoch,train_loss,valid_loss,Unnamed: 3,time
0,13.120321,36.331982,0.568086,53:21


  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  return pickle.load(f)
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  return pickle.load(f)


epoch,train_loss,valid_loss,Unnamed: 3,time
0,1.743037,1.906021,0.50188,52:14
1,3.154219,17.628099,0.327451,52:01


  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_y = lambda x: createPILMask(getForestLoss(x[1]),x[0],codes_dict),
  return pickle.load(f)
  get_x = lambda x: "/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{x[1]}'"/images/visible/composite.png",
  get_x = l

In [None]:
preds = learn.get_preds()

In [None]:
preds[0].shape

In [None]:
preds[0][0][0]

In [None]:
predicted_classes = torch.argmax(preds[0], dim=1)
predicted_classes

In [None]:
# Assuming 'preds' is your tensor
max_value = torch.max(preds[0])
print(max_value)

In [None]:
from google.colab import files

torch.save(learn.model.state_dict(), '/content/drive/MyDrive/efficientnet.pth')
files.download('/content/drive/MyDrive/efficientnet.pth')

# Validation Accuracy

In [None]:
predicted_classes = torch.argmax(preds[0], dim=1)

### Getting the validation accuracy of each class

In [None]:
import torch

# Reverse lookup (pixel value -> label name), built once from the codes_dict
# defined earlier in the notebook. Re-declaring the dictionary here could
# silently drift from the one used to build the masks, and scanning it for
# every image is unnecessary.
code_to_label = {value: key for key, value in codes_dict.items()}

class_correct = {label: 0 for label in codes_dict}
class_total = {label: 0 for label in codes_dict}

for i in range(predicted_classes.shape[0]):
    # Image-level prediction: the most frequent class over all pixels of image i.
    predicted_label, _ = torch.mode(predicted_classes[i].flatten())
    predicted_label_name = code_to_label[predicted_label.item()]

    # Get the actual label from the DataFrame.
    # NOTE(review): assumes prediction i corresponds to row i of `val`, i.e.
    # that the validation loader keeps the DataFrame order - confirm.
    actual_label = val['merged_label'].iloc[i]

    class_total[actual_label] += 1
    if predicted_label_name == actual_label:
        class_correct[actual_label] += 1

for label in codes_dict:
    # Classes without validation samples are reported as 0 instead of dividing by zero.
    accuracy = 100 * class_correct[label] / class_total[label] if class_total[label] > 0 else 0
    print(f"Accuracy of {label}: {accuracy:.2f}%")


### Getting validation macro accuracy

In [None]:
# Calculate macro accuracy: the unweighted mean of the per-class accuracies, so
# every class counts equally regardless of how many validation samples it has.
# (Dividing the total number of correct predictions by the total number of
# samples gives the overall, i.e. micro, accuracy instead.)
# Classes without any validation sample are left out of the average.
per_class_accuracy = [class_correct[label] / class_total[label]
                      for label in class_total if class_total[label] > 0]
macro_accuracy = 100 * sum(per_class_accuracy) / len(per_class_accuracy) if per_class_accuracy else 0.0

print(f"Macro Accuracy: {macro_accuracy:.2f}%")


### Getting validation macro f1

In [None]:
from sklearn.metrics import f1_score

# Per-pixel class index for every validation image (argmax over the class axis).
predicted_classes = torch.argmax(preds[0], dim=1)

def _name_of(code):
    """Return the first label name in codes_dict whose value equals `code`."""
    return [key for key, value in codes_dict.items() if value == code][0]

# True labels come from the 'merged_label' column of `val`; the predicted label
# of an image is the most frequent class index over all of its pixels.
y_true = val['merged_label'].tolist()
y_pred = [
    _name_of(torch.mode(predicted_classes[idx].flatten())[0].item())
    for idx in range(predicted_classes.shape[0])
]

# Macro F1: unweighted mean of the per-class F1 scores.
f1_macro = f1_score(y_true, y_pred, average='macro')
print(f"Macro F1-score: {f1_macro:.2f}")


# Visualizing predictions

In [None]:
# Validation rows of imgset, re-indexed from 0.
vset = imgset[imgset.is_valid == True].reset_index(drop = True)


In [None]:
import scipy.stats

def _as_numpy(data):
    """Return `data` (tensor, PIL image or array-like) as a NumPy array."""
    if hasattr(data, "detach"):
        # Torch tensors: move to the CPU before converting.
        return data.detach().cpu().numpy()
    return np.asarray(data)


def label_predict(target,pred,codes,test):
    """Return the label name predicted most often inside the target region.

    Args:
        target: 2-D ground-truth mask (tensor, PIL image or array); its
            non-zero pixels define the region that is evaluated.
        pred: tensor of per-class scores with the class axis first; the
            per-pixel class is the argmax over dim 0.
        codes: ordered collection of label names; position k names class k.
        test: accepted for backward compatibility. It used to select how the
            non-zero indices were iterated; the mask is now applied directly,
            so both input kinds are handled the same way.

    Returns:
        The entry of `codes` at the most frequent predicted class index inside
        the region (ties resolve to the smallest index). When the region is
        empty, the first entry of `codes` is returned.
    """
    region = _as_numpy(target) != 0
    pred_labels = _as_numpy(pred.argmax(dim = 0))
    label_vals = pred_labels[region]
    if label_vals.size == 0:
        # No labelled pixel (e.g. the region fell outside the crop): there is
        # nothing to vote on, so fall back to the first code.
        return list(codes)[0]
    # bincount + argmax is the mode of the class indices as a plain int,
    # independent of the return type of any particular scipy version.
    mode_value = int(np.bincount(label_vals).argmax())
    return list(codes)[mode_value]


def predict_mask(target,pred,test):
    """Plot the ground-truth mask next to the prediction restricted to its region.

    Args:
        target: 2-D ground-truth mask (tensor, PIL image or array).
        pred: tensor of per-class scores with the class axis first.
        test: accepted for backward compatibility; not needed any more.
    """
    target_arr = _as_numpy(target)
    pred_labels = _as_numpy(pred.argmax(dim = 0))
    # Predicted class inside the region, 0 outside of it.
    p_mask = np.where(target_arr != 0, pred_labels, 0).astype(float)
    f, ax = plt.subplots(1,2,figsize=(20, 10))
    ax[0].imshow(target_arr)
    ax[1].imshow(p_mask)
    ax[0].set_title("ground truth")
    ax[1].set_title("prediction")
    plt.show()


In [None]:
# Inspect a single validation sample. One index selects the prediction, the
# target mask and the DataFrame row, so the two printed labels describe the
# same image (previously the prediction used index 2 and the label row 1).
sample_idx = 2
predict_mask(preds[1][sample_idx],preds[0][sample_idx],False)
p = label_predict(preds[1][sample_idx],preds[0][sample_idx],codes_dict,False)
# Column 0 of vset holds the label.
g = vset.iloc[sample_idx, 0]
print("predicted label: ", p)
print("correct label: ", g)

In [None]:
for i, j in enumerate(vset.head(10).values):
    f, ax = plt.subplots(1,2,figsize=(20, 10))
    im = PILImage.create("/content/drive/My Drive/input/forestnet/ForestNetDataset/"f'{j[1]}'"/images/visible/composite.png")
    ax[0].imshow(im)
    ax[1].imshow(preds[0][i].argmax(dim = 0))
    plt.show()


# Examine Model Performance on test data

### Test accuracy

In [None]:
# Load the test split and discard the columns that are not used below.
test = pd.read_csv('/content/drive/My Drive/input/forestnet/ForestNetDataset/test.csv').drop(
    columns = ['label','latitude','longitude','year'])

In [None]:
def t_acc(df,codes):
    """Return the fraction of rows of `df` whose predicted label equals the row's label.

    Args:
        df: frame whose column 0 is the true label and column 1 the example
            folder (relative to the ForestNetDataset directory).
        codes: mapping from label name to pixel value.

    Returns:
        Accuracy in [0, 1]; 0.0 for an empty frame. For a frame holding a
        single class (the way it is called below) this is that class's accuracy.
    """
    if len(df) == 0:
        return 0.0
    hits = 0
    for x in df.values:
        im = PILImage.create(f"/content/drive/My Drive/input/forestnet/ForestNetDataset/{x[1]}/images/visible/composite.png")
        AOI = createPILMask(getForestLoss(x[1]),x[0],codes)
        p = learn.predict(im)
        # Crop the mask to the spatial size of the prediction before voting.
        predicted = label_predict(AOI.crop_pad(p[2].shape[1]),p[2],codes,True)
        # Exact comparison with the row's own label (not a substring test).
        hits += int(predicted == x[0])
    return hits / len(df)


In [None]:
# Test rows of each class, selected on the 'merged_label' column.
t_plant, t_other, t_grass, t_small = (
    test[test.merged_label == class_name]
    for class_name in ('Plantation', 'Other', 'Grassland shrubland', 'Smallholder agriculture')
)

# Accuracy of each class subset, evaluated in the same order as above.
plantation, other, grass, small = (
    t_acc(subset, codes_dict)
    for subset in (t_plant, t_other, t_grass, t_small)
)


In [None]:
print('plantation accuracy: ', np.around(plantation,4))
print('other accuracy: ', np.around(other,4))
print('grassland shrubland accuracy: ', np.around(grass,4))
print('smallholder agriculture accuracy: ', np.around(small,4))

### Macro F1 and Macro Accuracy

In [None]:
from sklearn.metrics import accuracy_score

def _predict_test_labels(df, codes):
    """Run the learner on every row of `df`; return (true labels, predicted labels)."""
    y_true = df.iloc[:, 0].tolist()  # Extract true labels
    y_pred = []
    for x in df.values:
        im = PILImage.create(f"/content/drive/My Drive/input/forestnet/ForestNetDataset/{x[1]}/images/visible/composite.png")
        AOI = createPILMask(getForestLoss(x[1]), x[0], codes)
        p = learn.predict(im)
        # Crop the mask to the spatial size of the prediction before voting.
        y_pred.append(label_predict(AOI.crop_pad(p[2].shape[1]), p[2], codes, True))
    return y_true, y_pred


def t_acc_macro(df, codes):
    """Print and return the macro accuracy of the learner on `df`.

    Macro accuracy is the unweighted mean of the per-class accuracies, where
    the classes are the true labels present in column 0 of `df`. (Overall
    accuracy, as computed by accuracy_score, weights classes by their size.)

    Args:
        df: frame whose column 0 is the true label and column 1 the example folder.
        codes: mapping from label name to pixel value.

    Returns:
        The macro accuracy in [0, 1] (NaN for an empty frame).
    """
    y_true, y_pred = _predict_test_labels(df, codes)
    outcome = pd.DataFrame({
        "true": y_true,
        "hit": [truth == guess for truth, guess in zip(y_true, y_pred)],
    })
    # Accuracy inside each true class first, then the plain mean across classes.
    macro_accuracy = outcome.groupby("true")["hit"].mean().mean()
    print(f"Macro Accuracy: {macro_accuracy:.4f}")

    return macro_accuracy


def t_f1_macro(df,codes):
    """Return the macro-averaged F1 score of the learner on `df`.

    Args:
        df: frame whose column 0 is the true label and column 1 the example folder.
        codes: mapping from label name to pixel value.
    """
    y_true, y_pred = _predict_test_labels(df, codes)
    # NOTE(review): a predicted label that never occurs in y_true still enters
    # the macro average as its own class - confirm this is intended.
    return f1_score(y_true, y_pred, average='macro')

In [None]:
macro_acc = t_acc_macro(test,codes_dict)
print('test macro accuracy: ', np.around(macro_acc,4))

In [None]:
macro_f1 = t_f1_macro(test,codes_dict)
print('test macro f1: ', np.around(macro_f1,4))
