# Example 3: Tile2Vec features for CDL classification
In this notebook, we'll use a Tile2Vec model that has been pre-trained on the NAIP dataset to embed a small NAIP dataset and then train a classifier on the corresponding Cropland Data Layer (CDL) labels.

In [1]:
import numpy as np
import os
import torch
from time import time
from torch.autograd import Variable

import sys
sys.path.append('../')
from src.tilenet import make_tilenet
from src.resnet import ResNet18

In [2]:
torch.cuda.empty_cache()

## Step 1. Loading pre-trained model
In this step, we will initialize a new TileNet model and then load the pre-trained weights.

In [3]:
# Setting up model
in_channels = 4
z_dim = 512
cuda = torch.cuda.is_available()
tilenet = make_tilenet(in_channels=in_channels, z_dim=z_dim)
#Use old model for now
#tilenet = ResNet18()
if cuda: tilenet.cuda()

In [4]:
# Load parameters
model_fn = '../models/TileNet_epoch50-Copy1.ckpt'
checkpoint = torch.load(model_fn)
tilenet.load_state_dict(checkpoint)
tilenet.eval()

TileNet(
  (conv1): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, 

## Step 2. Embed NAIP tiles
In this step, we'll use TileNet to embed the NAIP tiles provided in `tile2vec/data/tiles`. There are 1000 tiles in total, named `1tile.npy` through `1000tile.npy`.

In [5]:
# Get data
tile_dir = '../data/tiles'
n_tiles = 1000
y = np.load(os.path.join(tile_dir, 'y.npy'))
print(y.shape)

(1000,)


In [6]:
# Embed tiles
t0 = time()
X = np.zeros((n_tiles, z_dim))
for idx in range(n_tiles):
    tile = np.load(os.path.join(tile_dir, '{}tile.npy'.format(idx+1)))
    # Get first 4 NAIP channels (5th is CDL mask)
    tile = tile[:,:,:4]
    # Rearrange to PyTorch order
    tile = np.moveaxis(tile, -1, 0)
    tile = np.expand_dims(tile, axis=0)
    # Scale to [0, 1]
    tile = tile / 255
    # Embed tile
    tile = torch.from_numpy(tile).float()
    tile = Variable(tile)
    if cuda: tile = tile.cuda()
    z = tilenet.encode(tile)
    if cuda: z = z.cpu()
    z = z.data.numpy()
    X[idx,:] = z
t1 = time()
print('Embedded {} tiles: {:0.3f}s'.format(n_tiles, t1-t0))

Embedded 1000 tiles: 8.370s


## Step 3. Train random forest classifier
In this step, we'll split the dataset into train and test sets and train a random forest classifier to predict CDL classes.

In [7]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler 
from sklearn.pipeline import make_pipeline
from torch.utils.data import TensorDataset, DataLoader

import torch
import torch.nn as nn

In [8]:
# Check CDL classes
print(set(y))

{1.0, 2.0, 21.0, 24.0, 152.0, 28.0, 36.0, 176.0, 49.0, 54.0, 61.0, 69.0, 71.0, 72.0, 75.0, 76.0, 205.0, 204.0, 208.0, 212.0, 217.0, 225.0, 236.0, 111.0, 121.0, 122.0, 123.0, 124.0}


Since the CDL classes are not numbered in consecutive order, we'll start by reindexing the classes from 0.

In [9]:
# Reindex CDL classes
y = LabelEncoder().fit_transform(y)
print(set(y))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}


In [10]:
class MLP(nn.Module):
    def __init__(self, input_size=512):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.bn = nn.BatchNorm1d(256) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 28)
        self.softmax = nn.Softmax()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        
        return x

In [11]:
def train_MLP(model, data_loader, optimizer, lr, epoch):
    model.train()
    cuda = torch.cuda.is_available()
    
    optimizer.zero_grad()
    
    for data, label in data_loader:
        if cuda:
            data = data.cuda()
            label = label.cuda()
        
        criterion = torch.nn.BCELoss()
        y_pred = model(data)
        loss = criterion(y_pred, label)
        
        print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
        
        loss.backward()
        optimizer.step()

We can randomly split the data and train a random forest classifier many times to get an estimate of the average accuracy.

In [12]:
n_trials = 10
accs = np.zeros((n_trials,))

lr = 1e-3
epochs = 10

for i in range(n_trials):
    model = MLP()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    cuda = torch.cuda.is_available()
    if cuda:
        model = model.cuda()
    
    # Splitting data and training RF classifer
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2)    
    y_tr = np.eye(28)[y_tr]
    
    X_tr = torch.Tensor(X_tr)
    X_te = torch.Tensor(X_te)
    y_tr = torch.Tensor(y_tr)
    y_te = torch.Tensor(y_te)
    
    train_dataset = TensorDataset(X_tr, y_tr)
    test_dataset = TensorDataset(X_te, y_te)
    
    train_dataloader = DataLoader(train_dataset, batch_size = 200)
    test_dataloader = DataLoader(test_dataset, batch_size=200)
    
    for e in range(epochs):
        train_MLP(model, train_dataloader, optimizer, lr, e)
    
    correct = 0
    total = 0
    
    for data, label in test_dataloader:
        if cuda:
            data = data.cuda()
            label = label.cuda()
        
        output_test = model(data)
        _, predicted = torch.max(output_test, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
    
    accuracy = correct * 100 / total    
    accs[i] = accuracy
    
print('Mean accuracy: {:0.4f}'.format(accs.mean()))
print('Standard deviation: {:0.4f}'.format(accs.std()))

  x = self.softmax(x)


Epoch 0: train loss: 0.16242678463459015
Epoch 0: train loss: 0.14570342004299164
Epoch 0: train loss: 0.13016077876091003
Epoch 0: train loss: 0.12146203964948654
Epoch 1: train loss: 0.10028119385242462
Epoch 1: train loss: 0.09376434236764908
Epoch 1: train loss: 0.08974016457796097
Epoch 1: train loss: 0.09337861835956573
Epoch 2: train loss: 0.07610122859477997
Epoch 2: train loss: 0.07242891192436218
Epoch 2: train loss: 0.07320427149534225
Epoch 2: train loss: 0.0832841694355011
Epoch 3: train loss: 0.06660959124565125
Epoch 3: train loss: 0.06341216713190079
Epoch 3: train loss: 0.06483393162488937
Epoch 3: train loss: 0.07890687137842178
Epoch 4: train loss: 0.0609816238284111
Epoch 4: train loss: 0.058468159288167953
Epoch 4: train loss: 0.059274282306432724
Epoch 4: train loss: 0.0759292021393776
Epoch 5: train loss: 0.05660459026694298
Epoch 5: train loss: 0.0552455298602581
Epoch 5: train loss: 0.055543091148138046
Epoch 5: train loss: 0.07376072555780411
Epoch 6: train lo

Epoch 1: train loss: 0.0894862711429596
Epoch 2: train loss: 0.0664818212389946
Epoch 2: train loss: 0.07498897612094879
Epoch 2: train loss: 0.07221861928701401
Epoch 2: train loss: 0.08045878261327744
Epoch 3: train loss: 0.05751475691795349
Epoch 3: train loss: 0.06791173666715622
Epoch 3: train loss: 0.06629333645105362
Epoch 3: train loss: 0.07680609822273254
Epoch 4: train loss: 0.053164541721343994
Epoch 4: train loss: 0.06361541897058487
Epoch 4: train loss: 0.06190267950296402
Epoch 4: train loss: 0.07409409433603287
Epoch 5: train loss: 0.049796294420957565
Epoch 5: train loss: 0.05998372659087181
Epoch 5: train loss: 0.058079253882169724
Epoch 5: train loss: 0.07173601537942886
Epoch 6: train loss: 0.04717143997550011
Epoch 6: train loss: 0.05693846568465233
Epoch 6: train loss: 0.054733239114284515
Epoch 6: train loss: 0.0696667805314064
Epoch 7: train loss: 0.04468126967549324
Epoch 7: train loss: 0.054020095616579056
Epoch 7: train loss: 0.0519133098423481
Epoch 7: train 