# Train a Tile2Vec Algorithm From Scratch

## Step 1: Import Libraries and Mount Google Drive

**This notebook is intended to be run on Google Colab**

In [1]:
import sys
import os
import torch
from torch import optim
from time import time
import numpy as np

In [None]:
# Colab-only: attach the user's Google Drive so data and checkpoints persist across sessions.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Path to the tile2vec folder on the mounted Google Drive, written relative to
# the current working directory. NOTE(review): not referenced by any other cell
# visible in this notebook -- confirm whether it is still needed.
DIR = '../content/drive/MyDrive/tile2vec/'

In [3]:
# Make the tile2vec `src` package importable: search the parent directory
# first, then the explicit checkout location below.
# NOTE(review): the absolute path looks machine-specific -- confirm it exists
# in your environment or point it at your own tile2vec checkout.
tile2vec_dir = '/atlas/u/swang/software/GitHub/tile2vec'
sys.path.extend(['../', tile2vec_dir])

In [4]:
from src.datasets import TileTripletsDataset, GetBands, RandomFlipAndRotate, ClipAndScale, ToFloatTensor, triplet_dataloader
from src.tilenet import make_tilenet

In [5]:
from src.training import prep_triplets, train_triplet_epoch

In [None]:
# Extract the triplet archive stored on Google Drive into the current working
# directory (no -d target is given); unzip's standard output is discarded to
# keep the cell output short.
!unzip /content/drive/MyDrive/toa_triplets.zip > /dev/null

# Step 2. Set up dataloader

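Run this cell if you wish to reduce the size of the training dataset. It selects a subset of the triplets in `../data/toa_triplets/`, renumbers them starting from 0 and moves their files into `../data/new_toa_triplets/`. Files belonging to triplets that are not selected are left where they are (nothing is deleted).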

In [16]:
def split_triplet_name(filename):
    """Split a tile filename into (triplet id prefix, remaining suffix).

    The prefix is whatever precedes the first "anchor", "neighbor" or
    "distant" marker in the name (e.g. "12anchor.npy" -> ("12", "anchor.npy")).
    A name containing none of the markers is returned whole as the prefix
    with an empty suffix.
    """
    triplet_num = filename
    for role in ("anchor", "neighbor", "distant"):
        triplet_num = triplet_num.split(role)[0]
    # Slice by length rather than filename.split(triplet_num)[1]: the prefix is
    # always a leading substring, and str.split raises ValueError when the
    # prefix is empty.
    return triplet_num, filename[len(triplet_num):]


src_dir = '../data/toa_triplets/'
dst_dir = '../data/new_toa_triplets/'
keep_every = 582  # keep one triplet out of every `keep_every`

# Only .npy files take part in the selection; anything else is ignored.
npy_filenames = [f for f in os.listdir(src_dir) if ".npy" in f]

# Unique triplet ids (dict kept for compatibility with the original variable).
triplet_nums = {split_triplet_name(f)[0]: 0 for f in npy_filenames}

# os.listdir returns entries in arbitrary order, so sort the ids to make the
# selection reproducible. Sorting by (length, text) orders plain non-negative
# integer ids numerically.
keys = sorted(triplet_nums, key=lambda k: (len(k), k))

# Reduce the number of triplets: every `keep_every`-th id is kept.
keep_keys = keys[::keep_every]

# Old id -> new consecutive id (0, 1, 2, ...); gives O(1) lookups in the loop.
new_index = {key: i for i, key in enumerate(keep_keys)}

# os.rename does not create missing directories.
os.makedirs(dst_dir, exist_ok=True)

kept_files = []
for filename in npy_filenames:
    triplet_num, img_ext = split_triplet_name(filename)
    if triplet_num not in new_index:
        # Files of triplets that were not selected are left in src_dir.
        continue
    new_filename = str(new_index[triplet_num]) + img_ext
    kept_files.append(new_filename)
    os.rename(os.path.join(src_dir, filename), os.path.join(dst_dir, new_filename))


Check the new size of the dataset (the number of files in the reduced directory).

In [17]:
# Count the entries that ended up in the reduced dataset directory.
new_triplet_files = os.listdir('../data/new_toa_triplets/')
print(len(new_triplet_files))

303


Initialise CUDA

In [18]:
# Environment setup: restrict CUDA to device index 0, then record whether
# PyTorch can actually see a GPU (the flag is reused by later cells).
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})
cuda = torch.cuda.is_available()
gpu_status = 'PyTorch is using GPU: {}'.format(cuda)
print(gpu_status)

PyTorch is using GPU: False


Set up the dataloader for training.

In [49]:
# Dataloader configuration -- adjust to match your directory and desired parameters.

# What to load
tile_dir = '../data/new_toa_triplets/'  # directory holding the triplet tiles
img_type = 'naip'
bands = 3
n_triplets = 101

# How to load it
batch_size = 50
num_workers = 4
shuffle = True
augment = True

In [50]:
# Collect the keyword options first so the call itself stays short.
# pairs_only=True is forwarded unchanged; see src.datasets for its meaning.
loader_options = dict(
    bands=bands,
    augment=augment,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    n_triplets=n_triplets,
    pairs_only=True,
)
dataloader = triplet_dataloader(img_type, tile_dir, **loader_options)
print('Dataloader set up complete.')

Dataloader set up complete.


# Step 3. Set up TileNet

In [51]:
# Network dimensions: z_dim is passed to make_tilenet below; the number of
# input channels follows the number of bands configured for the dataloader.
z_dim = 512
in_channels = bands

In [52]:
# Build a fresh network with the dimensions chosen above.
TileNet = make_tilenet(in_channels=in_channels, z_dim=z_dim)

# To continue from a saved checkpoint instead, uncomment the lines below.

#model_fn = '/content/drive/MyDrive/models/TileNet_epoch61_toa_data.ckpt'
#checkpoint = torch.load(model_fn, map_location=torch.device('cpu'))
#TileNet.load_state_dict(checkpoint)

# Move the model to the GPU only when one was detected.
if cuda:
    TileNet.cuda()

print('TileNet set up complete.')

# Switch to training mode; as the last expression of the cell this also
# displays the model.
TileNet.train()

TileNet set up complete.


TileNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, 

Set up optimizer.

In [53]:
# Adam over all TileNet parameters, with the learning rate and betas below.
lr = 1e-3
adam_betas = (0.5, 0.999)
optimizer = optim.Adam(TileNet.parameters(), lr=lr, betas=adam_betas)

# Step 4. Train Tile2Vec model!

In [54]:
# Training hyper-parameters; all of them are consumed by the training cell below.

# Schedule and bookkeeping
epochs = 100
print_every = 1000
save_models = True  # write a checkpoint from the training loop

# Loss settings forwarded to train_triplet_epoch
margin = 10
l2 = 0.01


Define the directory for saving models.

In [55]:
# Directory on Google Drive where the training loop writes model checkpoints.
model_dir = '/content/drive/MyDrive/tile2vec/models/Tile2Vec'
# exist_ok=True creates the directory (and parents) when missing and is a
# no-op when it already exists, without a separate check-then-create step.
os.makedirs(model_dir, exist_ok=True)

In [56]:
# Train TileNet and record the per-epoch averages returned by
# train_triplet_epoch (loss, l_n, l_d, l_nd) in the four lists below.
t0 = time()
avg_38802_img_losses = []
avg_38802_img_l_ns = []
avg_38802_img_l_ds = []
avg_38802_img_nds = []

# Index of the first epoch to run. 0 trains from scratch; when resuming from a
# checkpoint loaded in the TileNet cell, set this to the number of epochs that
# checkpoint has already completed.
start_epoch = 0

# The previous version wrapped this loop in open("results_fn", 'w'), which only
# created an empty file literally named "results_fn"; nothing was ever written
# to it, so the wrapper has been removed.
print('Begin training.................')
for epoch in range(start_epoch, epochs):
    # epoch+1 is handed to the training routine as the epoch number.
    (avg_loss, avg_l_n, avg_l_d, avg_l_nd) = train_triplet_epoch(
        TileNet, cuda, dataloader, optimizer, epoch+1, margin=margin, l2=l2,
        print_every=print_every, t0=t0)
    avg_38802_img_losses.append(avg_loss)
    avg_38802_img_l_ns.append(avg_l_n)
    avg_38802_img_l_ds.append(avg_l_d)
    avg_38802_img_nds.append(avg_l_nd)
    # Save a checkpoint after every epoch. The filename uses the 0-based loop
    # index (one less than the number passed to train_triplet_epoch); this is
    # kept unchanged so existing checkpoint names stay valid.
    if save_models:
        model_fn = os.path.join(model_dir, 'TileNet_epoch{}_toa_data.ckpt'.format(epoch))
        torch.save(TileNet.state_dict(), model_fn)

Begin training.................
Finished epoch 1: 65.142s
  Num Batches: 3
  Sum Loss: 39.18760108947754
  Average loss: 13.0625
  Average l_n: 7.6390
  Average l_d: -8.5146
  Average l_nd: -0.8756

Finished epoch 2: 125.941s
  Num Batches: 3
  Sum Loss: 37.81863021850586
  Average loss: 12.6062
  Average l_n: 8.3711
  Average l_d: -11.4333
  Average l_nd: -3.0622

Finished epoch 3: 195.594s
  Num Batches: 3
  Sum Loss: 34.282400131225586
  Average loss: 11.4275
  Average l_n: 6.5566
  Average l_d: -7.7297
  Average l_nd: -1.1730

Finished epoch 4: 260.470s
  Num Batches: 3
  Sum Loss: 34.566630363464355
  Average loss: 11.5222
  Average l_n: 5.6348
  Average l_d: -6.9989
  Average l_nd: -1.3641

Finished epoch 5: 324.866s
  Num Batches: 3
  Sum Loss: 34.04160976409912
  Average loss: 11.3472
  Average l_n: 3.8508
  Average l_d: -4.2173
  Average l_nd: -0.3665

Finished epoch 6: 387.458s
  Num Batches: 3
  Sum Loss: 29.647513389587402
  Average loss: 9.8825
  Average l_n: 4.9544
  Aver

Finished epoch 50: 3187.067s
  Num Batches: 3
  Sum Loss: 27.698665618896484
  Average loss: 9.2329
  Average l_n: 3.4647
  Average l_d: -5.8813
  Average l_nd: -2.4165

Finished epoch 51: 3262.776s
  Num Batches: 3
  Sum Loss: 25.58309555053711
  Average loss: 8.5277
  Average l_n: 2.6276
  Average l_d: -5.8420
  Average l_nd: -3.2144

Finished epoch 52: 3335.028s
  Num Batches: 3
  Sum Loss: 28.219429969787598
  Average loss: 9.4065
  Average l_n: 3.4280
  Average l_d: -5.2631
  Average l_nd: -1.8351

Finished epoch 53: 3408.116s
  Num Batches: 3
  Sum Loss: 25.762423515319824
  Average loss: 8.5875
  Average l_n: 2.5735
  Average l_d: -5.7942
  Average l_nd: -3.2207

Finished epoch 54: 3503.495s
  Num Batches: 3
  Sum Loss: 26.899226188659668
  Average loss: 8.9664
  Average l_n: 2.9579
  Average l_d: -5.8018
  Average l_nd: -2.8439

Finished epoch 55: 3625.308s
  Num Batches: 3
  Sum Loss: 26.261643409729004
  Average loss: 8.7539
  Average l_n: 2.7768
  Average l_d: -5.6415
  Aver

Finished epoch 99: 6742.265s
  Num Batches: 3
  Sum Loss: 26.655125617980957
  Average loss: 8.8850
  Average l_n: 2.9695
  Average l_d: -5.4856
  Average l_nd: -2.5161

Finished epoch 100: 6814.286s
  Num Batches: 3
  Sum Loss: 25.10195541381836
  Average loss: 8.3673
  Average l_n: 2.6601
  Average l_d: -5.8363
  Average l_nd: -3.1762



In [58]:
# Persist the per-epoch training curves to Google Drive, one .npy file per
# metric (np.save appends the .npy extension to each name).
metrics_dir = '/content/drive/MyDrive/tile2vec/models/38802Img'
# np.save does not create missing directories, and no earlier cell creates
# this one.
os.makedirs(metrics_dir, exist_ok=True)

metrics_to_save = {
    'avg_38802_img_losses': avg_38802_img_losses,
    'avg_38802_img_l_ns': avg_38802_img_l_ns,
    'avg_38802_img_l_ds': avg_38802_img_l_ds,
    'avg_38802_img_nds': avg_38802_img_nds,
}
for metric_name, metric_values in metrics_to_save.items():
    np.save(os.path.join(metrics_dir, metric_name), metric_values)