## `deeplearner` inference notebook


In [1]:
# Mount Google Drive at /content/gdrive; the paths built in later cells are
# rooted under this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [2]:
%%capture
!pip install rasterio
!pip install tensorboardX
!pip install boto3
!pip install S3Fs
!pip install urllib3==1.25.5

In [3]:
import os
import sys
import shutil
import importlib
from pathlib import Path
import pandas as pd

Update `deeplearner`

In [4]:
git_username = "agroimpacts"
repo = "deeplearner"
root = "/content/gdrive/MyDrive/"
token_path = f"{root}data/tokens/gh.txt"
git_token = open(token_path, "r+").read().splitlines()[0]
git_token
branch_name = "normalization_test"
clone_path = f"{root}sam/FieldBoundaryDataset/FB_working_folder"

if not os.path.exists(clone_path):
    os.makedirs(clone_path)

git_path = f'https://{git_token}@github.com/{git_username}/{repo}.git'
repo_clone_path = f"{clone_path}/{repo}"

os.chdir(clone_path)
if not os.path.isdir(repo_clone_path):
  !git clone -b "{branch_name}" "{git_path}"
else:
  if os.listdir(repo_clone_path):
    os.chdir(repo_clone_path)
    !git branch
    !git config core.fileMode false  # needed to ignore permissions changes
    !git pull

os.chdir("..")


* [32mnormalization_test[m
Already up to date.


In [5]:
# Put the cloned repository (and its inner package folder) at the front of
# sys.path so these locations are searched first when importing.
sys.path.insert(0, os.path.join(repo_clone_path, 'deeplearner/'))
sys.path.insert(0, repo_clone_path)
import deeplearner
# Reload in case the package was already imported earlier in this kernel.
# NOTE(review): importlib.reload re-executes only the module it is given;
# submodules that were already imported may need a kernel restart - confirm.
importlib.reload(deeplearner)
# NOTE(review): the star imports supply the names used in later cells
# (unet_att_d, ModelCompiler, planetData and DataLoader are not defined in
# this notebook); explicit imports would make their origin visible.
from deeplearner.models import *
from deeplearner.losses import *
from deeplearner.datatorch import *
from deeplearner.utils import *
from deeplearner.compiler import *

## U-Net dropout 15


### Parameters

In [6]:
# Run configuration: every tunable path and parameter read by the model,
# data-loading and prediction cells below lives in this one dictionary.
config = {
    "source_dir" : f"{root}data/imagery/",
    # NOTE(review): `root` already ends with "/", so this path contains "//",
    # and it is not the same folder as `clone_path` - confirm this is intended.
    "working_dir" : (
        f"{root}/data/imagery/sam/FieldBoundaryDataset/FB_working_folder"
    ),
    "pred_dir" : f"{root}data/predictions/cropland/attn_unet_dropout15",

    # train and validation dataset
    "img_path_cols" : ["dir_os"],
    "norm_stats_type" : "local_per_tile",

    # Model
    "input_channels" : 4,
    "n_classes" : 3,

    # Model compiler
    "gpuDevices" : [0],
    "params_init_path" : (
        f"{root}data/models/deeplearner/gh_cg_tz_ng/attn_unet_dropout15/"
        f"unet_att_d_params.pth"
    ),
    "freeze_layer_ls" : None,

    # Model fitting
    "dropout_rate" : 0.15,

    # prediction
    "pred_patch_size": 250,
    "pred_buffer": 179,
    "composite_buffer": 179,
    "pred_batch": 2,
    "shrink_pixels": 54,
    "average_neighbors": False

}

# Create the prediction, working and log directories up front. `exist_ok=True`
# keeps the cell re-runnable without a separate check-then-create step.
os.makedirs(config["pred_dir"], exist_ok=True)
os.makedirs(config["working_dir"], exist_ok=True)

log_dir = Path(config["working_dir"]) / "logs"
os.makedirs(log_dir, exist_ok=True)

### Model

In [7]:
# Build the network by referencing the constructor directly; the previous
# eval() of a constant string resolved to this same name.
network = unet_att_d(
    n_classes=config["n_classes"],
    in_channels=config["input_channels"],
    use_skipAtt=False,
    dropout_rate=config["dropout_rate"],
)

# Wrap the network in the compiler, passing the saved parameter file, GPU
# list, buffer and freeze list from the config. `model` is the object the
# prediction cell calls `.predict` on.
model = ModelCompiler(
    network,
    buffer=config["pred_buffer"],
    gpuDevices=config["gpuDevices"],
    params_init=config["params_init_path"],
    freeze_params=config["freeze_layer_ls"],
)

----------GPU available----------
total number of trainable parameters: 157.9M
---------- Pre-trained model compiled successfully ----------


### Prediction function

In [8]:
def load_pred_data(dir_data, log_dir, pred_patch_size, pred_buffer,
                   pred_composite_buffer, pred_batch, catalog, catalog_row,
                   img_path_cols, average_neighbors=False):
    """Build the prediction data loader(s) for one catalog row.

    Args:
        dir_data: forwarded to ``planetData`` as its first argument.
        log_dir: forwarded to ``planetData`` as its second argument.
        pred_patch_size: forwarded to ``planetData`` as ``dataSize``.
        pred_buffer: forwarded to ``planetData`` as ``buffer``.
        pred_composite_buffer: forwarded to ``planetData`` as ``bufferComp``.
        pred_batch: batch size given to ``DataLoader``.
        catalog: pandas DataFrame describing the tiles. It is not modified.
            With ``average_neighbors`` it must have ``tile_col`` and
            ``tile_row`` columns.
        catalog_row: row of the tile to predict on. It is passed to
            ``planetData`` as ``catalogIndex`` and, with
            ``average_neighbors``, used positionally (``iloc``) to find the
            centre tile, whereas neighbours are returned as index labels.
            NOTE(review): this assumes a default 0..n-1 index so positions
            and labels coincide - confirm against ``planetData``.
        img_path_cols: forwarded to ``planetData`` as ``imgPathCols``.
        average_neighbors: when truthy, also load the four adjacent tiles.

    Returns:
        Without ``average_neighbors``: a tuple
        ``(data_loader, meta, tile, year)`` for the requested row.
        With ``average_neighbors``: a dict with keys ``"center"``, ``"top"``,
        ``"left"``, ``"right"`` and ``"bottom"``, each holding such a tuple,
        or ``None`` where the catalog has no tile at that grid position.
    """
    def load_single_tile(catalog_ind=catalog_row):
        # One dataset/loader pair per tile; shuffle stays off for prediction.
        dataset = planetData(
            dir_data, log_dir, catalog=catalog, dataSize=pred_patch_size,
            buffer=pred_buffer, bufferComp=pred_composite_buffer,
            usage="predict", catalogIndex=catalog_ind,
            imgPathCols=img_path_cols
        )
        data_loader = DataLoader(dataset, batch_size=pred_batch, shuffle=False)
        return data_loader, dataset.meta, dataset.tile, dataset.year

    def find_tile(tile_col, tile_row):
        # Index label of the first catalog row at this grid position, or None.
        # A boolean mask avoids writing a helper column into the caller's
        # catalog and rebuilding a Python list for every lookup.
        matches = catalog.index[
            (catalog["tile_col"] == tile_col) & (catalog["tile_row"] == tile_row)
        ]
        return matches[0] if len(matches) else None

    # direct crop edge pixels
    if not average_neighbors:
        return load_single_tile()

    center = catalog.iloc[catalog_row]
    tile_col = center["tile_col"]
    tile_row = center["tile_row"]
    # "top"/"bottom" are one tile_row below/above the centre value,
    # "left"/"right" one tile_col below/above it.
    row_dict = {
        "center": catalog_row,
        "top": find_tile(tile_col, tile_row - 1),
        "left": find_tile(tile_col - 1, tile_row),
        "right": find_tile(tile_col + 1, tile_row),
        "bottom": find_tile(tile_col, tile_row + 1),
    }
    return {
        position: (load_single_tile(catalog_ind=row) if row is not None else None)
        for position, row in row_dict.items()
    }

### Load catalog

In [9]:
# Path of the prediction catalog CSV on Drive (the file name suggests it
# covers 2019-2021), then the catalog itself as a DataFrame.
catalog = f"{root}data/imagery/planetscope/predict_catalog_ghana_2019-2021_CM.csv"
pred_catalog = pd.read_csv(filepath_or_buffer=catalog)

### Run predictions

#### For 2019

In [10]:
yr = "2019"
# Keep the catalog rows whose image path mentions the target year.
# reset_index() renumbers the rows from 0 and keeps the previous labels in an
# "index" column.
pred_cat = pred_catalog[pred_catalog["dir_os"].str.contains(yr)].reset_index()
# AOI identifiers present in this year's subset, in order of first appearance.
aois = pred_cat.aoi.unique().tolist()

First, the tiles have to be moved into new per-year sub-folders because of I/O failures in Drive.

Start by creating new sub-folders in tiles

In [12]:
# Directory that holds the tiles, taken from the first catalog row.
# NOTE(review): assumes every tile in the catalog sits in this same directory.
tile_path = os.path.dirname(
    f"{config['source_dir']}{pred_cat.loc[0, 'dir_os']}"
)
# One sub-folder per year; exist_ok makes the cell safe to re-run.
for y in ["2019", "2020", "2021"]:
    new_path = f"{tile_path}/{y}"
    os.makedirs(new_path, exist_ok=True)

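Then move the files into the 2019 folder and point the catalog paths at it (the move itself is commented out in the cell below, presumably because it has already been run).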

In [13]:
# Copy of the year catalog whose image paths point into the per-year
# sub-folder: "<dir>/<file>" becomes "<dir>/<yr>/<file>". The paths are
# rewritten in one assignment instead of one `.loc` write per row.
pred_cat2 = pred_cat.copy()
pred_cat2["dir_os"] = [
    f"{os.path.dirname(rel_path)}/{yr}/{os.path.basename(rel_path)}"
    for rel_path in pred_cat["dir_os"]
]

# The physical move of the tile files stays disabled, as before. To repeat
# it, run this for every original `dir_os` value `rel_path`:
#     src = f"{config['source_dir']}{rel_path}"
#     if os.path.exists(src):
#         shutil.move(src, f"{tile_path}/{yr}/{os.path.basename(src)}")

In [14]:
# Count the entries in the 2019 tile sub-folder.
!ls /content/gdrive/MyDrive/data/imagery/planetscope/tiles/2019 | wc -l

8113


Write out the new 2019 catalog, but first drop the 3 missing tiles from it to prevent prediction-time errors.

In [15]:
# Display the re-pathed catalog (the rendered table is truncated in the middle).
pred_cat2

Unnamed: 0,index,aoi,tile,tile_col,tile_row,dir_os,type
0,0,1,486215,294,530,planetscope/tiles/2019/tile486215_2019-06_2019...,center
1,3,1,486216,295,530,planetscope/tiles/2019/tile486216_2019-06_2019...,center
2,6,1,486217,296,530,planetscope/tiles/2019/tile486217_2019-06_2019...,center
3,9,1,486218,297,530,planetscope/tiles/2019/tile486218_2019-06_2019...,center
4,12,1,486219,298,530,planetscope/tiles/2019/tile486219_2019-06_2019...,center
...,...,...,...,...,...,...,...
8111,24333,16,639908,307,655,planetscope/tiles/2019/tile639908_2019-06_2019...,center
8112,24336,16,639909,308,655,planetscope/tiles/2019/tile639909_2019-06_2019...,center
8113,24339,16,639910,309,655,planetscope/tiles/2019/tile639910_2019-06_2019...,center
8114,24342,16,639911,310,655,planetscope/tiles/2019/tile639911_2019-06_2019...,center


In [16]:
catalog_out = (
    f"{root}data/imagery/planetscope/predict_catalog_ghana_2019.csv"
)
# File names actually present in this year's tile folder.
tls = os.listdir(f"{tile_path}/{yr}")

# Keep only the catalog rows whose image file exists on disk.
# reset_index(drop=True) renumbers the rows without inserting the old labels
# as a new column. A bare reset_index() added a "level_0" column here
# (because "index" already exists) and raised ValueError when the cell was
# re-run.
on_disk = pred_cat2["dir_os"].map(os.path.basename).isin(set(tls))
pred_cat2 = pred_cat2[on_disk].reset_index(drop=True)

# Write the catalog once; an existing file is left untouched.
if not os.path.exists(catalog_out):
    pred_cat2.to_csv(catalog_out)

In [None]:
# Manual resume state for this long-running job: which AOIs to process and
# the first catalog index that still needs a prediction. Use slice(None) and
# 0 to process everything from the start.
aoi_slice = slice(6, 7)
resume_from = 3542

for aoi in aois[aoi_slice]:
    print(f"AOI {aoi}")
    pcat = pred_cat2.loc[pred_cat2.aoi == aoi]
    inds = pcat.query("type == 'center'").index.values
    if len(inds) == 0:
        # Nothing to predict for this AOI; avoids an IndexError on inds[-1].
        print(f"No center tiles for AOI {aoi}; skipping")
        continue
    print(inds[-1])
    # Iterate over this AOI's own center-tile indices from the resume point
    # on, rather than over a raw integer range that could include rows of
    # another AOI or of another type.
    for i in inds[inds >= resume_from].tolist():
        print("Predicting on index %s" % (i))
        pred_dataloader = load_pred_data(
            dir_data=config["source_dir"], log_dir=log_dir,
            pred_patch_size=config["pred_patch_size"],
            pred_buffer=config["pred_buffer"],
            pred_composite_buffer=config["composite_buffer"],
            pred_batch=config["pred_batch"], catalog=pred_cat2, catalog_row=i,
            img_path_cols=config["img_path_cols"],
            average_neighbors=config["average_neighbors"]
        )
        # Output location is given by outPrefix; the return value is not used.
        model.predict(
            pred_dataloader, bucket=None, outPrefix=config["pred_dir"],
            predBuffer=config["pred_buffer"],
            averageNeighbors=config["average_neighbors"],
            shrinkBuffer=config["shrink_pixels"]
        )

    print(f"Finished {aoi}")

Move the prediction images into sub-folders organized by year and score

In [33]:
# One sub-folder per score type under <pred_dir>/<yr>; exist_ok makes the
# cell safe to re-run.
for score_dir in ["score1", "score2"]:
    new_path = f"{config['pred_dir']}/{yr}/{score_dir}"
    os.makedirs(new_path, exist_ok=True)

In [None]:
os.chdir(config['pred_dir'])
!mv Score_1* 2019/score1/
!mv Score_2* 2019/score2
!ls 

# New Section