In [1]:
# try the imports
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import torch
import math
import numpy as np
import pandas as pd
import sys
import random
sys.path.append("../Configs")
import constants
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
import torch.nn as nn

import cv2

In [2]:
#Variables
#model_name = "FULL_TEST"
#model_name = 'Upsampled_Train_bipn'
model_name = 'Hybrid_DL_GEE'

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
class Combo_BI_Dataset(Dataset):
    """
    Dataset for country/ies DL Sentinel2 10-channel imagery.
    """
    def __init__(self, DS_Name, balance=False, limit=0, mode="full",transform=None):
        super(Combo_BI_Dataset, self).__init__()
        self.df=pd.DataFrame()
        self.upsampled=True
        self.transform=transform
        
        def add_dataset(dataset_name):
            df=pd.read_csv(constants.IMAGE_DATASETS_ROOT+dataset_name+'/metadata.csv')
            if dataset_name == 'bangladesh_dl_dataset' or dataset_name == 'india_dl_dataset':
                self.upsampled=False
                
            if balance:
                confirmed={'Image':{},'Label':{},'Geometry':{}}
                denied={'Image':{},'Label':{},'Geometry':{}}
                for idx in df['Label'].keys():
                    if df['Label'][idx]:
                        confirmed['Image'][idx]=df['Image'][idx]
                        confirmed['Label'][idx]=df['Label'][idx]
                        confirmed['Geometry'][idx]=df['Geometry'][idx]
                    else:
                        denied['Image'][idx]=df['Image'][idx]
                        denied['Label'][idx]=df['Label'][idx]
                        denied['Geometry'][idx]=df['Geometry'][idx]
                        
                if limit:
                    random.seed(constants.RANDOM_STATE)
                    sample_keys=random.sample(list(confirmed['Image'].keys()),min([len(confirmed['Image']),limit]))
                    print('Positive size:',len(sample_keys))
                    confirmed={'Image':{},'Label':{},'Geometry':{}}
                    for idx in sample_keys:
                        confirmed['Image'][idx]=df['Image'][idx]
                        confirmed['Label'][idx]=df['Label'][idx]
                        confirmed['Geometry'][idx]=df['Geometry'][idx]
                random.seed(constants.RANDOM_STATE)
                sample_keys=random.sample(list(denied['Image'].keys()),min([len(denied['Image']),math.ceil((len(confirmed['Image'])*3))]))
                print('Negative Size:',len(sample_keys))
                denied={'Image':{},'Label':{},'Geometry':{}}
                for idx in sample_keys:
                    denied['Image'][idx]=df['Image'][idx]
                    denied['Label'][idx]=df['Label'][idx]
                    denied['Geometry'][idx]=df['Geometry'][idx]
                confirmed['Image'].update(denied['Image'])
                confirmed['Label'].update(denied['Label'])
                confirmed['Geometry'].update(denied['Geometry'])
                df=pd.DataFrame().from_dict(confirmed)
            #"""
            df["Image"]=constants.IMAGE_DATASETS_ROOT+dataset_name+'/'+df["Image"]
            df=df[['Image','Label']]
            return df
            
    # ADD DATASETS HERE FOR TRAINING
        
        self.df = pd.concat([self.df,add_dataset(DS_Name)])
        
    # END EDITABLES
        
        self.df = self.df.sample(frac=1, random_state=constants.RANDOM_STATE)
        
        if mode == "tiny":
            self.df = self.df.sample(frac=.05, random_state=constants.RANDOM_STATE)
        
    
    def __getitem__(self, idx):
        file_name, label = self.df.iloc[idx]
        if not self.upsampled:
            img = np.load(file_name)[1:4]
            sr=cv2.dnn_superres.DnnSuperResImpl_create()
            path="../ESPCN_x4.pb"
            sr.readModel(path)
            sr.setModel('espcn',4)
            rgb=np.transpose(img)
            rgb=rgb-np.min(rgb)
            rgb=rgb/np.max(rgb)
            rgb = np.uint8(255 * rgb)
            result=sr.upsample(rgb)
            img = torch.Tensor(result/256).to(device)
        else:
            img = torch.Tensor(np.load(file_name)/256).to(device)
        
        if self.transform:
            img=self.transform(img)
            img=img*256
        else:
            img=img*256
        img.int()
            
        label = torch.Tensor([label]).to(device)
        
        return img, label
        
        
    def __len__(self):
        return len(self.df)


In [4]:
from torchvision.models import resnet18
import torch.nn as nn

class Resnet(torch.nn.Module):
    def __init__(self, num_channels, image_width=64, device=None, pretrained=False):
        super(Resnet, self).__init__()
        self.device = device        
        self.resnet = resnet18(pretrained=pretrained)
        self.resnet.conv1 = torch.nn.Conv2d(num_channels, image_width, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.resnet.fc = torch.nn.Linear(self.resnet.fc.in_features, 256).to(device)
        self.final_fc = torch.nn.Linear(256, 1).to(device)
        
        self.resnet = self.resnet.to(device)
        
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        resnet_output = self.resnet(x)
        outputs = self.final_fc(resnet_output)
        outputs = self.sigmoid(outputs)
        return outputs
    

In [5]:
def load_checkpoint(model_checkpoint, model, device, optimizer=None):
    """
    Loads a pretrained checkpoint to continue training
    model_checkpoint: Path of the model_checkpoint that ends with .pth
    model: model to load to
    device: devide to load on (gpu/cpu)
    optimizer (optional): optimize to load state to
    """
    checkpoint = torch.load(model_checkpoint, map_location=device)
    print('Loaded best state dict from epoch: {}'.format(checkpoint["epoch"]))
    model.load_state_dict(checkpoint["model_state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.to(device)
    return model

def evaluate_model(model, val_loader):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        all_labels = []
        all_preds = []
        all_outputs = []
        for batch_iter, (inputs, labels) in enumerate(val_loader):
            outputs = model(np.transpose(inputs.cpu(),(0,3,1,2)).to(device)).to(device)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            outputs = outputs.cpu().detach().numpy()
            labels = labels.cpu().detach().numpy()
            preds = (outputs > .5).astype('int')
            
            all_labels.append(labels)
            all_preds.append(preds)
            all_outputs.append(outputs)
        
        all_labels = np.array([label for vec in all_labels for label in vec])
        all_preds = np.stack([pred for vec in all_preds for pred in vec])
        all_outputs = np.stack([output for vec in all_outputs for output in vec])
        val_acc = np.mean((all_labels == all_preds).astype('int'))
            
    return val_acc, running_loss / batch_iter, all_labels, all_outputs

In [60]:
dataset=[]
val_dset=None
transformList=None
#"""
#Commend or uncomment for datasets you'd like to add.
#dataset.append(Combo_BI_Dataset('bangladesh_dl_dataset',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('Upsampled0_high_sampled_india_dl_dataset',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('Upsampled0_updatedModel_state_sampled_pakistan_dl_dataset',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('Upsampled0_updatedModel_state_sampled_nepal_dl_dataset',True,858,transform=transformList))
#"""
#"""
#Commend or uncomment for datasets you'd like to add.
dataset.append(Combo_BI_Dataset('FULL_TEST_calm_panda',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('FULL_TEST_calm_snake',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('FULL_TEST_fiery_pig',True,858,transform=transformList))
#dataset.append(Combo_BI_Dataset('FULL_TEST_gaudy_snake',True,858,transform=transformList))
#"""
"""
for dset in dataset:
    if val_dset is None:
        val_dset=dset
    else:
        val_dset=torch.utils.data.ConcatDataset([val_dset,dset])
print(f"{len(val_dset)} validation examples.")
val_loader = DataLoader(val_dset, batch_size=64,shuffle=True)
"""

Positive size: 500
Negative Size: 719


'\nfor dset in dataset:\n    if val_dset is None:\n        val_dset=dset\n    else:\n        val_dset=torch.utils.data.ConcatDataset([val_dset,dset])\nprint(f"{len(val_dset)} validation examples.")\nval_loader = DataLoader(val_dset, batch_size=64,shuffle=True)\n'

In [61]:
train_dset=[]
val_dset=[]
for dset in dataset:
    train, val = torch.utils.data.random_split(
        dset, 
        [len(dset)*8//10, len(dset)-len(dset)*8//10], # 80-20% split
        generator=torch.Generator().manual_seed(0)
    )
    buff=[val[x] for x in range(len(val)) if val[x][1][0]==1. ]
    print(len(buff))
    if train_dset is None and val_dset is None:
        train_dset=train
        val_dset=val
    else:
        train_dset=torch.utils.data.ConcatDataset([train_dset,train])
        val_dset=torch.utils.data.ConcatDataset([val_dset,val])
print(f"{len(train_dset)} training examples and {len(val_dset)} validation examples.")
train_loader = DataLoader(train_dset, batch_size=64,shuffle=True)
val_loader = DataLoader(val_dset, batch_size=64,shuffle=True)


104
975 training examples and 244 validation examples.


In [62]:
# let's give it a whirl

# some things to try for speedups:
# torch.cuda.empty_cache()
# change number of workers, use detach instead of just 
N_EPOCHS = 50
model = Resnet(3, device=device)
lr = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

best_val_loss = None

logdir = constants.MODEL_ROOT+model_name+"_50_training_steps/"


In [63]:
#let's test this bad boy
best_val_model = Resnet(3, device=device)
final_epoch_model = Resnet(3, device=device)

best_val_model = load_checkpoint(f"{logdir}/checkpoints/best_dl_best.pth", 
                                 best_val_model, 
                                 device)
#best_val_model = load_checkpoint('kiln_prod_weights.pth',best_val_model,device)
final_epoch_model = load_checkpoint(f"{logdir}/checkpoints/last_dl_last.pth", 
                                    final_epoch_model, 
                                    device)

#final_epoch_model = load_checkpoint(f"{logdir}/checkpoints/lr0.0001_epoch49.pth", 
#                                    final_epoch_model, 
#                                    device)


Loaded best state dict from epoch: 6
Loaded best state dict from epoch: 19


In [64]:
best_val_acc, best_val_loss, best_labels, best_outputs = evaluate_model(best_val_model, 
                                                                        val_loader)
final_val_acc, final_val_loss, final_labels, final_outputs = evaluate_model(final_epoch_model, 
                                                                            val_loader)


In [65]:
print('Best:',best_val_acc, best_val_loss)
print('Last:',final_val_acc, final_val_loss)

Best: 0.9877049180327869 0.075228667507569
Last: 0.9549180327868853 0.24655809998512268
