# Version loading each image separately, suitable for new synced camera capture data and large data


In [1]:
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset as dset
from torchvision import transforms as T
from torch.utils.data import ConcatDataset,DataLoader
from torch.utils.data.dataset import random_split
from tensorboardX import SummaryWriter
from model import vgg,vgg_bn,vgg_DDFF,vgg_DDFF_bn
import matplotlib.pyplot as plt
from plot_utils import show2D

import time
import os
from PIL import Image
# Enumerate GPUs in PCI bus order and make only device 0 visible to this process.
# NOTE(review): these variables are set after `import torch`; presumably this relies on
# CUDA not having been initialised yet at this point — confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES']="0"
# Fix the NumPy and PyTorch seeds (trailing semicolons are kept from the notebook cell).
np.random.seed(100);
torch.manual_seed(100);
# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True



In [2]:
from torchvision.transforms.functional import adjust_gamma
class RandomGamma(object):
    """
    Randomly gamma/gain-adjust a PIL image.

    On every call a gamma is drawn uniformly from [gammaRange[0], gammaRange[1])
    and a gain uniformly from [gainRange[0], gainRange[1]); both are passed to
    torchvision's ``adjust_gamma`` together with the image.

    Args:
        gammaRange: sequence whose first two items are the lower/upper gamma bound.
        gainRange: sequence whose first two items are the lower/upper gain bound.

    NOTE(review): the random numbers come from NumPy's global RNG. When this
    transform runs inside DataLoader worker processes the workers may start
    from the same NumPy state — confirm whether per-worker seeding is needed.
    """

    # Tuples instead of lists so the defaults cannot be mutated between instances.
    def __init__(self, gammaRange=(0.5, 1), gainRange=(0.5, 1.5)):
        self.gammaMin, self.gammaMax = gammaRange[0], gammaRange[1]
        self.gainMin, self.gainMax = gainRange[0], gainRange[1]

    @staticmethod
    def get_params(gammaMin, gammaMax, gainMin, gainMax):
        """
        Draw one (gamma, gain) pair.

        Gamma is sampled first and gain second, each as
        ``np.random.rand() * (max - min) + min``.

        Returns:
            tuple: (gamma, gain) as Python floats.
        """
        gamma = np.random.rand() * (gammaMax - gammaMin) + gammaMin
        gain = np.random.rand() * (gainMax - gainMin) + gainMin
        return gamma, gain

    def __call__(self, PILimg):
        """
        Args:
            PILimg (PIL Image): image to augment (the callers in this file pass
                the RGB image straight from ``Image.open``).

        Returns:
            PIL Image: the result of ``adjust_gamma(PILimg, gamma, gain)``.
            NOTE(review): the original docstring claimed an output range of
            [0, 255*gain]; presumably torchvision clips to the image's valid
            range — confirm against the installed torchvision version.
        """
        gamma, gain = self.get_params(self.gammaMin, self.gammaMax, self.gainMin, self.gainMax)
        # The per-image debug prints of gamma/gain were removed: they flooded
        # stdout once for every image loaded.
        return adjust_gamma(PILimg, gamma, gain)


In [3]:
class FSdataset_PIL(dset):
    """
    Focal-stack (FS) dataset for ONE object orientation, loading every image from disk on demand.

    Images are read from ``<datafolder>/<F>_<orientation>/%04d.bmp`` (file numbering starts at 1),
    one file per sensor position F in ``F_list``. Each sample is a dict with
      'FS'          : the per-F gray images concatenated along dim 0, i.e. (nF, H, W)
      'xyzthetaphi' : float32 array of shape (5,), i.e. (x, y, z, theta, phi)
    The x, y, z labels are a regular grid in [0, 1]; the sample index increases along x first,
    then y, then z.
    """
    def __init__(self,datafolder,F_list=('',), orientation='', thetaphi=None, nx = 4, ny = 4, nz = 9, constant_brightness = False, gamma_aug = False):
        # datafolder: path to the folder containing the subdirectories of all F and all orientations
        # F_list: sequence of strings specifying the sensor positions F, in order of increasing distance
        #         from the camera, e.g. F_list = ['F40','F41']
        # orientation: string specifying the orientation of the object, e.g. 'o1'
        # thetaphi: encoding of (theta, phi) for this orientation, shape (nx*ny*nz, 2).
        #           None (the default) means all zeros. The previous default np.zeros(1) was 1-D
        #           and could not be concatenated with the (N,3) grid, so it always raised.
        # nx, ny, nz: number of grid positions along x, y, z
        # constant_brightness: if true, rescale every image to mean intensity 0.5
        # gamma_aug: if true, apply RandomGamma before the gray conversion
        super(FSdataset_PIL,self).__init__()
        self.datafolder, self.F_list, self.orientation = datafolder, F_list, orientation
        self.N = nx*ny*nz
        self.XYZthetaphi = self._build_labels(nx, ny, nz, thetaphi)
        self.transform = self._make_transform(constant_brightness = constant_brightness, gamma_aug = gamma_aug)

    def __len__(self):
        # Every F sub-directory must hold exactly N entries (one per object position).
        length_list = [len(os.listdir(os.path.join(self.datafolder, F +'_'+self.orientation))) for F in self.F_list]
        assert all(length == self.N for length in length_list), "Number of images for each F is not all the same!" #check if number of images for each F is the same
        return self.N # number of images of each F, i.e., number of object positions

    def __getitem__(self,index):
        # Load one image per F, transform it to a (1,H,W) tensor and stack along dim 0.
        FS = []
        for F in self.F_list:
            # +1 since the LabVIEW-saved images start at index 1
            path = os.path.join(self.datafolder, F +'_'+self.orientation,'%04d.bmp' %(index+1))
            # The context manager closes the file handle as soon as the tensor has been built.
            with Image.open(path) as img:
                FS.append(self.transform(img))
        FS = torch.cat(FS, 0) # concatenate tensors in color dimension
        # FS has dimension nF,H,W (the original note gives H=1024, W=1280 for this camera — not checked here);
        # xyzthetaphi has shape (5,), i.e. (x,y,z,theta,phi)
        return {'FS':FS,'xyzthetaphi':self.XYZthetaphi[index]}

    @classmethod
    def _build_labels(cls, nx, ny, nz, thetaphi=None):
        # Return the (nx*ny*nz, 5) float32 label matrix [x, y, z, theta, phi].
        xyz = cls._generate_XYZ(nx, ny, nz)
        if thetaphi is None:
            thetaphi = np.zeros((xyz.shape[0], 2))
        return np.concatenate([xyz, thetaphi], 1).astype('float32')

    @classmethod
    def _make_transform(cls, constant_brightness = False, gamma_aug = False):
        # Build the transform applied to every loaded PIL image:
        #   [RandomGamma] -> convert to gray -> ToTensor -> [normalise mean brightness]
        # The flags are evaluated by truthiness; the previous `== True/== False` chain sent any
        # non-bool value such as None to the "both enabled" branch.
        T_list = []
        if gamma_aug:
            T_list.append(RandomGamma())
        # ToTensor converts the gray PIL image to a float tensor of shape (1,H,W) scaled to [0,1].
        T_list += [T.Lambda(cls.convert2gray), T.ToTensor()]
        if constant_brightness:
            T_list.append(T.Lambda(cls.norm_const_bright))
        return T.Compose(T_list)

    @staticmethod
    def convert2gray(img):
        # input: PIL image
        # output: gray scale PIL image (mode 'L')
        return img.convert('L')

    @staticmethod
    def norm_const_bright(img):
        # input: image torch tensor; output: image rescaled to mean intensity const_value
        const_value = 0.5
        mean = img.mean()
        if mean == 0:
            # Nothing to rescale; dividing by a zero mean would fill the sample with NaN/inf.
            return img
        return img/mean*const_value

    @staticmethod
    def _generate_XYZ(nx,ny,nz):
        # Auxiliary function generating the normalised grid coordinates, returns (nx*ny*nz, 3).
        # 'ij' indexing combined with Fortran-order flattening makes x vary fastest, then y, then z.
        x = np.linspace(0, 1, nx)
        y = np.linspace(0, 1, ny)
        z = np.linspace(0, 1, nz)
        X, Y, Z = np.meshgrid(x, y, z, indexing = 'ij')
        return np.column_stack([X.flatten(order = 'F'), Y.flatten(order = 'F'), Z.flatten(order = 'F')])


In [4]:
# Run configuration shared by the training cells below.
# NOTE(review): the original note here said "Total # of data sample is 288", which looks stale —
# with the 8 orientations and the 11x11x16 position grid set up in the dataset cell the total
# would be 8*11*11*16 = 15488; confirm.
#valds_size=43
valds_size=2300  # number of samples held out for validation by random_split
bs_train=20  # training batch size
bs_val=10  # validation batch size
lr=1e-4  # learning rate handed to Adam
# Log directories of earlier runs, kept for reference:
#writer = SummaryWriter('logs/3 20 data_norm_imag_c_0.5/D_4_DDFF_bn/lr1e-3_valds_20_o1_o2')
#writer = SummaryWriter('logs/3 20 data/D_4_DDFF_bn/lr1e-3_valds_20_o1_o2')
#log_path = 'logs/6 13 data/D_4_DDFF_bn_no_dropout/lr1e-4_bs10_valds_40_o1_o2_o3_100hiddenUnits_sensors_F45'
#log_path = 'logs/orientation_classification/5 20 data/D_4_bn_no_dropout/lr1e-4_bs20_valds_580_o1_o2_o3_100hiddenUnits'
#log_path = 'logs/07 08 data/D_4_DDFF_bn_no_dropout/lr1e-4_bs20_valds_2300_train_o1_to_o7_val_o8_100hiddenUnits'
# Active run: TensorBoard events are written to log_path, and the training cells save model.pth there too.
log_path = 'logs/orientation_classification/7 08 data/D_4_DDFF_bn_no_dropout/lr1e-4_gamma_0.3_stepAt_3_5_10_20_bs20_valds_2300_o1_to_o8_100hiddenUnits_run3'
writer =  SummaryWriter(log_path)

In [5]:

# Grid of object positions and image-loading options shared by all orientations.
nx, ny, nz = 11, 11, 16
constant_brightness = False
gamma_aug = False
datafolder = '/home/zyhuang/EVO970Plus/2019 07 08/'
#datafolder = '/home/zyhuang/WD/XYZthetaphi/Raw Camera data/2019 06 13/'
F_list = ['F35.95','F37.35']

# One dataset per orientation folder 'o1'..'o8'. Orientation k (1-based) is labelled with the
# constant k-1 in both the theta and the phi column.
_per_orientation = []
for _label in range(8):
    _per_orientation.append(
        FSdataset_PIL(
            datafolder,
            F_list=F_list,
            orientation='o%d' % (_label + 1),
            thetaphi=_label*np.ones([nx*ny*nz,2]),
            nx=nx,
            ny=ny,
            nz=nz,
            constant_brightness=constant_brightness,
            gamma_aug=gamma_aug,
        )
    )
# Keep the individual names available for later cells.
ds1, ds2, ds3, ds4, ds5, ds6, ds7, ds8 = _per_orientation

ds_all = ConcatDataset(_per_orientation)

# Random hold-out of valds_size samples across all orientations.
_n_train = len(ds_all) - valds_size
ds_train, ds_val = random_split(ds_all, [_n_train, valds_size])

train_loader = DataLoader(ds_train, batch_size=bs_train, shuffle=True, num_workers=4)
val_loader = DataLoader(ds_val, batch_size=bs_val, shuffle=False, num_workers=4)

# Training code for predicting xyz

In [None]:
# Model, loss and optimiser for the xyz regression run.
device = torch.device("cuda")
# Alternative architectures tried earlier:
#net=vgg_bn(config_key='A_4')
#net=vgg(config_key='D_4')
# nF is set to the number of sensor positions, i.e. the number of images stacked per sample.
net=vgg_DDFF_bn(nF=len(F_list),config_key='D_4_DDFF',dropout = False, num_hiddenunit=100)
net.to(device)
# Mean-squared error between the network output and the (x, y, z) labels.
criterion = nn.MSELoss()
optimizer=optim.Adam(net.parameters(),lr=lr)

In [None]:

# Train the xyz regression network for 60 epochs. After every epoch the whole validation set is
# evaluated, the metrics are logged to TensorBoard and the weights are saved to <log_path>/model.pth.
step=0
for epoch in range(60):
    print("Current epoch number%d" %epoch)
    # The validation pass below switches to eval mode, so training mode is restored once per epoch.
    net.train()
    for idx,data in enumerate(train_loader,0):
        # Columns 0:3 of 'xyzthetaphi' are x,y,z; columns 3 and 4 are theta and phi.
        FS,xyz,theta,phi=data['FS'].to(device),data['xyzthetaphi'][:,0:3].to(device),data['xyzthetaphi'][:,3].to(device),data['xyzthetaphi'][:,4].to(device)
        score=net(FS)
        loss=criterion(score,xyz)
        # Per-coordinate losses are for monitoring only; just `loss` is back-propagated.
        loss_x,loss_y,loss_z=criterion(score[:,0],xyz[:,0]),criterion(score[:,1],xyz[:,1]),criterion(score[:,2],xyz[:,2])
        print('Loss is %3f' %(loss.item()))
        print('Loss x,y,z is %3f, %3f, %3f' %(loss_x.item(),loss_y.item(),loss_z.item()))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the training loss every 10 optimisation steps.
        if step % 10 == 0:
            writer.add_scalar('loss', loss.item(), step)
            writer.add_scalar('loss z', loss_z.item(), step)

        step = step + 1

    #validate every epoch
    full_val_loss,full_val_loss_x,full_val_loss_y,full_val_loss_z = [0,0,0,0]
    n_val = 0
    net.eval()
    with torch.no_grad():
        for idx,data in enumerate(val_loader,0):
            FS,xyz,theta,phi=data['FS'].to(device),data['xyzthetaphi'][:,0:3].to(device),data['xyzthetaphi'][:,3].to(device),data['xyzthetaphi'][:,4].to(device)
            score=net(FS)
            loss=criterion(score,xyz)
            loss_x,loss_y,loss_z=criterion(score[:,0],xyz[:,0]),criterion(score[:,1],xyz[:,1]),criterion(score[:,2],xyz[:,2])
            # Weight every batch mean by its size so that a smaller final batch does not bias the
            # epoch average (the previous per-batch average assumed equally sized batches).
            batch_n = xyz.shape[0]
            n_val += batch_n
            full_val_loss += loss.item()*batch_n
            full_val_loss_x += loss_x.item()*batch_n
            full_val_loss_y += loss_y.item()*batch_n
            full_val_loss_z += loss_z.item()*batch_n

    # max(..., 1) keeps an empty validation loader from raising ZeroDivisionError.
    n_val = max(n_val, 1)
    full_val_loss /= n_val
    full_val_loss_x /= n_val
    full_val_loss_y /= n_val
    full_val_loss_z /= n_val

    print('Val Loss is %3f' %(full_val_loss))#for multiple test sample
    print('Val Loss x,y,z is %3f, %3f, %3f' %(full_val_loss_x,full_val_loss_y,full_val_loss_z))

    writer.add_scalar('Val loss', full_val_loss, epoch)
    writer.add_scalar('Val loss x', full_val_loss_x, epoch)
    writer.add_scalar('Val loss y', full_val_loss_y, epoch)
    writer.add_scalar('Val loss z', full_val_loss_z, epoch)
    # xyz/score still hold the last validation batch here.
    show2D(xyz.to('cpu').detach(),score.to('cpu').detach(),howmany=10,save_name=None)

    # Overwrite the checkpoint at the end of every epoch.
    torch.save(net.state_dict(), os.path.join(log_path, 'model.pth'))


In [None]:
# Plot labels against predictions for the batch left in `xyz`/`score` by the previous cell,
# this time passing a save location under log_path.
# NOTE(review): presumably show2D writes its figure(s) into that directory — confirm in plot_utils.
show2D(xyz.to('cpu').detach(),score.to('cpu').detach(),howmany=10,save_name=log_path+'/')

# Training code for predicting only the orientation

In [6]:
# Model, loss, optimiser and learning-rate schedule for the orientation classification run.
num_class = 8 #number of orientation, as classification problem
device = torch.device("cuda")
net=vgg_bn(config_key='D_4',num_out = num_class,dropout = False,num_hiddenunit=100)
net.to(device)
# Cross-entropy over the num_class network outputs; the training cell uses the theta column as the class index.
criterion = nn.CrossEntropyLoss()
optimizer=optim.Adam(net.parameters(),lr=lr)
# Multiply the learning rate by 0.3 at scheduler steps 3, 5, 10 and 20
# (the training cell calls scheduler.step() once per epoch).
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [3,5,10,20], gamma=0.3)

In [None]:

# Train the orientation classifier for 60 epochs. After every epoch the whole validation set is
# evaluated (loss and accuracy), the metrics are logged to TensorBoard, the weights are saved to
# <log_path>/model.pth and the learning-rate scheduler is advanced.
step=0
for epoch in range(60):
    print("Current epoch number%d" %epoch)
    # The validation pass below switches to eval mode, so training mode is restored once per epoch.
    net.train()
    for idx,data in enumerate(train_loader,0):
        # Columns 0:3 of 'xyzthetaphi' are x,y,z; column 3 (theta) carries the orientation class index.
        FS,xyz,theta,phi=data['FS'].to(device),data['xyzthetaphi'][:,0:3].to(device),data['xyzthetaphi'][:,3].to(device),data['xyzthetaphi'][:,4].to(device)
        score=net(FS)
        # Labels are stored as float32, CrossEntropyLoss needs int64 class indices.
        loss=criterion(score,theta.type(torch.int64))
        print('Loss is %3f' %(loss.item()))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the training loss every 10 optimisation steps.
        if step % 10 == 0:
            writer.add_scalar('loss', loss.item(), step)

        step = step + 1

    full_val_loss = 0
    full_val_acc = 0
    n_val = 0
    net.eval()
    with torch.no_grad():
        for idx,data in enumerate(val_loader,0):
            FS,xyz,theta,phi=data['FS'].to(device),data['xyzthetaphi'][:,0:3].to(device),data['xyzthetaphi'][:,3].to(device),data['xyzthetaphi'][:,4].to(device)
            score=net(FS)
            loss=criterion(score,theta.type(torch.int64))
            predicted_class = score.cpu().detach().numpy().argmax(1)
            batch_n = len(theta)
            batch_correct = int((predicted_class == theta.cpu().detach().numpy()).sum())
            val_acc = batch_correct/batch_n
            # Accumulate sample-weighted totals so that a smaller final batch does not bias the
            # epoch metrics (the previous per-batch average assumed equally sized batches).
            n_val += batch_n
            full_val_acc += batch_correct
            full_val_loss += loss.item()*batch_n

    # max(..., 1) keeps an empty validation loader from raising ZeroDivisionError.
    n_val = max(n_val, 1)
    full_val_acc = full_val_acc/n_val
    full_val_loss = full_val_loss/n_val

    print('Val Loss is %3f, classification accuracy is %3f' %(full_val_loss,full_val_acc))#for multiple test sample
    writer.add_scalar('Val loss', full_val_loss, epoch)
    writer.add_scalar('Val classification accuracy', full_val_acc, epoch)

    #save model at the end of every epoch
    torch.save(net.state_dict(), os.path.join(log_path, 'model.pth'))
    scheduler.step()
        
