# Fine tuning training
First, the PDD-Net student is trained with PWC-Net predictions as an additional soft target; it is then fine-tuned on a different dataset.

In [None]:
import numpy as np
import cv2
from PIL import Image
import os
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import pandas as pd
import datetime as datetime
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from math import ceil

from utils.preprocessing import preprocessing_flownet, preprocessing_pwc
from utils.load_models import load_flownet2, load_pwcnet, init_weights
from utils.plotting import flow2img, overlaySegment, showFlow
from utils.layers import warp, warp_Flow
from utils.encoding import labelMatrixOneHot, dice_coeff
import torch.utils.checkpoint
from models.pdd_net.pdd_student import OBELISK2d

# Restrict this process to physical GPU 3. The variable has to be set before
# the first CUDA call so that the CUDA runtime only ever sees that one card.
os.environ["CUDA_VISIBLE_DEVICES"] = '3'

# Gather a (device context object, device name) pair for every visible GPU.
visible_gpu_count = torch.cuda.device_count()
available_gpus = [
    (torch.cuda.device(idx), torch.cuda.get_device_name(idx))
    for idx in range(visible_gpu_count)
]
print(available_gpus)

# Use the first visible GPU when there is one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

# Data

In [None]:
# Image pairs and segmentations for the soft-target (teacher/student) stage.
# The loops below index dimension 1 with 0 for the fixed frame and 1 for the
# moving frame.
# NOTE(review): "disp_6" presumably encodes the frame distance of a pair — confirm.
imgs = torch.load('/share/data_ultraschall/nicke_ma/data/train_frames_disp_6.pth')
segs = torch.load('/share/data_ultraschall/nicke_ma/data/train_segs_disp_6.pth')

# Held-out pairs used to score the student after every epoch.
test_imgs = torch.load('/share/data_ultraschall/nicke_ma/data/test_frames_disp_6.pth')
test_segs = torch.load('/share/data_ultraschall/nicke_ma/data/test_segs_disp_6.pth')

# Index tensors over both splits; train_set is re-shuffled at every epoch.
train_set = torch.arange(len(imgs))
test_set = torch.arange(len(test_imgs))

# Student

In [None]:
class OBELISK2d(nn.Module):
    """2-D student network: sparse-offset feature extractor plus a dense
    displacement head.

    Both input images go through the same feature extractor. The features
    are compared over an ``11 x 11`` window of discrete displacements
    (``correlation_layer``), the resulting cost volume is smoothed
    (``min_convolution``) and turned into per-location displacement
    probabilities and an expected displacement field (``meanfield``).

    The reshapes in ``forward`` hard-code a batch size of 1.

    Args:
        chan: Base channel count; the feature maps have ``chan`` channels.
        size: ``(H, W)`` of the input images. The features and the returned
            displacement field live on a ``(H // 4 + 1, W // 4 + 1)`` grid.
    """

    def __init__(self, chan=16, size=(150,150)):
        super(OBELISK2d, self).__init__()
        channels = chan
        # Two sets (index 0 and 1) of ``channels * 2`` learnable 2-D sampling
        # offsets; features are the difference of the two sampled values.
        self.offsets = nn.Parameter(torch.randn(2, channels * 2, 2) * 0.05)
        self.layer0 = nn.Conv2d(1, 4, 5, stride=2, bias=False, padding=2)
        self.batch0 = nn.BatchNorm2d(4)

        # layer0 yields 4 channels, each sampled at ``channels * 2`` offsets,
        # which gives the ``channels * 8`` inputs of layer1.
        self.layer1 = nn.Conv2d(channels * 8, channels * 4, 1, bias=False,
                                groups=1)
        self.batch1 = nn.BatchNorm2d(channels * 4)
        self.layer2 = nn.Conv2d(channels * 4, channels * 4, 3, bias=False,
                                padding=1)
        self.batch2 = nn.BatchNorm2d(channels * 4)
        self.layer3 = nn.Conv2d(channels * 4, channels * 1, 1)

        H, W = size
        self.o_m = H // 4 + 1
        self.o_n = W // 4 + 1

        self.displace_range = 11
        # Half width of the displacement window; used as unfold padding.
        self.disp_hw = (self.displace_range - 1) // 2
        self.disp_range = 0.25
        self.displacement_width = 11

        identity = torch.eye(2, 3).unsqueeze(0)
        # align_corners=False is the value the original implicit default
        # resolved to; stating it keeps behaviour and silences the warning.
        ogrid_xy = F.affine_grid(identity, (1, 1, self.o_m, self.o_n),
                                 align_corners=False).view(1, 1, -1, 2)
        grid_size = 32  # 25#30
        grid_xy = F.affine_grid(identity, (1, 1, grid_size, grid_size),
                                align_corners=False).view(1, -1, 1, 2)
        # Buffers follow the module through .cuda()/.to(device), so the class
        # also works on CPU. persistent=False keeps them out of state_dict,
        # so the checkpoint keys stay exactly as before.
        self.register_buffer('ogrid_xy', ogrid_xy, persistent=False)
        self.register_buffer('grid_xy', grid_xy, persistent=False)

    def _sample_at_offsets(self, feat, offset_set):
        """Sample ``feat`` on the output grid shifted by one offset set."""
        grid = self.ogrid_xy + self.offsets[offset_set, :, :].view(1, -1, 1, 2)
        sampled = F.grid_sample(feat, grid, align_corners=False)
        return sampled.view(1, -1, self.o_m, self.o_n)

    def _extract_features(self, img):
        """Map one image to its ``(1, chan, o_m, o_n)`` feature map."""
        x = F.avg_pool2d(img, 3, padding=1, stride=2)
        x = F.relu(self.batch0(self.layer0(x)))
        # Feature = difference between the two offset-sampled responses.
        x = self._sample_at_offsets(x, 0) - self._sample_at_offsets(x, 1)
        x = F.relu(self.batch1(self.layer1(x)))
        x = F.relu(self.batch2(self.layer2(x)))
        return self.layer3(x)

    def forward(self, fixed_img, moving_img):
        """Estimate the displacement between two images.

        Args:
            fixed_img: Fixed image, batch size 1, one channel.
            moving_img: Moving image with the same layout.

        Returns:
            ``(soft_cost, disp_xy)``: ``soft_cost`` has shape
            ``(o_m * o_n, displace_range ** 2)`` and holds the displacement
            probabilities per grid location; ``disp_xy`` has shape
            ``(1, 2, o_m, o_n)`` and holds the expected displacement.
        """
        # The fixed image is processed completely before the moving one, so
        # every BatchNorm layer sees the two inputs in the original order.
        features_fixed = self._extract_features(fixed_img)
        features_moving = self._extract_features(moving_img)

        ssd_distance = self.correlation_layer(features_moving, features_fixed)
        soft_cost, disp_xy = self.meanfield(ssd_distance, fixed_img,
                                            self.displace_range,
                                            self.o_m, self.o_n)

        return soft_cost, disp_xy

    def min_convolution(self, ssd_distance, displace_range, H, W):
        """Smooth the cost volume in displacement space and in image space.

        Args:
            ssd_distance: Cost volume of shape ``(1, displace_range**2, H, W)``.
            displace_range: Side length of the displacement window.
            H: Height of the feature grid.
            W: Width of the feature grid.

        Returns:
            Smoothed cost volume with the same shape as ``ssd_distance``.
        """
        # View the displacements as a small image per spatial location.
        cost = ssd_distance.permute(0, 2, 3, 1).reshape(
            1, -1, displace_range, displace_range)

        # Approximate min convolution: -max_pool(-x) is a min filter, followed
        # by two average filters. Padding 5 offsets the shrinkage of the three
        # un-padded filters (2 + 4 + 4 = 2 * 5).
        cost = F.pad(cost, (5, 5, 5, 5), mode='replicate')
        cost = -F.max_pool2d(-cost, 3, stride=1)
        cost = F.avg_pool2d(F.avg_pool2d(cost, 5, stride=1), 5, stride=1)

        # Back to (1, displacements, H, W), then smooth spatially; padding 4
        # offsets the shrinkage of the two average filters.
        cost = cost.permute(0, 2, 3, 1).reshape(1, -1, H, W)
        cost = F.pad(cost, (4, 4, 4, 4), mode='replicate')
        min_conv_cost = F.avg_pool2d(F.avg_pool2d(cost, 5, stride=1), 5,
                                     stride=1)

        return min_conv_cost

    def meanfield(self, ssd_distance, img_fixed, displace_range, H, W):
        """Turn the cost volume into probabilities and an expected flow.

        Args:
            ssd_distance: Cost volume of shape ``(1, displace_range**2, H, W)``.
            img_fixed: Unused; kept so existing callers keep working.
            displace_range: Side length of the displacement window.
            H: Height of the feature grid.
            W: Width of the feature grid.

        Returns:
            ``(soft_cost, disp_xy)`` with shapes ``(H * W, displace_range**2)``
            and ``(1, 2, H, W)``.
        """
        crnt_dev = ssd_distance.device

        cost = self.min_convolution(ssd_distance, displace_range, H, W)

        # Low cost -> high probability; softmax over the displacement axis.
        soft_cost = F.softmax(-10 * cost.view(displace_range ** 2, -1).t(), 1)

        # Integer displacements -disp_hw..disp_hw, divided by half the grid
        # extent to express them in normalised grid coordinates.
        disp_hw = (displace_range - 1) // 2
        disp_mesh_grid = disp_hw * F.affine_grid(
            torch.eye(2, 3).unsqueeze(0),
            (1, 1, displace_range, displace_range), align_corners=True)
        disp_mesh_grid = disp_mesh_grid / torch.tensor(
            [(W - 1) * .5, (H - 1) * .5])
        disp_mesh_grid = disp_mesh_grid.view(1, 1, 1, -1, 2).to(crnt_dev)

        # Expected displacement = probability-weighted sum of all candidates.
        disp_xy = torch.sum(
            soft_cost.reshape(1, H, W, -1, 1) * disp_mesh_grid,
            3).permute(0, 3, 1, 2)

        return soft_cost, disp_xy

    def correlation_layer(self, feat_moving, feat_fixed):
        """Sum of squared feature differences for every displacement.

        Args:
            feat_moving: Moving features of shape ``(1, C, H, W)``.
            feat_fixed: Fixed features of shape ``(1, C, H, W)``.

        Returns:
            Cost volume of shape ``(1, displace_range ** 2, H, W)``.
        """
        # Channels become the batch axis so that unfold extracts, for every
        # location, the displace_range x displace_range neighbourhood.
        feat_moving_unfold = F.unfold(feat_moving.transpose(1, 0),
                                      (self.displace_range, self.displace_range),
                                      padding=self.disp_hw)
        B, C, H, W = feat_fixed.size()

        # Uses self.displace_range; the original read a notebook-level global
        # here and failed whenever that global was not defined.
        ssd_distance = ((feat_moving_unfold - feat_fixed.reshape(C, 1, -1)) ** 2
                        ).sum(0).view(1, self.displace_range ** 2, H, W)

        return ssd_distance

# Teacher

In [None]:
# Load both teacher networks onto the GPU and switch them to eval mode.
flownet = load_flownet2().cuda()
flownet.eval()
pwc = load_pwcnet().cuda()
pwc.eval()

# Soft target training

In [None]:
# ---- Settings for the soft-target (teacher/student) stage ----

# Optimisation schedule.
epochs = 200
lr = 0.00025
grad_accum = 5  # number of samples whose gradients are summed per optimizer step

# Displacement search window used when unfolding the labels:
# 11 x 11 candidates, i.e. a half width of 5.
displace_range = 11
disp_hw = 5

# Per-class loss weights: background = 0.1, vein = 0.6, artery = 0.3.
label_weights = torch.tensor([0.1, 0.6, 0.3])

# Spatial size of the images fed to the student.
H = W = 150

# Fresh student with project-specific weight initialisation, in training mode on the GPU.
student = OBELISK2d(16)
student.apply(init_weights)
student.train().cuda()

optimizer = torch.optim.Adam(student.parameters(), lr=lr)
# Blend factor between two teachers.
# NOTE(review): no active code appears to read this value.
alpha = 0.5

# Bookkeeping for the best evaluation score seen so far.
best_epoch = 0
prev_eval = 0

In [None]:
# Soft-target training of the student.
#
# Every epoch: shuffle the training pairs, accumulate gradients over
# `grad_accum` samples per optimizer step, then score the student on the test
# pairs and checkpoint it whenever the mean Dice improves.
losses = []      # per epoch: mean of [teacher term, label term, smoothness term]
accs = []        # per epoch: mean Dice over the test pairs
unwarped = []
scale = 2        # the teacher input is (scale * 64) pixels on each side

checkpoint_dir = 'models/Experiment_2/fineTuneSoft'
# torch.save does not create missing directories.
os.makedirs(checkpoint_dir, exist_ok=True)

# Full-resolution identity sampling grid; it never changes, so build it once.
identity = F.affine_grid(torch.eye(2, 3).unsqueeze(0), (1, 1, H, W), align_corners=False)


def pwc_warp_segmentation(fixed, moving, moving_seg):
    """Warp ``moving_seg`` with the PWC-Net flow between ``fixed`` and ``moving``.

    The teacher is only a target, so the whole computation runs without
    autograd bookkeeping.

    Returns:
        The warped segmentation as a CPU tensor.
    """
    with torch.no_grad():
        # The teacher expects inputs whose side length is a multiple of 64.
        teacher_fixed = F.interpolate(fixed, size=(scale*64, scale*64), mode='bicubic')
        teacher_moving = F.interpolate(moving, size=(scale*64, scale*64), mode='bicubic')

        pwc_flow_in = preprocessing_pwc(
            teacher_fixed.detach().clone().reshape(scale*64, scale*64, 1),
            teacher_moving.detach().clone().reshape(scale*64, scale*64, 1)).cuda()
        pwc_flow = pwc(pwc_flow_in)
        # Bring the flow back to the resolution of the segmentations.
        pwc_flow = F.interpolate(pwc_flow, size=(H, W), mode='bicubic')

        return warp(moving_seg.float().unsqueeze(0).cuda(), pwc_flow.cuda()).cpu()


for epoch in trange(epochs, desc='epoch Loop', leave=False):
    student.train()
    # Start every epoch without stale gradients.
    optimizer.zero_grad()
    # Collected over the whole epoch. It used to be reset inside the sample
    # loop, so the recorded "epoch loss" was the loss of the last sample only.
    tmp_loss = []

    # Shuffle the training indices.
    train_set = train_set[torch.randperm(len(train_set))]

    for i in trange(len(train_set), desc='Train Loop', leave=False):
        p_fix = int(train_set[i])

        # Image pair and segmentations
        fixed = imgs[p_fix:p_fix+1, 0, :].unsqueeze(0).float()
        moving = imgs[p_fix:p_fix+1, 1, :].unsqueeze(0).float()

        fixed_seg = segs[p_fix:p_fix+1, 0, :].contiguous() * 2
        moving_seg = segs[p_fix:p_fix+1, 1, :].contiguous() * 2

        # Rescale once more when three distinct values are present but the
        # maximum is still <= 1, so that .long() below gives labels 0, 1, 2.
        if len(torch.where(torch.histc(fixed_seg) != 0)[0]) == 3 and fixed_seg.max() <= 1:
            fixed_seg = fixed_seg * 2
        if len(torch.where(torch.histc(moving_seg) != 0)[0]) == 3 and moving_seg.max() <= 1:
            moving_seg = moving_seg * 2

        ########## PWC-Net teacher
        warped_pwc_seg = pwc_warp_segmentation(fixed, moving, moving_seg)

        ########## Label preparation for PDD
        C1, Hf, Wf = moving_seg.size()
        label_moving_onehot = F.one_hot(moving_seg.long(), num_classes=3).permute(0, 3, 1, 2).float()
        label_moving = F.interpolate(label_moving_onehot, size=(Hf//4 + 1, Wf//4 + 1), mode='bicubic')
        label_fixed = F.one_hot(fixed_seg.long(), num_classes=3).permute(0, 3, 1, 2).float()
        label_fixed = F.interpolate(label_fixed, size=(Hf//4 + 1, Wf//4 + 1), mode='bicubic')
        # "Unfolded" moving labels: one shifted copy per discrete displacement.
        label_moving_unfold = F.unfold(label_moving, (displace_range, displace_range),
                                       padding=disp_hw).view(1, 3, displace_range**2, -1)

        ########## PDD forward pass
        soft_cost, disp_xy = student(fixed.cuda(), moving.cuda())

        # Warped label = probability-weighted sum of the shifted label copies.
        label_warped = torch.sum(soft_cost.cpu().t().unsqueeze(0) * label_moving_unfold.squeeze(0), 1)

        pwc_onehot = labelMatrixOneHot(
            F.interpolate(warped_pwc_seg, size=(H//4 + 1, W//4 + 1), mode='bicubic').view(1, H//4 + 1, W//4 + 1), 3)

        # Class-weighted squared distance to the teacher and to the ground truth.
        pwc_diff = torch.sum(torch.pow(label_warped.view(3, -1) - pwc_onehot.view(3, -1).detach(), 2), 1) * label_weights
        label_distance1 = torch.sum(torch.pow(label_fixed.reshape(3, -1) - label_warped.reshape(3, -1), 2), 1) * label_weights

        # Smoothness: squared finite differences along the two spatial axes.
        # A third term used to difference the x and y displacement channels
        # (disp_xy[0, 1:] - disp_xy[0, :-1]), which is not a spatial gradient;
        # it has been dropped.
        diffloss = 2 * ((disp_xy[0, :, 1:, :] - disp_xy[0, :, :-1, :]) ** 2).mean() + \
            2 * ((disp_xy[0, :, :, 1:] - disp_xy[0, :, :, :-1]) ** 2).mean()

        # Teacher term + smoothness + label term
        loss = 0.5 * pwc_diff.mean() + diffloss + label_distance1.mean()

        loss.backward()
        tmp_loss.append([0.5 * pwc_diff.mean().item(),
                         label_distance1.mean().item(),
                         diffloss.item()])

        # Step after every grad_accum samples. The previous test
        # `i % grad_accum == 0` stepped right after the first sample.
        if (i + 1) % grad_accum == 0:
            optimizer.step()
            optimizer.zero_grad()

    # Apply the gradients of an incomplete last group instead of carrying
    # them over into the next epoch.
    if len(train_set) % grad_accum != 0:
        optimizer.step()
        optimizer.zero_grad()

    losses.append(np.mean(tmp_loss, axis=0))

    ########## Evaluation
    student.eval()
    eval_dice = torch.zeros(len(test_set), 2)
    with torch.no_grad():
        # Visit every test pair. The loop used to stop one short, which left a
        # zero row in eval_dice and lowered the reported mean.
        for j in trange(len(test_set), desc='Eval', leave=False):
            fixed = test_imgs[j:j+1, 0, :].unsqueeze(0).float()
            moving = test_imgs[j:j+1, 1, :].unsqueeze(0).float()

            fixed_seg = test_segs[j:j+1, 0, :].contiguous() * 2
            moving_seg = test_segs[j:j+1, 1, :].contiguous() * 2

            # Only the PWC-Net result enters the score. The FlowNet2 pass that
            # used to run here produced a value that was never read.
            warped_pwc_seg = pwc_warp_segmentation(fixed, moving, moving_seg)

            _, disp_xy = student(fixed.cuda(), moving.cuda())
            pred_flow = F.interpolate(disp_xy, size=(H, W))

            warped_student_seg = F.grid_sample(
                moving_seg.unsqueeze(0).float(),
                identity + pred_flow.cpu().permute(0, 2, 3, 1),
                mode='nearest', align_corners=False).cpu()

            # Dice against the ground truth and against the teacher's warp.
            d0 = dice_coeff(warped_student_seg.squeeze(), fixed_seg.squeeze(), 3)
            d1 = dice_coeff(warped_student_seg.squeeze(), warped_pwc_seg.squeeze(), 3)
            eval_dice[j] = torch.stack([d0, d1]).mean(axis=0)

    mean_dice = eval_dice.mean()
    accs.append(mean_dice)
    if mean_dice > prev_eval:
        prev_eval = mean_dice
        best_epoch = epoch
        torch.save(student.state_dict(), f'{checkpoint_dir}/soft_trained_{best_epoch}.pth')
    # Stop once the score equals the best value and has not changed for
    # three consecutive epochs.
    if epoch > 10 and mean_dice == prev_eval and mean_dice == accs[-2] and mean_dice == accs[-3]:
        print(f"Final Eval Score: {eval_dice.mean()}")
        break
        

In [None]:
# Show the mean Dice of the most recently evaluated epoch as cell output.
eval_dice.mean()

# Fine tune

In [None]:
# Two recordings used for fine-tuning; they are concatenated in the next cell.
# NOTE(review): the file names suggest one fixed frame paired with several
# moving frames, "dist2" presumably being a frame-distance variant — confirm.
imgs1 = torch.load('/share/data_ultraschall/nicke_ma/data/frames_oneFixed_multipleMoving_dist2.pth')
segs1 = torch.load('/share/data_ultraschall/nicke_ma/data/segs_oneFixed_multipleMoving_dist2.pth')

imgs2 = torch.load('/share/data_ultraschall/nicke_ma/data/frames_oneFixed_multipleMoving.pth')
segs2 = torch.load('/share/data_ultraschall/nicke_ma/data/segs_oneFixed_multipleMoving.pth')

In [None]:
# Pool both fine-tuning recordings: imgs1/segs1 first, imgs2/segs2 after.
tune_imgs = torch.cat([imgs1, imgs2])
tune_segs = torch.cat([segs1, segs2])

# Sequential hold-out: the first 90 % of the pooled pairs are used for
# training, the remaining tail for testing.
n_tune_total = len(tune_imgs)
break_point = int(n_tune_total * 0.9)
tune_set = torch.arange(break_point)
test_tune_set = torch.arange(break_point, n_tune_total)

# Fix the seed so the shuffling in the fine-tuning loop is reproducible.
torch.manual_seed(42)

# Disabled alternative: a random 90/10 split instead of the sequential one.
#tune_set = torch.from_numpy(np.random.choice(np.arange(len(tune_imgs)),size=int(len(tune_imgs)*0.9), replace=False))
#test_tune_set = torch.arange(len(tune_imgs))
#for idx in tune_set:
#    test_tune_set = test_tune_set[test_tune_set != idx]


print(f"{tune_set.shape[0]} train examples")
print(f"{test_tune_set.shape[0]} test examples")

In [None]:
# Build a fresh student and restore the weights of the best soft-target epoch.
# `best_epoch` must still hold the value set by the soft-target training loop.
obel = OBELISK2d(16)
path_to_state_dict = f'models/Experiment_2/fineTuneSoft/soft_trained_{best_epoch}.pth'
obel.load_state_dict(torch.load(path_to_state_dict))
obel.cuda()

In [None]:
# ---- Settings for the fine-tuning stage ----

# Optimisation schedule.
epochs = 200
lr = 0.00025
grad_accum = 5  # number of samples whose gradients are summed per optimizer step

# Displacement search window: 11 x 11 candidates, half width 5.
displace_range = 11
disp_hw = 5

# Unlike the soft-target stage, fine-tuning adds a small weight decay.
optimizer = torch.optim.Adam(obel.parameters(), lr=lr, weight_decay=5e-6)

# Restart the best-score bookkeeping for this stage.
best_epoch = 0
prev_eval = 0

In [None]:
# Fine-tuning of the pre-trained student on the pooled fine-tuning data.
#
# Every epoch: shuffle the training indices, accumulate gradients over
# `grad_accum` samples per optimizer step, then score the model on the
# held-out fine-tuning pairs and checkpoint it whenever the mean Dice improves.
losses = []      # per epoch: mean training loss
accs = []        # per epoch: mean Dice over the held-out pairs
unwarped = []

checkpoint_path = 'models/Experiment_2/fineTuneSoft/tuned-17-02.pth'
# torch.save does not create missing directories.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Full-resolution identity sampling grid; it never changes, so build it once.
identity = F.affine_grid(torch.eye(2, 3).unsqueeze(0), (1, 1, H, W), align_corners=False)


def load_tune_pair(idx):
    """Return ``(fixed, moving, fixed_seg, moving_seg)`` for one tuning pair.

    Used for training and evaluation alike so that both see the labels in
    the same encoding.
    """
    fixed = tune_imgs[idx:idx+1, 0, :].unsqueeze(0).float()
    moving = tune_imgs[idx:idx+1, 1, :].unsqueeze(0).float()

    fixed_seg = tune_segs[idx:idx+1, 0, :].contiguous() * 2
    moving_seg = tune_segs[idx:idx+1, 1, :].contiguous() * 2

    # Rescale once more when three distinct values are present but the
    # maximum is still <= 1, so that .long() gives labels 0, 1, 2.
    if len(torch.where(torch.histc(fixed_seg) != 0)[0]) == 3 and fixed_seg.max() <= 1:
        fixed_seg = fixed_seg * 2
    if len(torch.where(torch.histc(moving_seg) != 0)[0]) == 3 and moving_seg.max() <= 1:
        moving_seg = moving_seg * 2

    return fixed, moving, fixed_seg, moving_seg


for epoch in trange(epochs):
    # Training mode for BatchNorm; the model was previously never switched
    # between train and eval, so evaluation updated the running statistics.
    obel.train()
    # Start every epoch without stale gradients.
    optimizer.zero_grad()
    # Collected over the whole epoch. It used to be reset inside the sample
    # loop, so the recorded "epoch loss" was the loss of the last sample only.
    loss_tmp = []

    # Shuffle the training indices.
    tune_set = tune_set[torch.randperm(len(tune_set))]

    for i in trange(len(tune_set), desc='Train loop', leave=False):
        p_fix = int(tune_set[i])
        fixed, moving, fixed_seg, moving_seg = load_tune_pair(p_fix)

        # One-hot labels on the coarse grid of the network output
        C1, Hf, Wf = moving_seg.size()
        label_moving = F.one_hot(moving_seg.long(), num_classes=3).permute(0, 3, 1, 2).float()
        label_moving = F.interpolate(label_moving, size=(Hf//4 + 1, Wf//4 + 1), mode='bicubic')

        label_fixed = F.one_hot(fixed_seg.long(), num_classes=3).permute(0, 3, 1, 2).float()
        label_fixed = F.interpolate(label_fixed, size=(Hf//4 + 1, Wf//4 + 1), mode='bicubic')
        # "Unfolded" moving labels: one shifted copy per discrete displacement.
        label_moving_unfold = F.unfold(label_moving, (displace_range, displace_range),
                                       padding=disp_hw).view(1, 3, displace_range**2, -1)

        soft_cost, disp_xy = obel(fixed.cuda(), moving.cuda())

        # Warped label = probability-weighted sum of the shifted label copies.
        label_warped = torch.sum(soft_cost.cpu().t().unsqueeze(0) * label_moving_unfold.squeeze(0), 1)

        # Class-weighted squared distance between fixed and warped labels.
        label_distance1 = torch.sum(torch.pow(label_fixed.reshape(3, -1) - label_warped.reshape(3, -1), 2), 1) * label_weights

        # Smoothness: squared finite differences along the two spatial axes.
        # A third term used to difference the x and y displacement channels
        # (disp_xy[0, 1:] - disp_xy[0, :-1]), which is not a spatial gradient;
        # it has been dropped.
        diffloss = 1.5 * ((disp_xy[0, :, 1:, :] - disp_xy[0, :, :-1, :]) ** 2).mean() + \
            1.5 * ((disp_xy[0, :, :, 1:] - disp_xy[0, :, :, :-1]) ** 2).mean()

        loss = label_distance1.mean() + diffloss
        loss.backward()
        loss_tmp.append(loss.item())

        # Step after every grad_accum samples.
        if (i + 1) % grad_accum == 0:
            optimizer.step()
            optimizer.zero_grad()

    # Apply the gradients of an incomplete last group instead of carrying
    # them over into the next epoch.
    if len(tune_set) % grad_accum != 0:
        optimizer.step()
        optimizer.zero_grad()

    losses.append(np.mean(loss_tmp))

    ########## Evaluation on the held-out fine-tuning pairs
    obel.eval()
    eval_dice = torch.zeros(len(test_tune_set), 2)
    with torch.no_grad():
        # Two fixes: the loop used to read `imgs`/`segs`, i.e. the soft-target
        # training data, instead of the held-out tuning pairs, and it stopped
        # one short, leaving a zero row that lowered the mean.
        for j in trange(len(test_tune_set), desc='Eval', leave=False):
            fixed, moving, fixed_seg, moving_seg = load_tune_pair(int(test_tune_set[j]))

            _, disp_xy = obel(fixed.cuda(), moving.cuda())
            pred_flow = F.interpolate(disp_xy, size=(H, W))

            warped_student_seg = F.grid_sample(
                moving_seg.unsqueeze(0).float(),
                identity + pred_flow.cpu().permute(0, 2, 3, 1),
                mode='nearest', align_corners=False).cpu()

            d0 = dice_coeff(warped_student_seg.squeeze(), fixed_seg.squeeze(), 3)
            eval_dice[j] = d0

    mean_dice = eval_dice.mean()
    print(eval_dice.mean(axis=0), mean_dice, " Loss: ", np.mean(loss_tmp))
    accs.append(mean_dice)
    if mean_dice > prev_eval:
        prev_eval = mean_dice
        best_epoch = epoch
        torch.save(obel.state_dict(), checkpoint_path)
    # Stop once the score equals the best value and has not changed for
    # three consecutive epochs.
    if epoch > 10 and mean_dice == prev_eval and mean_dice == accs[-2] and mean_dice == accs[-3]:
        print(f"Final Eval Score: {eval_dice.mean()}")
        break

In [None]:
# Ask the running IPython kernel to shut down; the True argument requests a restart.
# NOTE(review): presumably done to release GPU memory once training has finished.
import IPython
IPython.Application.instance().kernel.do_shutdown(True)