In [1]:
import h5py
import re
# import hickle as hkl
import torch
import torch.nn as nn
#from torch.legacy.nn import Reshape
# import graphviz
import torch.nn.functional as F
from torch.autograd import Variable
#from visualize import make_dot
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import torch.utils.data as utils
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imresize, imread, imshow
import cv2
import time
import logging
from math import log,sqrt
from PIL import Image

In [6]:
# import torch
# import torch.nn.functional as F
from torch import nn

from misc import initialize_weights1


class _EncoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels, dropout=False):
        super(_EncoderBlock, self).__init__()
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout())
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.encode = nn.Sequential(*layers)

    def forward(self, x):
        return self.encode(x)


class _DecoderBlock(nn.Module):
    def __init__(self, in_channels, middle_channels, out_channels):
        super(_DecoderBlock, self).__init__()
        self.decode = nn.Sequential(
            nn.Conv2d(in_channels, middle_channels, kernel_size=3),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(middle_channels, middle_channels, kernel_size=3),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=2, stride=2),
        )

    def forward(self, x):
        return self.decode(x)


class UNet(nn.Module):
    """U-Net style encoder/decoder for per-pixel classification of 3-channel images.

    Four ``_EncoderBlock`` stages feed a centre block, followed by three
    ``_DecoderBlock`` stages and a final pair of convolutions. Every decoder
    stage receives the previous decoder output concatenated (along the channel
    axis) with the matching encoder output. The 3x3 convolutions in ``dec1``
    are unpadded, so spatial sizes do not line up on their own; encoder
    features are bilinearly resized to the decoder size before concatenation
    and the class scores are resized back to the input size at the end.

    Args:
        num_classes (int): Number of output channels (one score map per class).
    """

    def __init__(self, num_classes):
        super(UNet, self).__init__()
        # Attribute names below are the state_dict prefixes used by saved
        # checkpoints; do not rename them.
        self.enc1 = _EncoderBlock(3, 64)
        self.enc2 = _EncoderBlock(64, 128)
        self.enc3 = _EncoderBlock(128, 256)
        self.enc4 = _EncoderBlock(256, 512, dropout=True)
        self.center = _DecoderBlock(512, 1024, 512)
        self.dec4 = _DecoderBlock(1024, 512, 256)
        self.dec3 = _DecoderBlock(512, 256, 128)
        self.dec2 = _DecoderBlock(256, 128, 64)
        self.dec1 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.final = nn.Conv2d(64, num_classes, kernel_size=1)
        initialize_weights1(self)

    @staticmethod
    def _resize_to(features, spatial_size):
        """Bilinearly resize ``features`` (N, C, H, W) to ``spatial_size`` = (H', W').

        ``align_corners=False`` is spelled out because it is what the
        deprecated ``F.upsample(..., mode='bilinear')`` call defaulted to
        (since PyTorch 0.4); being explicit keeps the numerical behaviour and
        silences the warning.
        """
        return F.interpolate(
            features, size=spatial_size, mode='bilinear', align_corners=False
        )

    def _merge_skip(self, decoded, skip):
        """Concatenate ``decoded`` with ``skip`` resized to ``decoded``'s spatial size."""
        resized_skip = self._resize_to(skip, decoded.shape[2:])
        return torch.cat([decoded, resized_skip], 1)

    def forward(self, x):
        """Compute per-pixel class log-probabilities.

        Args:
            x (Tensor): Input batch of shape (N, 3, H, W).

        Returns:
            Tensor: Log-softmax over the class axis, shape
            (N, num_classes, H, W) with the same H and W as ``x``.
        """
        enc1 = self.enc1(x)
        enc2 = self.enc2(enc1)
        enc3 = self.enc3(enc2)
        enc4 = self.enc4(enc3)
        center = self.center(enc4)

        dec4 = self.dec4(self._merge_skip(center, enc4))
        dec3 = self.dec3(self._merge_skip(dec4, enc3))
        dec2 = self.dec2(self._merge_skip(dec3, enc2))
        dec1 = self.dec1(self._merge_skip(dec2, enc1))

        scores = self.final(dec1)
        # Bring the class scores back to the resolution of the input image.
        scores = self._resize_to(scores, x.shape[2:])
        # dim=1 is the class axis; this is what the implicit-dim call resolved
        # to for 4-D input, now stated explicitly.
        return F.log_softmax(scores, dim=1)

In [7]:
def get_predictions(output_batch):
    """Return the highest-scoring class index for every pixel.

    Args:
        output_batch: 4-D score tensor laid out as (batch, classes, height, width).

    Returns:
        CPU tensor of class indices with shape (batch, height, width).
    """
    batch_size, _, height, width = output_batch.size()
    scores_on_cpu = output_batch.data.cpu()
    # max over the class axis returns (values, indices); only the indices matter.
    best_class = scores_on_cpu.max(1)[1]
    return best_class.view(batch_size, height, width)

In [5]:
def batch_rgb_to_bgr(batch):
    """Swap the first and last channel groups of ``batch`` along dim 1.

    The tensor is split into three chunks along dim 1 and re-joined in
    reverse order, which turns an (N, 3, H, W) RGB batch into BGR.
    """
    red, green, blue = torch.chunk(batch, 3, dim=1)
    return torch.cat([blue, green, red], dim=1)

In [6]:
class Normalize(object):
    """
    Normalize a tensor image with a per-channel mean and standard deviation.
    Given mean: (R, G, B) and std: (R, G, B), each channel of the
    torch.*Tensor is transformed as
    channel = (channel - mean) / std
    Args:
        mean (sequence): Sequence of means for the R, G, B channels respectively.
        std (sequence): Sequence of standard deviations for the R, G, B
            channels respectively.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Normalize ``tensor`` in place, one channel at a time.

        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: The same tensor object, modified in place.
        """
        # TODO: make efficient
        per_channel = zip(tensor, self.mean, self.std)
        for channel, channel_mean, channel_std in per_channel:
            # Each `channel` is a view into `tensor`, so the in-place ops
            # update the caller's tensor directly.
            channel.sub_(channel_mean)
            channel.div_(channel_std)
        return tensor

In [8]:
import cv2 
import numpy as np
import argparse
import sys
import scipy.misc

###################### Weights for CamVid ####################
# Settings for this cell. Sizes are (width, height), the order expected by
# cv2.resize and cv2.VideoWriter.
CAMVID_WEIGHTS = "UNET_seg_Camvid_epochs_135.pth"
CAMVID_VIDEO = '0005VD.MXF'
CAMVID_FRAME_SIZE = (480, 360)
CAMVID_STEP_MS = 70       # one frame is sampled every 70 ms of source video
CAMVID_LABEL_SCALE = 70   # class ids are multiplied by this before colour-mapping

net = UNet(12)
net = net.cuda()
net.load_state_dict(torch.load(CAMVID_WEIGHTS))
# Inference only: switch Dropout off and make BatchNorm use its running
# statistics instead of single-frame batch statistics.
net.eval()
######################################################################
cap = cv2.VideoCapture(CAMVID_VIDEO)
if not cap.isOpened():
    # Without this check a missing/unreadable file silently yields empty videos.
    raise IOError("Could not open input video: " + CAMVID_VIDEO)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out1 = cv2.VideoWriter('video.avi', fourcc, 20.0, CAMVID_FRAME_SIZE)
out2 = cv2.VideoWriter('segment.avi', fourcc, 20.0, CAMVID_FRAME_SIZE)

count = 0
try:
    while True:
        # Seek to the timestamp of the next sample, then grab that frame.
        cap.set(cv2.CAP_PROP_POS_MSEC, count * CAMVID_STEP_MS)
        ret, frame = cap.read()
        if not ret:
            # No more frames (or the seek went past the end of the video).
            break

        frame1 = cv2.resize(frame, CAMVID_FRAME_SIZE)
        rgb = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)

        # (H, W, 3) values in [0, 1] -> float32 tensor of shape (1, 3, H, W).
        test = torch.from_numpy(rgb / 255).permute(2, 0, 1).unsqueeze(0).float()

        # no_grad replaces the removed `volatile=True` flag: no autograd
        # bookkeeping is kept for the forward pass.
        with torch.no_grad():
            test_pred = net(test.cuda())

        # (1, H, W) class indices on the CPU -> (H, W) uint8 label image.
        labels = get_predictions(test_pred)[0].numpy().astype(np.uint8)

        # NOTE: the product is uint8, so labels >= 4 wrap modulo 256
        # (e.g. 4 * 70 = 280 -> 24). The 12 resulting values are still
        # distinct, and this matches the previously written videos.
        col_pred = cv2.applyColorMap(labels * CAMVID_LABEL_SCALE, cv2.COLORMAP_JET)

        cv2.imshow('Pred', col_pred)
        cv2.imshow('Video', frame1)
        out1.write(frame1)
        out2.write(col_pred)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        count += 1
finally:
    # Always finalise the output files and close the windows, including when
    # the cell is interrupted from the notebook.
    cap.release()
    out1.release()
    out2.release()
    cv2.destroyAllWindows()

  nn.init.kaiming_normal(module.weight)
  "See the documentation of nn.Upsample for details.".format(mode))


In [7]:
import cv2 
import numpy as np
import argparse
import sys
import scipy.misc

###################### Weights for CSS ####################
# Settings for this cell. Sizes are (width, height), the order expected by
# cv2.resize and cv2.VideoWriter.
CSS_WEIGHTS = "./CSS_weight/UNET_weight_epochs_100.pth"
CSS_VIDEO = '2.MP4'
CSS_FRAME_SIZE = (480, 270)
CSS_STEP_MS = 100       # one frame is sampled every 100 ms of source video
CSS_LABEL_SCALE = 100   # class ids are multiplied by this before colour-mapping

net = UNet(3)
net = net.cuda()
net.load_state_dict(torch.load(CSS_WEIGHTS))
# Inference only: switch Dropout off and make BatchNorm use its running
# statistics instead of single-frame batch statistics.
net.eval()
###############################################################################

cap = cv2.VideoCapture(CSS_VIDEO)
if not cap.isOpened():
    # Without this check a missing/unreadable file silently yields empty videos.
    raise IOError("Could not open input video: " + CSS_VIDEO)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out1 = cv2.VideoWriter('video_CSS.avi', fourcc, 20.0, CSS_FRAME_SIZE)
out2 = cv2.VideoWriter('segment_CSS.avi', fourcc, 20.0, CSS_FRAME_SIZE)

count = 0
try:
    while True:
        # Seek to the timestamp of the next sample, then grab that frame.
        cap.set(cv2.CAP_PROP_POS_MSEC, count * CSS_STEP_MS)
        ret, frame = cap.read()
        if not ret:
            # No more frames (or the seek went past the end of the video).
            break

        frame1 = cv2.resize(frame, CSS_FRAME_SIZE)
        rgb = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)

        # (H, W, 3) values in [0, 1] -> float32 tensor of shape (1, 3, H, W).
        test = torch.from_numpy(rgb / 255).permute(2, 0, 1).unsqueeze(0).float()

        # no_grad replaces the removed `volatile=True` flag: no autograd
        # bookkeeping is kept for the forward pass.
        with torch.no_grad():
            test_pred = net(test.cuda())

        # (1, H, W) class indices on the CPU -> (H, W) uint8 label image.
        labels = get_predictions(test_pred)[0].numpy().astype(np.uint8)

        # Three classes scaled by 100 give 0 / 100 / 200, which fits in uint8.
        col_pred = cv2.applyColorMap(labels * CSS_LABEL_SCALE, cv2.COLORMAP_JET)

        cv2.imshow('Pred', col_pred)
        cv2.imshow('Video', frame1)
        out1.write(frame1)
        out2.write(col_pred)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        count += 1
finally:
    # Always finalise the output files and close the windows, including when
    # the cell is interrupted from the notebook.
    cap.release()
    out1.release()
    out2.release()
    cv2.destroyAllWindows()

  nn.init.kaiming_normal(module.weight)
  "See the documentation of nn.Upsample for details.".format(mode))


KeyboardInterrupt: 

In [None]:
# import cv2, pafy
# url = 'https://www.youtube.com/watch?v=b7BEAsyPgHM'
# # vPafy = pafy.new(url)
# # play = vPafy.getbest(preftype="webm")
# # url = "https://www.youtube.com/watch?v=aKX8uaoy9c8"
# videoPafy = pafy.new(url)
# play = videoPafy.getbest(preftype="webm")

#     #start the video
# cap = cv2.VideoCapture(play.url)
# while (True):
#     ret,frame = cap.read()
#     print(frame.shape)
#     cv2.imshow('frame',frame)
#     if cv2.waitKey(20) & 0xFF == ord('q'):
#         break    

# cap.release()
# cv2.destroyAllWindows()

In [None]:
# import cv2
# import pafy

# url = "https://www.youtube.com/watch?v=b7BEAsyPgHM"
# video = pafy.new(url)
# best = video.getbest(preftype="mp4")

# capture = cv2.VideoCapture()
# capture.open(best.url)

# success,image = capture.read()
# print(image.shape)

# while success:
#     cv2.imshow('frame', image)
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

#     success,image = capture.read()

# cv2.destroyAllWindows()
# capture.release()