In [105]:
%reset -f
import torch
from torchvision import transforms
from matplotlib import pyplot as plt
#from models import TCNmodel
import numpy as np
from random import randrange
from skimage.io import imread_collection
from IPython.core.display import HTML
from torch import nn
from sklearn.metrics.pairwise import cosine_similarity

train_sample,frame = torch.load('dataset.pt')
#train_sample,frame = torch.load('dataset_test.pt')

torch.cuda.empty_cache()

In [129]:
class Convblock(nn.Module):
    def __init__(self, in_channels,hidden_channels,out_channels):
        super(Convblock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, hidden_channels,
                               kernel_size=5)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(hidden_channels, out_channels,
                               kernel_size=5)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        # reshape
        x = x.view(-1,288,42,77) # x.view(-1,288,35,35) for 299 x 299 else 360 x 640
        x = self.conv1(x)
        # Activation function
        x = self.relu1(x)
        # Second convolutional layer
        x = self.conv2(x)
        # Activation function
        x = self.relu2(x)
        return x

# spatial Softmax and last linear layer
class SpatialSoftmax(torch.nn.Module):
    def __init__(self, height, width, channel, temperature=None):
        super(SpatialSoftmax, self).__init__()
        self.height = height
        self.width = width
        self.channel = channel

        if temperature:  
              self.temperature = torch.ones(1)*temperature   
        else:   
              self.temperature = nn.Parameter(torch.ones(1))  

        pos_x, pos_y = np.meshgrid(
                np.linspace(-1., 1., self.height),
                np.linspace(-1., 1., self.width)
                )
        pos_x = torch.from_numpy(pos_x.reshape(self.height*self.width)).float()
        pos_y = torch.from_numpy(pos_y.reshape(self.height*self.width)).float()
        self.register_buffer('pos_x', pos_x)
        self.register_buffer('pos_y', pos_y)
        self.linear =nn.Linear(32, 32)

    def forward(self, feature):
        feature = feature.view(-1,self.height*self.width)

        softmax_attention = nn.functional.softmax(feature/self.temperature, dim=-1)
        expected_x = torch.sum(self.pos_x*softmax_attention, dim=1, keepdim=True)
        expected_y = torch.sum(self.pos_y*softmax_attention, dim=1, keepdim=True)
        expected_xy = torch.cat([expected_x, expected_y], 1)
        feature_keypoints = expected_xy.view(-1, self.channel*2)
        feature_keypoints = self.linear(feature_keypoints)

        return feature_keypoints

# TCNmodel
class TCNmodel(nn.Module):
  # input tensor: N x 3 x H(>=299) x W(>=299) 
  # output tensor: N x 32


    def __init__(self):
        super(TCNmodel, self).__init__()
        self.model = torch.hub.load('pytorch/vision:v0.8.0', 'inception_v3', pretrained=True)
        self.conv2 = Convblock(288,32,16)
        self.spmax = SpatialSoftmax(34,69,16,temperature=None) #SpatialSoftmax(27,27,16,temperature=None) for 299 x 299 else for 360 x 640

    def forward(self, x):
        x = self.model.Conv2d_1a_3x3(x)
        x = self.model.Conv2d_2a_3x3(x)
        x = self.model.Conv2d_2b_3x3(x)
        x = self.model.maxpool1(x)

        x = self.model.Conv2d_3b_1x1(x)
        x = self.model.Conv2d_4a_3x3(x)
        x = self.model.maxpool2(x)
        x = self.model.Mixed_5b(x)
        x = self.model.Mixed_5c(x)
        x = self.model.Mixed_5d(x)
        x = self.conv2(x)
        x = self.spmax(x)

        return x

def output_dataset(data,frame, net,device):
    net = net.to(device)
    data = data.reshape(train_sample.shape[1],3,360,640)
    i = 0 
    final_output = []
    F,C,W,H = data.shape
    with torch.no_grad():
        for j in range(frame.shape[0]):
            f = frame[j]+i
            inputs = data[i:f,:,:,:]
            i = f     
            inputs= inputs.to(device)
            outputs = net(inputs)
            final_output.append(outputs)
    torch.cuda.empty_cache()
    return final_output

#looks for nearest neighbour
def cosine(outputs):
    list_frames_outputs = []
    for i in range(len(outputs)):
        output = outputs[i].cpu().detach().numpy() 
        cosine = cosine_similarity(output)
        prev_frame = 0
        list = []
        list.append(prev_frame)
        for i in range(len(output-1)):
            cosine[i][i] = 0
            cosine[:,prev_frame] = 0
            test = np.nanargmax(cosine[i])
            prev_frame = test
            list.append(test)
        list_frames_outputs.append(list)
    return list_frames_outputs

# searches for frames between -1 and 1 frame away > so for frame 3 the next frame can be 4 or 2 
def test_accuracy(list,tolerance):
    accuracy = []
    for j in range(len(list)):
        acc = 0
        for i in range(len(list[j])-1):
            if list[j][i+1] in range(list[j][i]-tolerance-1, list[j][i]+tolerance+1):
                acc += 1 
        accuracy.append((acc / (len(list[j])-1))*100)
    return accuracy

def test(outputs):
    list_frames_outputs = cosine(outputs)
    test_accuracy_list = test_accuracy(list_frames_outputs, 1)
    av_accuracy = np.mean(test_accuracy_list)
    return list_frames_outputs , test_accuracy_list, av_accuracy

import cv2
import IPython.display
import numpy as np
import PIL.Image
from matplotlib import animation
from matplotlib import pyplot as plt


def create_animation(images, interval_ms=100, **fig_kwargs):

    # use larger plot by default
    if "figsize" not in fig_kwargs:
        fig_kwargs["figsize"] = (13, 9)

    fig, ax = plt.subplots(**fig_kwargs)
    fig.tight_layout()
    ax.axis("off")
    im = ax.imshow(images[0][:, :, ::-1])
    plt.close()  # this is required to not display the generated image

    def init():
        im.set_data(images[0][:, :, ::-1])  # ::-1: BGR --> RGB

    def animate(i):
        image = images[i]
        if image is not None:
            im.set_data(image[:, :, ::-1])
        return im

    anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(images), interval=interval_ms)

    # disable warning for video creation (anim.to_*())
    import logging

    logging.getLogger("matplotlib.animation").disabled = True

    # from IPython.core.display import HTML
    # use HTML(anim.to_html5_video()) to show within jupyter notebook as video
    # or HTML(anim.to_jshtml()) to show within jupyter notebook as interactive widget
    return anim


def draw_bbox_to_image(image, bbox, color=(0, 255, 0), thickness=5):
    corner_coords = bbox.get_bbox_corners_vis()
    image = cv2.rectangle(image, corner_coords[:2], corner_coords[-2:], color, thickness)
    return image


def showimage(a):
    """Show an image below the current jupyter notebook cell.
    Expects gray or bgr input (opencv2 default)"""
    # bgr -> rgb
    if len(a.shape) > 2 and a.shape[2] == 3:
        a = a[..., ::-1]  # bgr -> rgb
    image = PIL.Image.fromarray(a)
    IPython.display.display(image)  # display in cell output


# https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=12
colors_qualitative = np.array(
    [
        [166, 206, 227],
        [31, 120, 180],
        [178, 223, 138],
        [51, 160, 44],
        [251, 154, 153],
        [227, 26, 28],
        [253, 191, 111],
        [255, 127, 0],
        [202, 178, 214],
        [106, 61, 154],
        [255, 255, 153],
        [177, 89, 40],
    ]
)

# rgb representation for k3d
colors_qualitative_k3d = np.dot(colors_qualitative, np.asarray([2 ** 16, 2 ** 8, 2 ** 0])).tolist()


In [134]:
for i in range(5):
    PATH = 'C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_' +str(9*i) + '.pth'
    print(PATH)

C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_0.pth
C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_9.pth
C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_18.pth
C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_27.pth
C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_36.pth


In [130]:
%%time
for i in range(5):
    PATH = 'C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_' +29 + '.pth'
    print(PATH)
PATH = 'C:/Users/maxwa/Documents/TCN_self_supervised_learning/SaveModel_29.pth'
model = TCNmodel()
model.load_state_dict(torch.load(PATH))
model.eval()
device = 'cuda'
outputs = output_dataset(train_sample,frame,model,device)
list_frames_outputs, accuracy,av_accuracy = test(outputs)

Using cache found in C:\Users\Maxwa/.cache\torch\hub\pytorch_vision_v0.8.0


CPU times: total: 9.89 s
Wall time: 9.13 s


In [131]:
print(av_accuracy)
col_dir_test = 'frames/train/1/*.jpg'
#creating a collection with the available images
img_test = imread_collection(col_dir_test)
print(len(img_test))
print(len(list_frames_outputs[0]))
print(list_frames_outputs[0])
images = []
for i in range(len(list_frames_outputs[0])):
    index = list_frames_outputs[0][i]
    images.append(img_test[index])
    
anim = create_animation(images,interval_ms=100)
video = anim.to_html5_video()
HTML(video)

37.590653165182886
41
42
[0, 36, 33, 3, 2, 37, 9, 7, 6, 38, 5, 29, 12, 11, 27, 15, 14, 17, 16, 20, 22, 18, 23, 19, 21, 25, 32, 13, 26, 34, 28, 31, 30, 10, 1, 4, 24, 8, 39, 35, 40, 0]


In [None]:
col_dir_test = 'frames/test/*.jpg'
#creating a collection with the available images
img_test = imread_collection(col_dir_test)
img_test_1 = img_test[:46]
print(len(img_test_1))
print(img_test_1[0].shape)
test = np.zeros([46,1080,1920,3])
for i in range(46):
    test[i,:,:,:] = img_test_1[i]
    
print(test.shape)
test = np.reshape(test,(46,1080*1920*3))
print(test.shape)
#list_frames_outputs, accuracy,av_accuracy  = test(test)