In [None]:
import cv2
import numpy as np
import math
import torch
import torch.nn as nn
import torch.functional as F
print(torch.__version__)
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm, trange
%matplotlib notebook

In [None]:
# Load the training image. OpenCV decodes to BGR, so convert to RGB for
# matplotlib and for the colour targets.
image_bgr = cv2.imread("head.jpg")
if image_bgr is None:
    # cv2.imread reports failure by returning None rather than raising;
    # fail here with a readable message instead of inside cvtColor.
    raise FileNotFoundError('Could not read image "head.jpg"')

image_rgb = cv2.cvtColor(image_bgr,cv2.COLOR_BGR2RGB)


h,w,c = image_rgb.shape
print(h,w)

# One (row, col) coordinate per pixel, flattened in row-major order so that
# it lines up with the flattened colour targets below.
# NOTE: linspace(0, h, h) yields h samples from 0 to h *inclusive*, so the
# grid spacing is h/(h-1) rather than exactly one pixel. This is kept as-is
# because the rest of the notebook was written against these coordinates.
input_dataset = torch.meshgrid(
    torch.linspace(0,h,h),
    torch.linspace(0,w,w)
)

input_dataset  = torch.stack(input_dataset,2).reshape(-1,2)
# torch.tensor copies the pixel data, so later drawing cannot leak into the targets.
target_dataset = torch.tensor(image_rgb).reshape(-1,3).float() / 255.0

print("Input Shape" ,list(input_dataset.shape ))
print("Target Shape",list(target_dataset.shape))


# Anchor table: ((row, col), sigma). Keeping both values on one line stops the
# two derived lists from drifting out of step when anchors are added or disabled.
anchors = [
    ((1015, 952), 400),  # face center
    ((1152, 952), 100),  # nose tip
    (( 918, 777), 350),
    (( 923,1136), 300),
    ((1121, 686), 200),  # left cheek
    ((1121,1193), 200),  # right cheek
#   ((1276, 747), 300),  # disabled
#   ((1286,1126), 300),  # disabled
#   ((1501, 854), 300),  # disabled
#   ((1481,1054), 300),  # disabled
    (( 580, 665), 200),
    (( 670, 942), 300),
    (( 539,1213), 200),
]

sigmas = [sigma for _, sigma in anchors]
means  = [list(center) for center, _ in anchors]

# Draw the anchors on a copy so image_rgb itself stays unmodified.
anchor_preview = image_rgb.copy()
for mean,sigma in zip(means,sigmas):
    y,x = mean
    # OpenCV points are (x, y) while the anchors are stored as (row, col)
    cv2.circle(anchor_preview,(x,y),sigma,(255,0,0),5)

plt.imshow(anchor_preview)

In [None]:
class PositionEncoder(nn.Module):
    """Gaussian radial-basis position encoding followed by a linear mix.

    Each input position is scored against every anchor with
    ``exp(-0.5 * d^2 / (sigma^2 + 10e-4))`` and the scores are projected to
    ``dim`` features by a bias-free linear layer. Anchor means and sigmas are
    stored as parameters with gradients disabled, so only the linear layer
    is trainable.
    """

    def __init__(self,means,sigmas,dim):
        """
        means:  one position per anchor (the notebook passes [row, col] pairs)
        sigmas: one radius per anchor
        dim:    number of output features
        """
        super().__init__()

        # A leading axis of size 1 lets the anchors broadcast against a batch.
        anchor_means  = torch.tensor(means ).unsqueeze(0).float()
        anchor_sigmas = torch.tensor(sigmas).unsqueeze(0).float()

        self.means  = nn.Parameter(anchor_means ,requires_grad=False)
        self.sigmas = nn.Parameter(anchor_sigmas,requires_grad=False)
        self.fc = nn.Linear(len(means),dim,bias=False)

    def forward(self,x):
        """Map positions ``x`` ([batch_size, input_dim]) to ``dim`` features each."""
        # [batch_size, 1, input_dim] minus the anchors -> one offset per (sample, anchor)
        offsets = x.unsqueeze(1) - self.means

        # Squared distance from each sample to each anchor: [batch_size, num_points]
        dist_squared = offsets.pow(2).sum(dim=2)

        # 10e-4 (i.e. 1e-3) keeps the denominator non-zero for a zero sigma
        variance = self.sigmas**2 + 10e-4
        activations = torch.exp(-0.5* dist_squared / variance)

        return self.fc(activations)

    def export(self):
        """Return ``(means, sigmas, features)`` as nested Python lists.

        ``features`` is the linear weight transposed to one row per anchor.
        The weight shape is printed as a side effect.
        """
        weight = self.fc.weight
        print(weight.shape)

        return (
            self.means.squeeze(0).tolist(),
            self.sigmas.squeeze(0).tolist(),
            weight.transpose(1,0).tolist(),
        )
    

In [None]:
class SoftmaxPositionEncoder(nn.Module):
    """Position encoding that blends per-anchor feature vectors.

    For every input position the score ``-(distance / sigma)^2`` is computed
    against each anchor, the scores are normalised with a softmax over the
    anchors, and the result is the weighted sum of the anchors' feature
    vectors. Means, sigmas and features are all trainable parameters.
    """

    def __init__(self,means,sigmas,dim):
        """
        means:  one position per anchor, [num_anchors, 2]
        sigmas: one radius per anchor,   [num_anchors]
        dim:    length of each anchor's feature vector
        """
        super().__init__()
        num_anchors = len(means)

        means    = torch.tensor(means ).float() #[num_anchors,2]
        sigmas   = torch.tensor(sigmas).float() #[num_anchors]
        features = torch.randn(num_anchors,dim) #[num_anchors,dim]

        self.means    = nn.Parameter(means   , requires_grad=True)
        self.sigmas   = nn.Parameter(sigmas  , requires_grad=True)
        self.features = nn.Parameter(features, requires_grad=True)

    def forward(self,x):
        """Map positions ``x`` ([batch_size, 2]) to features ([batch_size, dim])."""

        # add a dimension so that x broadcasts with the means
        x = x.unsqueeze(1) #[batch_size, 1 , input_dim]

        # Calculate the difference between the inputs and the means
        diff = x - self.means.unsqueeze(0) #[batch_size, num_anchors , 2]

        # Squared distance from each sample to each mean.
        # The score only needs distance^2, so no square root is taken: the
        # gradient of sqrt at 0 is infinite and turns the means' gradient into
        # NaN whenever an input coincides exactly with an anchor.
        dist_squared = (diff**2).sum(dim=2)  #[batch_size, num_anchors]

        weighting = - dist_squared / (self.sigmas.unsqueeze(0)**2)  #[batch_size, num_anchors]

        weighting = torch.softmax(weighting,dim=1) #[batch_size, num_anchors]

        x = weighting @ self.features
        return x

    def export(self):
        """Return ``(means, sigmas, features)`` as nested Python lists.

        Each list has one entry per anchor. No ``squeeze`` is applied: the
        tensors carry no leading broadcast axis, and squeezing would collapse
        the anchor axis when there is exactly one anchor.
        """
        means_list    = self.means.detach().tolist()
        sigmas_list   = self.sigmas.detach().tolist()
        # This encoder has no linear layer; the per-anchor features are stored directly.
        features_list = self.features.detach().tolist()

        return means_list, sigmas_list, features_list
    

In [None]:
class Model(nn.Module):
    """Fully connected network mapping a ``dim``-long feature vector to 3 outputs.

    Layer widths are dim -> 256 -> 128 -> 64 -> 3 with Tanh between the linear
    layers and no activation on the output.
    """

    def __init__(self,dim):
        super().__init__()
        f = 256
        widths = [dim, f, f//2, f//4]

        # Hidden stack: one Linear + Tanh pair per consecutive pair of widths.
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in,n_out))
            layers.append(nn.Tanh())

        # Output head: three values per sample (used as RGB by the notebook).
        layers.append(nn.Linear(widths[-1],3))

        self.fc = nn.Sequential(*layers)

    def forward(self,x):
        """Run ``x`` ([batch_size, dim]) through the stack; returns [batch_size, 3]."""
        # Note: L2-normalising the input was tried here and is currently disabled.
        return self.fc(x)

In [None]:
def train(model,input_dataset,target_dataset,image_shape=None,encoder=None):
    """Fit ``model`` to map pixel coordinates to colours, previewing every epoch.

    Runs 1000 epochs of Adam (lr 0.005, cosine-annealed) with an MSE loss over
    shuffled mini-batches. After each epoch the most recent prediction for every
    pixel is shown in the OpenCV window "img" with the anchor circles on top.

    Args:
        model: module mapping [batch, 2] coordinates to [batch, 3] colours.
        input_dataset: [num_pixels, 2] coordinates, one row per pixel of a
            flattened (rows, cols) grid.
        target_dataset: [num_pixels, 3] target colours in the 0..1 range.
        image_shape: optional ``(rows, cols)`` of the image. When omitted it is
            inferred from the number of distinct row coordinates in
            ``input_dataset``.
        encoder: optional module exposing ``means`` and ``sigmas`` to draw as
            anchors. When omitted, the first such sub-module of ``model`` is
            used; if there is none, no anchors are drawn.

    Raises:
        ValueError: if ``image_shape`` does not match the number of samples.
    """
    cv2.namedWindow("img",0)
    # Fall back to the CPU instead of failing when CUDA is unavailable.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    epochs = 1000
    data_size = input_dataset.shape[0]
    # Cap the batch at the dataset size; otherwise a small dataset yields
    # zero batches and the loop silently trains nothing.
    batch_size = min(2**15, data_size)
    batch_count = data_size // batch_size

    if image_shape is None:
        # The dataset is a flattened grid, so the number of distinct row
        # coordinates is the image height.
        num_rows = int(torch.unique(input_dataset[:,0]).numel())
        image_shape = (num_rows, data_size // num_rows)
    img_h, img_w = image_shape
    if img_h * img_w != data_size:
        raise ValueError(
            "image_shape {} does not match {} samples".format(image_shape, data_size))

    if encoder is None:
        # Locate the position encoder inside the model rather than relying on
        # a notebook-level global.
        encoder = next(
            (m for m in model.modules() if hasattr(m,"means") and hasattr(m,"sigmas")),
            None)

    criterion = nn.MSELoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=epochs)

    # Latest prediction for every pixel, refreshed batch by batch.
    output_image = torch.zeros_like(target_dataset).to(device)

    for epoch_i in trange(epochs):
        # Shuffle, drop the remainder that does not fill a batch, split into batches.
        indicies = torch.randperm(data_size)
        indicies = indicies[:(batch_count*batch_size)]
        indicies = indicies.reshape(-1,batch_size)

        for batch_indicies in indicies:
            input_tensor = input_dataset[batch_indicies].to(device)
            target_tensor = target_dataset[batch_indicies].to(device)

            output_tensor = model(input_tensor)

            output_image[batch_indicies] = output_tensor.detach()

            loss = criterion(output_tensor,target_tensor)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

        # Preview: predictions are RGB in 0..1; OpenCV displays 8-bit BGR.
        img_rgb = (output_image.reshape(img_h,img_w,3)*255).detach().cpu().numpy()
        img_rgb = np.clip(img_rgb,0,255).astype(np.uint8)
        img_bgr = cv2.cvtColor(img_rgb,cv2.COLOR_RGB2BGR)
        if encoder is not None:
            # reshape so encoders that keep a leading broadcast axis also work
            anchor_means  = encoder.means.detach().cpu().reshape(-1,2)
            anchor_sigmas = encoder.sigmas.detach().cpu().reshape(-1)
            for mean,sigma in zip(anchor_means,anchor_sigmas):
                y,x = mean
                # A trained sigma may be negative (only sigma^2 enters the
                # encoder), but cv2.circle requires a non-negative radius.
                radius = abs(int(sigma))
                cv2.circle(img_bgr,(int(x),int(y)),radius,(255,0,0),5)
        cv2.imshow("img",img_bgr)
        cv2.waitKey(1)

# Build the coordinate -> colour pipeline: the softmax anchor encoder emits
# 128 features per pixel, which the MLP decodes to RGB. `encoder` and `model`
# are kept as notebook-level names because the animation cells read them.
encoder = SoftmaxPositionEncoder(means, sigmas, dim=128)
model = Model(dim=128)
seq = nn.Sequential(encoder, model)

# Fit the combined network to the image (opens an OpenCV preview window).
train(model=seq, input_dataset=input_dataset, target_dataset=target_dataset)

In [None]:
class Transform():
    """Node in a 2-D transform hierarchy that collects anchors in root coordinates.

    A transform is a translation, a rotation and a uniform scale relative to
    its parent. Points are row vectors in (y, x) order and are mapped with
    ``point @ rotation_matrix + translation``. All transforms derived from the
    same root share the root's anchor lists, so ``get_encoder_tensors`` on any
    of them returns every anchor added anywhere in the tree.

    Args:
        parent_transform: transform to compose with, or None to start a new
            tree with empty anchor lists.
        x, y: translation, expressed in the parent's frame.
        angle: rotation in degrees.
        scale: uniform scale factor.
    """
    def __init__(self,parent_transform = None,x=0.0,y=0.0,angle=0.0,scale=1.0):

        if parent_transform is None:
            # New tree: fresh anchor lists and an identity parent. Composing
            # with the identity (rather than ignoring the arguments) means
            # x/y/angle/scale are honoured for a root transform as well.
            self.mean_tensor_list    = []
            self.std_tensor_list     = []
            self.feature_tensor_list = []

            parent_translation = torch.zeros(1,2)
            parent_rotation    = torch.eye(2)
            parent_scale       = 1.0
        else:
            # Share (not copy) the lists so anchors accumulate at the root.
            self.mean_tensor_list    = parent_transform.mean_tensor_list
            self.std_tensor_list     = parent_transform.std_tensor_list
            self.feature_tensor_list = parent_transform.feature_tensor_list

            parent_translation = parent_transform.translation
            parent_rotation    = parent_transform.rotation_matrix
            parent_scale       = parent_transform.scale

        # Local transform; translation is stored in (y, x) order
        translation = torch.tensor([[y,x]]).float()
        c = math.cos(math.radians(angle))
        s = math.sin(math.radians(angle))
        rotation_matrix = torch.tensor([[c,s],[-s,c]])

        # The local offset is rotated and scaled by the parent before being added.
        self.translation = parent_translation + translation @ parent_rotation
        # rotation_matrix carries the accumulated scale as well as the rotation
        self.rotation_matrix = parent_rotation @ rotation_matrix * scale
        self.scale = parent_scale * scale

    def add_anchors(self,mean_tensor,std_tensor,feature_tensor):
        """Append anchors given in this transform's local frame.

        Means are rotated, scaled and translated into the root frame; stds are
        multiplied by the accumulated scale; features are stored unchanged.
        """
        mean_tensor = mean_tensor @ self.rotation_matrix
        mean_tensor = mean_tensor + self.translation
        std_tensor  = std_tensor  * self.scale

        self.mean_tensor_list.append(mean_tensor)
        self.std_tensor_list.append(std_tensor)
        self.feature_tensor_list.append(feature_tensor)

    def get_encoder_tensors(self):
        """Concatenate every collected anchor into ``(means, stds, features)``."""
        mean_tensor    = torch.cat(self.mean_tensor_list   ,dim=0)
        std_tensor     = torch.cat(self.std_tensor_list    ,dim=0)
        feature_tensor = torch.cat(self.feature_tensor_list,dim=0)

        return mean_tensor,std_tensor,feature_tensor
        
        
        

In [None]:

    
class Animator():
    """Renders animations by evaluating the trained network over every pixel.

    For each frame a user-supplied draw function places anchors through a
    ``Transform`` tree. The resulting means/sigmas/features are loaded into a
    ``SoftmaxPositionEncoder`` and the network is evaluated on the whole pixel
    grid to produce the image.

    Args:
        img_w, img_h: output frame size in pixels.
        network: module that decodes encoder features to RGB. Defaults to the
            notebook-level ``model``.
    """
    def __init__(self,img_w=1920,img_h=1080,network=None):
        self.img_w = img_w
        self.img_h = img_h

        # Fall back to the CPU instead of failing when CUDA is unavailable.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model if network is None else network
        # Placeholder anchors; every parameter is overwritten for each frame.
        self.encoder = SoftmaxPositionEncoder([[0,0]],[500],64)

        self.encoder.to(self.device)
        self.model.to(self.device)

        self.pixel_coordinates = self.build_pixel_coordinates(self.img_w,self.img_h)


    def render_animation(self,output_path,fps,length,draw_function):
        """Render ``length`` frames to ``output_path`` (mp4v) and preview them.

        ``draw_function(transform, frame_i)`` is called once per frame with a
        fresh root ``Transform`` and must add at least one set of anchors to it.

        Raises:
            OSError: if the video file cannot be opened for writing.
        """
        # Create the writer before entering `try`, so the `finally` clause can
        # never reference an unbound name and hide the original error.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_writer = cv2.VideoWriter(output_path,fourcc, fps, (self.img_w, self.img_h))
        if not video_writer.isOpened():
            # Without this check every write() is silently dropped.
            video_writer.release()
            raise OSError("Could not open video writer for {!r}".format(output_path))

        try:
            for frame_i in trange(length):
                transform = Transform()
                draw_function(transform,frame_i)

                mean_tensor,std_tensor,feature_tensor = transform.get_encoder_tensors()

                # Swap this frame's anchors into the encoder on the render device.
                self.encoder.means.data    = mean_tensor.to(self.device)
                self.encoder.sigmas.data   = std_tensor.to(self.device)
                self.encoder.features.data = feature_tensor.to(self.device)

                img_rgb = self.render_frame()
                # OpenCV writes and displays BGR
                img_bgr = cv2.cvtColor(img_rgb,cv2.COLOR_RGB2BGR)
                video_writer.write(img_bgr)
                cv2.imshow("img",img_bgr)
                cv2.waitKey(1)
        finally:
            video_writer.release()


    def build_pixel_coordinates(self,img_w,img_h):
        """Return an [img_h*img_w, 2] float tensor of (row, col) pixel coordinates."""
        mesh_list = torch.meshgrid(
            torch.arange(img_h),
            torch.arange(img_w),
        )
        pixel_coordinates = torch.stack(mesh_list,2).reshape(-1,2)

        pixel_coordinates = pixel_coordinates.float().to(self.device)

        return pixel_coordinates


    def render_frame(self):
        """Evaluate encoder + network on every pixel; returns an (img_h, img_w, 3) uint8 RGB image."""
        # Pick a suitable batch size
        batch_size  = 2**15

        # Get the number of pixels in the image
        num_pixels  = len(self.pixel_coordinates)

        # Create empty output tensor (on the CPU; batch results are copied into it)
        output_tensor = torch.zeros(num_pixels,3)

        with torch.no_grad():

            # Walk over the pixels one batch at a time; the last batch may be short
            for i1 in range(0,num_pixels,batch_size):
                i2 = min(i1+batch_size,num_pixels)
                input_tensor = self.pixel_coordinates[i1:i2]

                # Run the coordinates through the encoder and model
                output_tensor[i1:i2] = self.model(self.encoder(input_tensor))

        # Reshape output tensor back into image
        img_rgb = (output_tensor.reshape(self.img_h, self.img_w,3)*255).detach().cpu().numpy()

        # Convert to an 8-bit numpy image for OpenCV
        img_rgb = np.clip(img_rgb,0,255).astype(np.uint8)

        return img_rgb
        
       
        



In [None]:
# Two heads rotating
# Timing: a 60 second clip at 30 frames per second.
fps, run_time = 30, 60
total_frames = run_time * fps
def draw(transform, frame_i):
    """Place two faces 500 px either side of the frame centre, counter-rotating.

    The rotation angle grows linearly with ``frame_i`` and reaches two full
    turns at ``total_frames``; the second face uses the negated angle.
    """
    angle = frame_i/total_frames*2*360
    for x_offset, spin in ((-500, angle), (500, -angle)):
        head = Transform(transform, 1920/2 + x_offset, 1080/2, spin)
        draw_face(head)
    
def draw_face(transform):
    """Add one copy of the trained face's anchors under ``transform``."""
    # Shift by (x=-962, y=-1024) so that point of the trained image sits at
    # the origin of `transform` and acts as the pivot.
    face_frame = Transform(transform, x=-962, y=-1024, angle=0)
    # Anchors come from the trained notebook-level encoder, copied to the CPU.
    anchor_tensors = [
        param.data.cpu()
        for param in (encoder.means, encoder.sigmas, encoder.features)
    ]
    face_frame.add_anchors(*anchor_tensors)
    
# Render the clip defined by `draw` above and write it to test.mp4.
renderer = Animator()
renderer.render_animation(
    output_path="test.mp4",
    fps=fps,
    length=total_frames,
    draw_function=draw,
)

In [None]:
# Two heads banging together
# Timing: a 60 second clip at 30 frames per second.
fps, run_time = 30, 60
total_frames = run_time * fps
def draw(transform, frame_i):
    """Place two unrotated faces that slide toward and away from the frame centre.

    The horizontal offset is ``500 * sigmoid(5 * sin(phase))``, where the phase
    completes ten cycles over ``total_frames`` and starts at -90 degrees.
    """
    phase = frame_i/total_frames*10*360

    # Sine wave pushed through a logistic curve: eases in and out between ~0 and ~1
    wave = math.sin(math.radians(phase-90))*5
    eased = 1 / (1+math.exp(-wave))
    offset = eased * 500

    for direction in (-1, 1):
        head = Transform(transform, 1920/2 + direction*offset, 1080/2, 0)
        draw_face(head)
    
def draw_face(transform):
    """Add one copy of the trained face's anchors under ``transform``."""
    # Shift by (x=-962, y=-1024) so that point of the trained image sits at
    # the origin of `transform` and acts as the pivot.
    face_frame = Transform(transform, x=-962, y=-1024, angle=0)
    # Anchors come from the trained notebook-level encoder, copied to the CPU.
    anchor_tensors = [
        param.data.cpu()
        for param in (encoder.means, encoder.sigmas, encoder.features)
    ]
    face_frame.add_anchors(*anchor_tensors)
    
# Render the clip defined by `draw` above and write it to test.mp4.
renderer = Animator()
renderer.render_animation(
    output_path="test.mp4",
    fps=fps,
    length=total_frames,
    draw_function=draw,
)

In [None]:
# Two heads zoom together
# Timing: a 60 second clip at 30 frames per second.
fps, run_time = 30, 60
total_frames = run_time * fps
def draw(transform, frame_i):
    """Place two faces side by side whose scale pulses together.

    The scale is ``0.2 + 0.8 * sigmoid(5 * sin(phase))``, where the phase
    completes twenty cycles over ``total_frames``. The faces sit 1920/6 px to
    the left and right of the frame centre.
    """
    phase = frame_i/total_frames*20*360

    # Sine wave pushed through a logistic curve, then remapped to 0.2 .. 1.0
    wave = math.sin(math.radians(phase-90))*5
    eased = 1 / (1+math.exp(-wave))
    zoom = eased*0.8 + 0.2

    frame_centre = Transform(transform,1920/2,1080/2,0)

    half_gap = 1920/3/2
    for x_offset in (-half_gap, +half_gap):
        head = Transform(frame_centre,x=x_offset,scale=zoom)
        draw_face(head)
    
def draw_face(transform):
    """Add one copy of the trained face's anchors under ``transform``."""
    # Shift by (x=-962, y=-1024) so that point of the trained image sits at
    # the origin of `transform` and acts as the pivot.
    face_frame = Transform(transform, x=-962, y=-1024, angle=0)
    # Anchors come from the trained notebook-level encoder, copied to the CPU.
    anchor_tensors = [
        param.data.cpu()
        for param in (encoder.means, encoder.sigmas, encoder.features)
    ]
    face_frame.add_anchors(*anchor_tensors)
    
# Render the clip defined by `draw` above and write it to test.mp4.
renderer = Animator()
renderer.render_animation(
    output_path="test.mp4",
    fps=fps,
    length=total_frames,
    draw_function=draw,
)

In [None]:
# Face Spin
# Timing: a 60 second clip at 30 frames per second.
fps, run_time = 30, 60
total_frames = run_time * fps
def draw(transform, frame_i):
    """Place eight small faces on the spokes of a rotating wheel.

    The wheel turns twice over ``total_frames``. A pulse running at five times
    the wheel's rate drives both each face's distance from the hub and its
    extra tilt.
    """
    angle = frame_i/total_frames*2*360

    # Sine wave pushed through a logistic curve, then scaled and offset
    wave = math.sin(math.radians(angle*5-90))*5
    pulse = 1 / (1+math.exp(-wave))
    pulse = pulse*0.99 + 0.2

    hub = Transform(transform,1920/2,1080/2,0)
    wheel = Transform(hub,angle = angle,scale=0.8)

    # The same for every spoke, so computed once
    radial_offset = -1920/6*pulse
    tilt = 60*pulse

    spokes = 8
    for i in range(spokes):
        spoke = Transform(wheel,angle=i/spokes*360)
        face_frame = Transform(spoke,y=radial_offset,scale=.2,angle=tilt)
        draw_face(face_frame)
  
    
def draw_face(transform):
    """Add one copy of the trained face's anchors under ``transform``."""
    # Shift by (x=-962, y=-1024) so that point of the trained image sits at
    # the origin of `transform` and acts as the pivot.
    face_frame = Transform(transform, x=-962, y=-1024, angle=0)
    # Anchors come from the trained notebook-level encoder, copied to the CPU.
    anchor_tensors = [
        param.data.cpu()
        for param in (encoder.means, encoder.sigmas, encoder.features)
    ]
    face_frame.add_anchors(*anchor_tensors)
    
# Render the clip defined by `draw` above and write it to spin.mp4.
renderer = Animator()
renderer.render_animation(
    output_path="spin.mp4",
    fps=fps,
    length=total_frames,
    draw_function=draw,
)

In [None]:
# Face grid split
# Timing: a 60 second clip at 30 frames per second.
fps, run_time = 30, 60
total_frames = run_time * fps

def map(start,end,ratio):
    """Linearly interpolate: ``start`` at ratio 0, ``end`` at ratio 1.

    Note: this name shadows Python's builtin ``map`` for the rest of the notebook.
    """
    span = end - start
    return start + span*ratio

def draw(transform, frame_i):
    """Place a 5 x 3 grid of faces that spreads out from, and collapses into, the centre.

    A pulse ``r = sigmoid(5 * sin(phase))`` runs four cycles over
    ``total_frames``. Each face's grid offset is scaled by ``r`` and its size is
    interpolated from 0.7 (r = 0) to 0.12 (r = 1). Every face also rotates one
    full turn over the clip.
    """
    angle = frame_i/total_frames*360
    pulse = frame_i/total_frames*4

    # Sine wave pushed through a logistic curve: eases between ~0 and ~1
    r = math.sin(math.radians(pulse*360-90))*5
    r = 1 / (1+math.exp(-r))

    t_center = Transform(transform,1920/2,1080/2)
    w = 1920/5
    h = 1080/3
    z = map(.7,0.12,r)
    for grid_x in np.linspace(-2*w,2*w,5):
        # Interpolate the column offset once per column, under its own name.
        # Overwriting the loop variable inside the row loop compounded the
        # factor `r` on every row, so the rows of a column were misaligned.
        x = float(map(0,grid_x,r))
        for grid_y in np.linspace(-h,h,3):
            y = float(map(0,grid_y,r))
            t1 = Transform(t_center,x=x,y=y,scale=z,angle=angle)
            draw_face(t1)
  
    
def draw_face(transform):
    """Add one copy of the trained face's anchors under ``transform``."""
    # Shift by (x=-962, y=-920) so that point of the trained image sits at
    # the origin of `transform` and acts as the pivot.
    face_frame = Transform(transform, x=-962, y=-920, angle=0)
    # Anchors come from the trained notebook-level encoder, copied to the CPU.
    anchor_tensors = [
        param.data.cpu()
        for param in (encoder.means, encoder.sigmas, encoder.features)
    ]
    face_frame.add_anchors(*anchor_tensors)
    
# Render the clip defined by `draw` above and write it to grid_pulse.mp4.
renderer = Animator()
renderer.render_animation(
    output_path="grid_pulse.mp4",
    fps=fps,
    length=total_frames,
    draw_function=draw,
)