In [1]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms
from torchvision.datasets import FashionMNIST,StanfordCars
from matplotlib import pyplot as plt
import numpy as np
import torch.nn.functional as F
import math
import pandas as pd
from PIL import Image

In [2]:
import wandb

In [3]:
# Development-time lookup: the IPython `?` prefix displays the help for wandb.init.
# This is IPython-only syntax and is not valid in a plain Python script.
?wandb.init

In [4]:
from PIL import ImageFile
# Pillow switch that controls whether truncated image files are loaded
# instead of raising an error; enabled here for the whole session.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [5]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [6]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    The block output is ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``
    where ``shortcut`` is ``x`` itself or ``downsample(x)`` when a
    ``downsample`` module is supplied.

    Args:
        inplanes: number of input channels.
        planes: number of output channels (``expansion`` is 1).
        stride: stride of the first convolution.
        downsample: optional module applied to ``x`` so the shortcut matches
            the main branch in shape; ``None`` uses ``x`` unchanged.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # The notebook never defines a ``conv3x3`` helper, so the 3x3
        # convolutions are built explicitly (padding=1, no bias, matching the
        # convolutions used by Bottleneck in this file).
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Project the shortcut only when a projection module was provided.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        # Previously a bare ``return`` here made the block yield None.
        return out
class Bottleneck(nn.Module):
    """Three-convolution residual block: 1x1 -> 3x3 -> 1x1.

    The last convolution widens the feature map to ``planes * 4`` channels.
    The shortcut (``x`` or ``downsample(x)``) is added to the main branch and
    the sum is squashed with ``tanh``.

    Args:
        inplanes: number of input channels.
        planes: channel width of the two inner convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to ``x`` to build the shortcut.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4

        # 1x1 convolution to the inner width.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 convolution; this is the only layer that applies ``stride``.
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 convolution up to the widened output.
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the block on ``x`` and return ``tanh(main_branch + shortcut)``."""
        h = self.relu(self.bn1(self.conv1(x)))
        h = self.relu(self.bn2(self.conv2(h)))
        h = self.bn3(self.conv3(h))

        shortcut = x if self.downsample is None else self.downsample(x)
        h += shortcut

        # The block ends in tanh (not ReLU), so outputs lie in [-1, 1].
        return torch.tanh(h)

In [7]:
class Encoder(nn.Module):
    """ResNet-style convolutional encoder ending in one linear layer.

    Pipeline: 7x7 stride-2 conv -> BatchNorm -> ReLU -> 3x3 stride-2 max-pool
    -> three residual stages (base widths 16, 32, 64; the last two with
    stride 2) -> 7x7 average pool (stride 1) -> flatten -> ``fc``.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: sequence whose first three entries give the number of blocks
            in stages 1-3 (a fourth entry, if present, is ignored).
        size: number of output features of the final linear layer.
        fc_in_features: flattened feature count entering ``fc``. The default
            39168 equals 256 channels x 9 x 17, which is what the pipeline
            above produces for a block with ``expansion == 4`` on 240x360
            inputs. Pass a different value for other blocks or input sizes.
    """

    def __init__(self, block, layers, size, fc_in_features=39168):
        super(Encoder, self).__init__()
        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(fc_in_features, size)

        # Convolutions: zero-mean normal with std sqrt(2 / (k_h * k_w * out_channels)).
        # BatchNorm: weight 1, bias 0. The linear layer keeps its default init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks of base width ``planes``.

        Only the first block receives ``stride``; it also receives a
        1x1-conv + BatchNorm projection for its shortcut whenever the stride
        or the channel count changes. ``self.inplanes`` is advanced to the
        stage's output width.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Encode a batch of 3-channel images into ``size`` features per sample."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        # Flatten everything except the batch axis; the resulting width must
        # equal fc_in_features.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [8]:
class Decoder(nn.Module):
    """Maps a latent vector of length ``size`` to a 4-channel 240x360 image.

    Three fully connected layers (each with BatchNorm1d + ReLU) produce a
    128x6x6 seed map. The seed is then grown by alternating a shared
    non-uniform ``Upsample`` with transposed convolutions, and the result is
    passed through a sigmoid, so every output value lies in [0, 1].
    """

    def __init__(self, size):
        super().__init__()
        # Fully connected stem: size -> 256 -> 256 -> 128*6*6 (= 4608).
        self.dfc3 = nn.Linear(size, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.dfc2 = nn.Linear(256, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.dfc1 = nn.Linear(256, 128 * 6 * 6)
        self.bn1 = nn.BatchNorm1d(4608)
        # One upsampler reused three times; height and width scale differently.
        self.upsample1 = nn.Upsample(scale_factor=(2.073, 2.4))
        # Transposed-convolution head, applied in the order dconv5 ... dconv1.
        self.dconv5 = nn.ConvTranspose2d(128, 64, 3, padding=0)
        self.dconv4 = nn.ConvTranspose2d(64, 32, 3, padding=1)
        self.dconv3 = nn.ConvTranspose2d(32, 64, 3, padding=1)
        self.dconv2 = nn.ConvTranspose2d(64, 32, 5, padding=2)
        self.dconv1 = nn.ConvTranspose2d(32, 4, 24, stride=4, padding=(10, 12))

    def forward(self, x):
        """Decode ``x`` of shape (N, size) into an image batch of shape (N, 4, 240, 360)."""
        # Dense stem.
        h = F.relu(self.bn3(self.dfc3(x)))
        h = F.relu(self.bn2(self.dfc2(h)))
        h = F.relu(self.bn1(self.dfc1(h)))

        # Reshape to the 128 x 6 x 6 seed feature map.
        h = h.view(-1, 128, 6, 6)

        # First growth stage: 6x6 -> 12x14 -> 14x16.
        h = F.relu(self.dconv5(self.upsample1(h)))
        h = F.relu(self.dconv4(h))
        h = F.relu(self.dconv3(h))

        # Second growth stage: 14x16 -> 29x38.
        h = F.relu(self.dconv2(self.upsample1(h)))

        # Final stage: 29x38 -> 60x91 -> 240x360, squashed to [0, 1].
        h = self.dconv1(self.upsample1(h))
        return torch.sigmoid(h)

In [9]:
# tester=torch.ones((4,dims))
# decoder=Decoder(dims)
# (decoder(tester)).shape

In [10]:
class VAE(nn.Module):
    """Variational auto-encoder wiring ``Encoder`` and ``Decoder`` together.

    The encoder is built as ``Encoder(Bottleneck, [3, 4, 6, 2], dims * 2)``.
    Its ``2 * dims`` outputs are read as ``dims`` means followed by ``dims``
    log-variances. The decoder consumes a ``dims``-long latent vector.
    """

    def __init__(self, dims):
        super().__init__()
        self.encoder = Encoder(Bottleneck, [3, 4, 6, 2], dims * 2)
        self.decoder = Decoder(dims)
        self.dims = dims

    def reparameterise(self, mu, logvar):
        """Return a latent sample in training mode and ``mu`` itself otherwise.

        In training mode the result is ``mu + eps * exp(0.5 * logvar)`` with
        ``eps`` drawn from a standard normal.
        """
        if not self.training:
            return mu
        std = torch.exp(logvar * 0.5)
        # Fresh noise tensor with std's dtype and device, detached from autograd.
        noise = std.new_empty(std.size()).normal_()
        return noise * std + mu

    def forward(self, y):
        """Encode ``y``, sample a latent vector and decode it.

        Returns:
            Tuple ``(reconstruction, mu, logvar)``.
        """
        stats = self.encoder(y).view(-1, 2, self.dims)
        mu, logvar = stats[:, 0, :], stats[:, 1, :]
        latent = self.reparameterise(mu, logvar).view(-1, self.dims)
        return self.decoder(latent), mu, logvar
        

In [11]:
# Latent size handed to VAE; the model is then moved to the selected device.
dims = 1024
model = VAE(dims)
model = model.to(device)

In [12]:

# Optimiser: Adam over every parameter of the VAE.
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
# Reconstruction + KL divergence losses summed over all elements and batch

def loss_function(ỹ, y, mu1, logvar1):
    """VAE loss: summed binary cross-entropy plus summed KL divergence.

    Args:
        ỹ: reconstruction with values in [0, 1].
        y: target tensor with the same shape as the reconstruction.
        mu1: latent means.
        logvar1: latent log-variances, same shape as ``mu1``.

    Returns:
        Tuple ``(total, BCE, KLD)`` of scalar tensors with ``total = BCE + KLD``.
    """
    BCE = nn.functional.binary_cross_entropy(
        ỹ, y, reduction='sum'
    )

    # KL(N(mu, sigma^2) || N(0, 1)) in closed form, summed over every latent
    # element and sample. Summing (not averaging) keeps the term on the same
    # scale as the summed BCE; a mean would shrink it by the number of latent
    # elements and make it negligible next to the reconstruction term.
    KLD = -0.5 * torch.sum(1.0 + logvar1 - mu1.pow(2) - logvar1.exp())
    return BCE + KLD, BCE, KLD

In [14]:
class MyDataset(Dataset):
    """Dataset of image pairs listed in a CSV with ``input`` and ``output`` columns.

    Each CSV cell is a path that is opened with PIL and converted to a NumPy
    array.

    Args:
        train_path: path of the CSV file.
        transform_x: optional callable applied to the ``x`` array.
        transform_y: optional callable applied to the ``y`` array.
        max_samples: upper bound on the reported length (default 1000, the
            value that used to be hard-coded). ``None`` exposes every row.
    """

    def __init__(self, train_path, transform_x=None, transform_y=None,
                 max_samples=1000):
        self.df = pd.read_csv(train_path, sep=',', usecols=['input', 'output'])
        self.transform_x = transform_x
        self.transform_y = transform_y
        self.max_samples = max_samples

    @staticmethod
    def _load_image(path):
        """Read the image at ``path`` into a NumPy array and close the file."""
        with Image.open(path) as img:
            return np.array(img)

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair stored in row ``index``."""
        # NOTE(review): x is read from column position 1 and y from position 0
        # of the loaded frame. If the CSV lists `input` before `output`, x comes
        # from the `output` column - confirm this is the intended pairing.
        x = self._load_image(self.df.iloc[index, 1])
        y = self._load_image(self.df.iloc[index, 0])

        # Each side falls back to a plain tensor conversion when it has no
        # transform, so a missing transform_y no longer raises TypeError.
        x = self.transform_x(x) if self.transform_x is not None else torch.from_numpy(x)
        y = self.transform_y(y) if self.transform_y is not None else torch.from_numpy(y)
        return x, y

    def __len__(self):
        """Number of usable rows: the CSV length, capped at ``max_samples``."""
        n_rows = len(self.df)
        if self.max_samples is None:
            return n_rows
        # Never report more rows than exist; otherwise a DataLoader would
        # request indices past the end of the frame.
        return min(n_rows, self.max_samples)

In [15]:
# Training schedule: number of epochs and samples per mini-batch.
epochs, batch_size = 1000, 4

In [16]:
# Hyperparameters gathered for Weights & Biases.
# NOTE(review): this is assigned before wandb.init() is called - confirm the
# values actually reach the run; W&B documents passing them as
# wandb.init(config=...).
wandb.config = dict(
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=batch_size,
    dims=dims,
)

In [17]:
# Hand the hyperparameters to init() directly. wandb.init() rebinds the
# module-level wandb.config to the new run's config object, so a dict that was
# assigned to wandb.config beforehand is not recorded with the run.
wandb.init(
    project="AerialPoseEstimator",
    config={
        "learning_rate": learning_rate,
        "epochs": epochs,
        "batch_size": batch_size,
        "dims": dims,
    },
)

[34m[1mwandb[0m: Currently logged in as: [33mpthpth[0m. Use [1m`wandb login --relogin`[0m to force relogin
wandb: ERROR Failed to sample metric: Not Supported


In [18]:
# Loaders over the untransformed datasets (no transform_x / transform_y given).
train_loader = DataLoader(
    MyDataset("./dataset_train.csv"), batch_size=batch_size, shuffle=True
)
test_loader = DataLoader(
    MyDataset("./dataset_test.csv"), batch_size=batch_size, shuffle=True
)

In [19]:
# Register the model with Weights & Biases through wandb.watch.
wandb.watch(model)

[]

In [20]:
def batch_mean_x(loader):
    """Per-channel mean and standard deviation of the first item of each batch.

    Every batch must be a pair whose first element is a 4-D tensor laid out
    as (batch, height, width, channel). Pixel values are divided by 255
    before the statistics are taken, and the reduction runs over the first
    three axes, so one value per channel is returned.

    Args:
        loader: iterable yielding ``(images, _)`` pairs.

    Returns:
        Tuple ``(mean, std)`` of float32 tensors of length ``channel``.

    Raises:
        ValueError: if ``loader`` yields no pixels.
    """
    pixel_count = 0
    channel_sum = None
    channel_sq_sum = None
    for images, _ in loader:
        # Accumulate in float64: plain sums over many pixels lose precision
        # in float32.
        images = images.to(torch.float64) / 255
        b, h, w, _channels = images.shape
        pixel_count += b * h * w

        batch_sum = torch.sum(images, dim=[0, 1, 2])
        batch_sq_sum = torch.sum(images ** 2, dim=[0, 1, 2])
        # Start from the first batch's sums instead of torch.empty(), whose
        # uninitialised contents (possibly NaN/inf) poisoned the result.
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum

    if pixel_count == 0:
        raise ValueError("loader yielded no images; cannot compute statistics")

    mean = channel_sum / pixel_count
    # Rounding can push E[x^2] - E[x]^2 slightly below zero; clamp before sqrt.
    variance = torch.clamp(channel_sq_sum / pixel_count - mean ** 2, min=0)
    std = torch.sqrt(variance)
    return mean.to(torch.float32), std.to(torch.float32)

In [21]:
def batch_mean_y(loader):
    """Per-channel mean and standard deviation of the second item of each batch.

    Every batch must be a pair whose second element is a 4-D tensor laid out
    as (batch, height, width, channel). Pixel values are divided by 255
    before the statistics are taken, and the reduction runs over the first
    three axes, so one value per channel is returned.

    Args:
        loader: iterable yielding ``(_, images)`` pairs.

    Returns:
        Tuple ``(mean, std)`` of float32 tensors of length ``channel``.

    Raises:
        ValueError: if ``loader`` yields no pixels.
    """
    pixel_count = 0
    channel_sum = None
    channel_sq_sum = None
    for _, images in loader:
        # Accumulate in float64: plain sums over many pixels lose precision
        # in float32.
        images = images.to(torch.float64) / 255
        b, h, w, _channels = images.shape
        pixel_count += b * h * w

        batch_sum = torch.sum(images, dim=[0, 1, 2])
        batch_sq_sum = torch.sum(images ** 2, dim=[0, 1, 2])
        # Start from the first batch's sums instead of torch.empty(), whose
        # uninitialised contents (possibly NaN/inf) poisoned the result.
        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum

    if pixel_count == 0:
        raise ValueError("loader yielded no images; cannot compute statistics")

    mean = channel_sum / pixel_count
    # Rounding can push E[x^2] - E[x]^2 slightly below zero; clamp before sqrt.
    variance = torch.clamp(channel_sq_sum / pixel_count - mean ** 2, min=0)
    std = torch.sqrt(variance)
    return mean.to(torch.float32), std.to(torch.float32)

In [22]:
# Per-channel statistics of both sides of the training pairs.
mean_x, std_x = batch_mean_x(train_loader)
mean_y, std_y = batch_mean_y(train_loader)

# One pipeline per side: convert to a tensor, then normalise with that side's
# own statistics.
transform_img_normal_x = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=mean_x, std=std_x)]
)
transform_img_normal_y = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=mean_y, std=std_y)]
)

In [23]:
# Rebuild both loaders, this time with the normalising transforms attached.
train_loader = DataLoader(
    MyDataset(
        "./dataset_train.csv",
        transform_x=transform_img_normal_x,
        transform_y=transform_img_normal_y,
    ),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    MyDataset(
        "./dataset_test.csv",
        transform_x=transform_img_normal_x,
        transform_y=transform_img_normal_y,
    ),
    batch_size=batch_size,
    shuffle=True,
)

In [24]:
# Training and testing the VAE

def _to_tiles(batch, channels):
    """Rescale a batch and cut every 480x720 image into four 240x360 tiles.

    The batch is viewed as (-1, channels, 480, 720), divided by 255, split
    into two 360-wide strips and each strip into two 240-high tiles. The
    tiles are concatenated along the batch axis.

    NOTE(review): the loaders built with ToTensor/Normalize already rescale
    pixel values - confirm the additional division by 255 is intended.
    """
    batch = torch.div(batch.view(-1, channels, 480, 720), 255)
    tiles = [tile
             for strip in batch.split(360, -1)
             for tile in strip.split(240, -2)]
    return torch.cat(tiles)


def _run_batch(x, y):
    """Move one (x, y) batch to the device, tile it and evaluate the loss.

    Returns the ``(total, BCE, KLD)`` tuple produced by ``loss_function``.
    """
    x = _to_tiles(x.to(device), 3)
    y = _to_tiles(y.to(device), 4)
    y_hat, mu, logvar = model(x)
    return loss_function(y_hat, y, mu, logvar)


# Placeholder kept from the original cell; nothing below fills it.
codes = dict(μ=list(), logσ2=list(), x=list())
n_train = len(train_loader.dataset)
n_test = len(test_loader.dataset)

for epoch in range(0, epochs + 1):
    # ---- Training (skipped at epoch 0 so the untrained net is tested first) ----
    if epoch > 0:
        model.train()
        # Plain Python floats: accumulating the loss tensors themselves keeps
        # autograd references alive for the whole epoch.
        train_loss = 0.0
        bcs = 0.0
        kls = 0.0
        for x, y in train_loader:
            # ===================forward=====================
            loss, bc, kl = _run_batch(x, y)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            bcs += bc.item()
            kls += kl.item()
        # ===================log========================
        wandb.log({"train_loss": train_loss / n_train,
                   "train BCE Loss": bcs / n_train,
                   "train KLD Loss": kls / n_train})
        # Placeholders kept from the original cell; currently unused.
        means, logvars, labels = list(), list(), list()
        # Overwrite the checkpoint every tenth epoch.
        if epoch % 10 == 0:
            torch.save({'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': train_loss},
                       "./Weights/resnet.pt")

    # ---- Testing ----
    torch.cuda.empty_cache()
    model.eval()
    test_loss = 0.0
    bcs = 0.0
    kls = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            loss, bc, kl = _run_batch(x, y)
            test_loss += loss.item()
            bcs += bc.item()
            kls += kl.item()
    # Normalise exactly once; previously the logged value was divided by the
    # dataset size twice.
    test_loss /= n_test
    wandb.log({"test_loss": test_loss,
               "test BCE Loss": bcs / n_test,
               "test KLD Loss": kls / n_test})
    print(epoch)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132


KeyboardInterrupt: 

In [None]:
# test_loader=MyDataset("./dataset_test.csv")

In [None]:
# temp =np.array(Image.open("./Datasets/Input/Echendens-LHS_09620.png_6.png"), dtype = float)/255.0

In [None]:
# tem = torch.from_numpy(temp).view(-1,3,480,720)

In [None]:
# tem=tem.to(device,dtype=torch.float32)

In [None]:
# ans=(model(tem))

In [None]:
# ans=(ans[0]*255).detach().cpu().numpy()

In [None]:
# ans.shape

In [None]:
# img=Image.fromarray(ans)