In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip -u "/content/drive/MyDrive/ensemble.zip" -d "/content/TRAFFIC"

In [None]:
!pip install PyTorch
!pip install timm

In [15]:
import torch
import torch.nn as nn
import timm
import os
import cv2
# from torchsummary import summary
import torch.nn.functional as F
import matplotlib.pyplot as plt



# Load the ViT model
vit_model = timm.create_model('vit_base_patch16_224', pretrained=True)  # this loads a pretrained version
for parameter in vit_model.parameters():
    parameter.requires_grad = False

# Store the in_features from the original classifier
in_features = vit_model.head.in_features

# Replace the classifier with nn.Identity
vit_model.head = nn.Identity()

# Your model that utilizes the loaded ViT
class ViTForWeights(nn.Module):
    def __init__(self, vit_model, in_features):
        super(ViTForWeights, self).__init__()
        self.vit = vit_model
        self.fc = nn.Linear(in_features, 3)  # We're using the stored in_features value here
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = self.vit(x)
        x = self.fc(x)
        out = self.softmax(x)
        return out  # Use softmax to ensure the weights sum up to 1

model = ViTForWeights(vit_model, in_features) # Use softmax to ensure the weights sum up to 1
# summary(model.cuda(), input_size=(3, 224, 224))

# Example images
bgs_path = "/content/TRAFFIC/ensemble/Subsense/"
of_path = "/content/TRAFFIC/ensemble/RLOF/"
suim_path = "/content/TRAFFIC/ensemble/SUIM/"

# read the images from each folder
bgs_images = os.listdir(bgs_path)
of_images = os.listdir(of_path)
suim_images = os.listdir(suim_path)

# sort the images in ascending order based on the number in the file name
bgs_images.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
of_images.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
suim_images.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))

# read the first image to get the height and width
bgs_image = cv2.imread(bgs_path + bgs_images[0], cv2.IMREAD_UNCHANGED)
ht, wt = bgs_image.shape[:2]

# go through each image and apply soft voting
# for k in range(len(bgs_images)):
bgs_image = cv2.imread(bgs_path + bgs_images[1], cv2.IMREAD_UNCHANGED)
of_image = cv2.imread(of_path + of_images[1], cv2.IMREAD_UNCHANGED)
suim_image = cv2.imread(suim_path + suim_images[1], cv2.IMREAD_UNCHANGED)
suim_image = cv2.resize(suim_image, (wt, ht))
print(bgs_image.shape,of_image.shape,suim_image.shape)

# Concatenate along the channel dimension
bgs_image = torch.tensor(bgs_image).float()
of_image = torch.tensor(of_image).float()
suim_image = torch.tensor(suim_image).float()
print(bgs_image.size(),of_image.size(),suim_image.size())

# Add a channel dimension to each image
bgs_image = bgs_image.unsqueeze(0)
of_image = of_image.unsqueeze(0)
suim_image = suim_image.unsqueeze(0)

# Concatenate along the channel dimension
concat_images = torch.cat([bgs_image, of_image, suim_image], dim=0)  # Now the result will be of shape (3, height, width)
# ci = concat_images.permute(1,2,0)
# ci= ci.detach().numpy()
# cv2.imwrite("out.jpg",ci)
# plt.imshow(ci)
# plt.show()
# print(ci.shape)
# Add a batch dimension to the concatenated images
concat_images = concat_images.unsqueeze(0)  # The result will be of shape (1, 3, height, width)

# Resize concatenated image to match ViT input
resize_size = (224, 224)
concat_images_resized = F.interpolate(concat_images, size=resize_size, mode='bilinear', align_corners=True)

# Pass the resized image through the model
weights = model(concat_images_resized)

# # Now you can pass this to your model
# weights = model(concat_images)

# Multiply with the images
print(weights)
weighted_image1 = weights[:, 0].unsqueeze(1) * bgs_image
weighted_image2 = weights[:, 1].unsqueeze(1) * of_image
weighted_image3 = weights[:, 2].unsqueeze(1) * suim_image

ground_truth = cv2.imread("/content/TRAFFIC/ensemble/groundtruth/" + bgs_images[1], cv2.IMREAD_UNCHANGED)
ground_truth = torch.tensor(ground_truth).float()
# Calculate loss with ground truth
# Assuming `ground_truth` is your ground truth tensor
loss = nn.MSELoss()
total_loss = loss(weighted_image1, ground_truth) + loss(weighted_image2, ground_truth) + loss(weighted_image3, ground_truth)

print(total_loss)


(1080, 1920) (1080, 1920) (1080, 1920)
torch.Size([1080, 1920]) torch.Size([1080, 1920]) torch.Size([1080, 1920])
tensor([[0.6911, 0.0379, 0.2711]], grad_fn=<SoftmaxBackward0>)
tensor(13839.9727, grad_fn=<AddBackward0>)


  out = self.softmax(x)
  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.functional as F
import numpy as np

# class diceloss(torch.nn.Module):
#     def __init__(self, eps=1e-7):
#         super(diceloss, self).__init__()
#         self.eps = eps

#     def forward(self, prediction, target):
#         # prediction = F.sigmoid(prediction)

#         intersection = (prediction * target).sum()
#         union = prediction.sum() + target.sum()

#         dice = 2.0 * intersection / (union + self.eps)
#         return 1.0 - dice
class DiceLoss(nn.Module):
    def __init__(self, smooth=1e-7):
        super(DiceLoss, self).__init__()
        self.smooth = smooth

    def forward(self, prediction, target):
        intersection = (prediction * target).sum()
        dice = (2. * intersection + self.smooth) / (prediction.sum() + target.sum() + self.smooth)
        return 1.0 - dice
class diceloss(torch.nn.Module):
    def init(self):
        super(diceloss, self).init()
    def forward(self,pred, target):
      lenIntersection=0
      for i in range(pred.shape[0]):
          for j in range(pred.shape[1]):
              if ( np.array_equal(pred[i][j],target[i][j]) ):
                  lenIntersection+=1

      lenimg=pred.shape[1]*pred.shape[1]
      lenimg2=target.shape[1]*target.shape[2]
      value = (2. * lenIntersection  / (lenimg + lenimg2))
      return value
# class diceloss(torch.nn.Module):
#     def init(self):
#         super(diceloss, self).init()
#     def forward(self,pred, target):
#        smooth = 1.
#        iflat = pred.contiguous().view(-1)
#        tflat = target.contiguous().view(-1)
#        intersection = (iflat * tflat).sum()
#        A_sum = torch.sum(iflat * iflat)
#        B_sum = torch.sum(tflat * tflat)
#        return 1 - (((2. * intersection) + smooth) / (A_sum + B_sum + smooth) )
model = ViTForWeights(vit_model, in_features)
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
# loss_function = nn.MSELoss()
model.train()
resize_size = (224, 224)

epochs = 20
for epoch in range(epochs):
    total_loss = 0

    for k in range(40):
        optimizer.zero_grad()
        bgs_image = cv2.imread(bgs_path + bgs_images[k], cv2.IMREAD_UNCHANGED)
        of_image = cv2.imread(of_path + of_images[k], cv2.IMREAD_UNCHANGED)
        suim_image = cv2.imread(suim_path + suim_images[k], cv2.IMREAD_UNCHANGED)
        of_image = cv2.threshold(of_image, 0.3*255, 255, cv2.THRESH_BINARY)[1]
        suim_image = cv2.threshold(suim_image, 0.3*255, 255, cv2.THRESH_BINARY)[1]
        ht, wt = bgs_image.shape[:2]
        suim_image = cv2.resize(suim_image, (wt, ht))

        bgs_image = torch.tensor(bgs_image)
        of_image = torch.tensor(of_image)
        suim_image = torch.tensor(suim_image)

        bgs_image = bgs_image.unsqueeze(0)
        of_image = of_image.unsqueeze(0)
        suim_image = suim_image.unsqueeze(0)

        concat_images = torch.cat([bgs_image, of_image, suim_image], dim=0).unsqueeze(0)
        concat_images_resized = F.interpolate(concat_images, size=resize_size, mode='bilinear', align_corners=True)
        # print(concat_images_resized.max(),concat_images_resized.size())
        # asdsa
        weights = model(concat_images_resized.float())

        weighted_image = weights[:, 0] * bgs_image.squeeze() + weights[:, 1] * of_image.squeeze() + weights[:, 2] * suim_image.squeeze()
        weighted_image = cv2.threshold(weighted_image.detach().numpy(),0.3,1,cv2.THRESH_BINARY)[1]
        ground_truth = cv2.imread("/content/TRAFFIC/ensemble/groundtruth/" + bgs_images[k], cv2.IMREAD_UNCHANGED)
        ground_truth = torch.tensor(ground_truth)
        # Compute the losses
        criterion = DiceLoss()
        combined_loss = criterion(torch.from_numpy(weighted_image).requires_grad_(True), ground_truth)
        # combined_loss = combined_loss.tensor(requires_grad=True)
        # torch.tensor(combined_loss,requires_grad=True)
        # print(combined_loss)
        total_loss += combined_loss.item()

        # Backpropagate
        combined_loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/40}",weights)


In [10]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import cv2
import timm
from torch import nn

# Initialize ViT model
vit_model = timm.create_model('vit_base_patch16_224', pretrained=True)
for parameter in vit_model.parameters():
    parameter.requires_grad = False

in_features = vit_model.head.in_features
vit_model.head = nn.Identity()

class ViTForWeights(nn.Module):
    def __init__(self, vit_model, in_features):
        super(ViTForWeights, self).__init__()
        self.vit = vit_model
        self.fc = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 3),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        x = self.vit(x)
        return self.fc(x)

class DiceLoss(nn.Module):
    def __init__(self, smooth=1e-5):
        super(DiceLoss, self).__init__()
        self.smooth = smooth

    def forward(self, prediction, target):
        intersection = (prediction * target).sum()
        union = prediction.sum() + target.sum()
        dice = (2. * intersection + self.smooth) / (union + self.smooth)
        return 1.0 - dice

# Initialize model and optimizer
model = ViTForWeights(vit_model, in_features)
optimizer = optim.Adam(model.parameters(), lr=1e-4)  # Decreased learning rate
criterion = DiceLoss()

resize_size = (224, 224)
epochs = 20
for epoch in range(epochs):
    total_loss = 0

    for k in range(40):
        optimizer.zero_grad()

        bgs_image = cv2.imread(bgs_path + bgs_images[k], cv2.IMREAD_UNCHANGED)
        of_image = cv2.imread(of_path + of_images[k], cv2.IMREAD_UNCHANGED)
        suim_image = cv2.imread(suim_path + suim_images[k], cv2.IMREAD_UNCHANGED)
        of_image = cv2.threshold(of_image, 0.3*255, 255, cv2.THRESH_BINARY)[1]
        suim_image = cv2.threshold(suim_image, 0.3*255, 255, cv2.THRESH_BINARY)[1]
        ht, wt = bgs_image.shape[:2]
        suim_image = cv2.resize(suim_image, (wt, ht))

        bgs_image = torch.tensor(bgs_image)
        of_image = torch.tensor(of_image)
        suim_image = torch.tensor(suim_image)

        bgs_image = bgs_image.unsqueeze(0)
        of_image = of_image.unsqueeze(0)
        suim_image = suim_image.unsqueeze(0)
        weights = model(concat_images_resized.float())

        weighted_image = (weights[:, 0] * bgs_image.squeeze() +
                          weights[:, 1] * of_image.squeeze() +
                          weights[:, 2] * suim_image.squeeze())

        weighted_image_thresholded = (weighted_image > 0.3).float()

        loss = criterion(weighted_image_thresholded, ground_truth)
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/40}")


RuntimeError: ignored

In [21]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import cv2
import timm
from torch import nn

# ... [All the class and function definitions] ...
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ViTForWeights(vit_model, in_features).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

model.train()

resize_size = (224, 224)
epochs = 20

for epoch in range(epochs):
    total_loss = 0

    for k in range(40):
        optimizer.zero_grad()

        bgs_image = cv2.imread(bgs_path + bgs_images[k], cv2.IMREAD_UNCHANGED) / 255.0
        of_image = cv2.imread(of_path + of_images[k], cv2.IMREAD_UNCHANGED) / 255.0
        suim_image = cv2.imread(suim_path + suim_images[k], cv2.IMREAD_UNCHANGED) / 255.0

        ht, wt = bgs_image.shape[:2]
        suim_image = cv2.resize(suim_image, (wt, ht))

        bgs_image = torch.tensor(bgs_image).unsqueeze(0).to(device)
        of_image = torch.tensor(of_image).unsqueeze(0).to(device)
        suim_image = torch.tensor(suim_image).unsqueeze(0).to(device)

        concat_images = torch.cat([bgs_image, of_image, suim_image], dim=0).unsqueeze(0)
        concat_images_resized = F.interpolate(concat_images, size=resize_size, mode='bilinear', align_corners=True)

        weights = model(concat_images_resized.float())
        weighted_image = weights[0, 0] * bgs_image + weights[0, 1] * of_image + weights[0, 2] * suim_image

        ground_truth = cv2.imread("/content/TRAFFIC/ensemble/groundtruth/" + bgs_images[k], cv2.IMREAD_UNCHANGED) / 255.0
        ground_truth = torch.tensor(ground_truth).unsqueeze(0).to(device)

        # Compute the loss
        combined_loss = criterion(weighted_image, ground_truth)

        total_loss += combined_loss.item()

        # Backpropagate
        combined_loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / 40}")


  out = self.softmax(x)


Epoch 1/20, Loss: 126.82596793333846
Epoch 2/20, Loss: 126.59412537953752
Epoch 3/20, Loss: 126.52288935464419
Epoch 4/20, Loss: 126.48071374562008
Epoch 5/20, Loss: 126.44642258142703
Epoch 6/20, Loss: 126.41518349403911
Epoch 7/20, Loss: 126.3859973744641
Epoch 8/20, Loss: 126.35880434483916
Epoch 9/20, Loss: 126.3336970619456
Epoch 10/20, Loss: 126.31069025088479
Epoch 11/20, Loss: 126.28970221264642
Epoch 12/20, Loss: 126.270596515485
Epoch 13/20, Loss: 126.25321624002531
Epoch 14/20, Loss: 126.23740001826206
Epoch 15/20, Loss: 126.22298975017807
Epoch 16/20, Loss: 126.20983725189413
Epoch 17/20, Loss: 126.19780509904831
Epoch 18/20, Loss: 126.18676971893133
Epoch 19/20, Loss: 126.1766199787894
Epoch 20/20, Loss: 126.16725765963801
