# Imports

In [1]:
import random

import torch
import torch.nn.init
import torch.optim as optim
from torch import nn
from tqdm import tqdm
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import flip

# Device and extracted features location 

In [None]:
# Seed every random number generator the notebook relies on so that
# Restart Kernel -> Run All reproduces the same image/pixel samples and the
# same initial network weights.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

In [3]:
def show_image(scene_name, image_number, ss, data):
    """Display one stored image of a scene with the axes hidden.

    Args:
        scene_name: Key used to look up both `ss` and `data`.
        image_number: Position of the image inside `data[scene_name]`.
        ss: Mapping from scene name to a `[width, height]` pair.
        data: Mapping from scene name to a sequence of flattened pixel arrays;
            each array is reshaped to `(height, width, 3)`.
    """
    width, height = ss[scene_name][0], ss[scene_name][1]
    pixels = data[scene_name][image_number].reshape((height, width, 3))
    # The rows are reversed before drawing (the arrays are stored bottom-up).
    plt.imshow(pixels[::-1])
    plt.axis('off')

# Dataset Reading and creation
Read image names, width, height 

In [None]:
# Scene names, processed in this order by every loop below.
scenes = ['train_scene', 'truck_scene', 'drjohnson_scene', 'playroom_scene']
# True for scenes whose images are halved when loaded (scale factor 0.5).
scene_adjust = {'drjohnson_scene':True, 'playroom_scene':True, 'train_scene':False, 'truck_scene':False}
# Per-scene containers, filled by the loops and cells that follow.
scene_paths = {'drjohnson_scene': {}, 'playroom_scene': {}, 'train_scene': {}, 'truck_scene': {}}
scenes_size = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
scenes_files = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
scenes_sample = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
scenes_gt = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}

for scene in scenes:
    dataset_path = f'...'
    feature_path = f'...'
    images_path = f'...'
    gsres_path = f'...'

    scene_paths[scene] = {
        'dataset_path': dataset_path,
        'feature_path': feature_path,
        'images_path': images_path,
        'gsres_path': gsres_path,
    }

    # The context manager closes the file as soon as it has been read.
    with open(f'{feature_path}imagenames', 'r') as image_names:
        image_names_file = image_names.read().splitlines()
    if not image_names_file:
        raise ValueError(f'{feature_path}imagenames is empty; expected "<width> <height>" on the first line')

    # First line: "<width> <height>"; every following line is one image name.
    # split() without arguments tolerates repeated or trailing whitespace.
    width_height = image_names_file[0].split()
    image_width, image_height = int(width_height[0]), int(width_height[1])
    files_list = image_names_file[1:]
    scenes_size[scene] = [image_width, image_height]
    scenes_files[scene] = files_list

for scene in scenes:
    print(f'{scene}:')
    print(f'\tPaths:\n\t\t{scene_paths[scene]}')
    print(f'\tAdjust: {scene_adjust[scene]}')
    print(f'\tSize: {scenes_size[scene]}')
    print(f'\t# of images: {len(scenes_files[scene])}')
    print()

# Sample Images

In [None]:
# Fraction of each scene's images that is kept for training.
image_sample = 0.5
for scene in scenes:
    image_count = len(scenes_files[scene])
    keep_count = int(image_count * image_sample)
    # Draw `keep_count` distinct image indices without replacement.
    sampled_images = random.sample(range(image_count), keep_count)
    scenes_sample[scene] = sampled_images

for scene in scenes:
    print(f'{scene}:')
    print('\t# of sampled images:', len(scenes_sample[scene]))

## Read ground truths and features

Some ground truth images are double the size so we must adjust their scale accordingly

In [7]:
# Ground-truth pixels per scene: one float32 array of shape (pixels, 3) with
# values in [0, 1] for every sampled image.
gt = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
for scene in scenes:
    images_dir = scene_paths[scene]['images_path']
    # Scenes flagged in scene_adjust are halved; the others keep their size.
    scale_factor = 0.5 if scene_adjust[scene] else 1
    ground_truths = []
    for i in scenes_sample[scene]:
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(f'{images_dir}{scenes_files[scene][i]}.jpg') as image:
            # Rotate by 180 degrees, then mirror left-right (a top-bottom flip overall).
            flipped = ImageOps.mirror(image.rotate(180))
            resized = ImageOps.scale(flipped, scale_factor)
            pixels = np.array(resized).reshape((-1, 3)).astype(np.float32)
        ground_truths.append(pixels / 255.0)
    gt[scene] = ground_truths

In [None]:
# Sanity check: display the first sampled ground-truth image of the playroom scene.
show_image(scene_name='playroom_scene', image_number=0, ss=scenes_size, data=gt)

In [9]:
# Blend tables per scene: one DataFrame for every sampled image.
bf = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
for scene in scenes:
    feature_dir = scene_paths[scene]['feature_path']
    blending_features = []
    for i in scenes_sample[scene]:
        blend_file = f'{feature_dir}blend_{scenes_files[scene][i]}'
        # Space-separated values, no header row.
        blending_features.append(pd.read_csv(blend_file, sep=' ', header=None))
    bf[scene] = blending_features

In [10]:
# Remaining feature tables per scene: one DataFrame for every sampled image.
fr = {'drjohnson_scene': [], 'playroom_scene':[], 'train_scene': [], 'truck_scene': []}
for scene in scenes:
    feature_dir = scene_paths[scene]['feature_path']
    features_rest = []
    for i in scenes_sample[scene]:
        features_file = f'{feature_dir}features_{scenes_files[scene][i]}'
        # Space-separated values, no header row.
        features_rest.append(pd.read_csv(features_file, sep=' ', header=None))
    fr[scene] = features_rest

## Sample Datasets

Keep a random fraction `image_sample` of the images and, for each kept image, a random fraction `pixel_sample` of its pixels to create the training dataset.

In [32]:
# Fraction of each image's pixels that is kept for training.
pixel_sample = 0.1
# Added to every denominator below so that a zero denominator cannot divide by zero.
eps = 0.00000000001
# Column labels of the feature table: 3 ctail + 3 tail + 1 normalised depth.
feature_names = ['ctail_0', 'ctail_1', 'ctail_2', 'tail_0', 'tail_1', 'tail_2', 'z_norm']

X_train = {'drjohnson_scene': pd.DataFrame(), 'playroom_scene':pd.DataFrame(), 'train_scene': pd.DataFrame(), 'truck_scene': pd.DataFrame()}
y_train = {'drjohnson_scene': pd.DataFrame(), 'playroom_scene':pd.DataFrame(), 'train_scene': pd.DataFrame(), 'truck_scene': pd.DataFrame()}
for scene in scenes:
    # Collect the per-image samples and join them once after the loop; growing
    # a DataFrame with pd.concat on every iteration copies all earlier rows again.
    feature_chunks = []
    target_chunks = []
    for i in range(len(scenes_sample[scene])):
        raw = fr[scene][i].iloc[:, :13].to_numpy()
        sum_aici = raw[:, 0:3]
        sum_aici_k = raw[:, 3:6]
        sum_ai = raw[:, 6:7]
        sum_ai_k = raw[:, 7:8]
        prod_1_ai = raw[:, 8:9]
        prod_1_ai_k = raw[:, 9:10]
        o_m_z = raw[:, 10:11]
        zmin = raw[:, 11:12]
        zmax = raw[:, 12:13]

        ctail = (sum_aici - sum_aici_k) / (sum_ai - sum_ai_k + eps)  # 3 columns
        prod_ai_k_n = prod_1_ai / (prod_1_ai_k + eps)                # 1 column
        tail = prod_1_ai_k * ((1.0 - prod_ai_k_n) * ctail)           # 3 columns
        z_norm = (o_m_z - zmin) / (zmax - zmin + eps)                # 1 column

        # The blend table only supplies the pixel count; its values are not
        # part of the training features.
        pixel_count = len(bf[scene][i])
        sampled_pixels = random.sample(range(pixel_count), int(pixel_count * pixel_sample))

        feature_chunks.append(np.hstack([ctail, tail, z_norm])[sampled_pixels])
        target_chunks.append(gt[scene][i][sampled_pixels])

    # A scene without sampled images keeps its empty DataFrames.
    if feature_chunks:
        X_train[scene] = pd.DataFrame(np.concatenate(feature_chunks), columns=feature_names)
        y_train[scene] = pd.DataFrame(np.concatenate(target_chunks))

In [None]:
# Inspect the sampled training features of the playroom scene (rendered as the cell output).
X_train['playroom_scene']

# Model Architecture
Fully connected MLP with 3 hidden layers

In [19]:
class GSNN(nn.Module):
    """Fully connected network with three hidden layers.

    Four `nn.Linear` layers are chained; each of the first three is followed by
    ReLU and the last one by Sigmoid, so every output lies in (0, 1).

    Args:
        in_features: Width of the input vector.
        s1, s2, s3: Widths of the first, second and third hidden layer.
        out_features: Width of the output vector.
        device: Device on which the linear layers are allocated.
    """

    def __init__(self, in_features, s1, s2, s3, out_features, device):
        super().__init__()
        self.device = device

        widths = (in_features, s1, s2, s3, out_features)
        last_layer = len(widths) - 2
        layers = []
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            layers.append(nn.Linear(fan_in, fan_out, device=self.device))
            # Hidden layers use ReLU; only the output layer is squashed by Sigmoid.
            layers.append(nn.Sigmoid() if position == last_layer else nn.ReLU())
        self.arch = nn.Sequential(*layers)

        # The layer widths are kept so that callers can rebuild or name the model.
        self.in_features, self.out_features = in_features, out_features
        self.s1, self.s2, self.s3 = s1, s2, s3

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        return self.arch(x)

# Training Loop
Adam optimizer, smooth L1 loss and backpropagation 

In [34]:
# Mean loss of every finished epoch; train() appends to it, so values from
# repeated calls accumulate here.
train_loss = []
def train(model, X_train, y_train, epochs=700, batch_size=100, lr=0.01):
    """Fit `model` with Adam and a smooth L1 loss.

    Args:
        model: The `nn.Module` to optimise; it is moved to the global `device`.
        X_train: DataFrame of input features, one row per sample.
        y_train: DataFrame of targets, row-aligned with `X_train`.
        epochs: Number of passes over the data.
        batch_size: Number of samples per optimisation step.
        lr: Adam learning rate.

    Side effects:
        Appends the mean batch loss of every epoch to the module-level
        `train_loss` list and prints a message when training is finished.
    """
    # Create dataset (the whole dataset is placed on the device up front)
    X_tensor = torch.tensor(X_train.to_numpy(dtype=np.float32)).to(device)
    y_tensor = torch.tensor(y_train.to_numpy(dtype=np.float32)).to(device)

    dataset = TensorDataset(X_tensor, y_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    # Move model to the appropriate device
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.SmoothL1Loss()

    for epoch in range(epochs):
        total_loss = 0.0
        model.train()  # Set the model to training mode

        progress = tqdm(dataloader, desc=f'Epoch {epoch+1}/{epochs}', unit='batches')

        for batches_seen, (b_train, b_labels) in enumerate(progress, start=1):
            optimizer.zero_grad()
            outputs = model(b_train)
            loss = criterion(outputs, b_labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            # Report the mean over the batches processed so far. Dividing by the
            # total number of batches would understate the loss until the last batch.
            progress.set_postfix({'Loss': total_loss / batches_seen})
        train_loss.append(total_loss / len(dataloader))

    print('Training complete')

# Model Creation and Training
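MLP architecture 7-16-32-16-3 (the training features built above have 7 columns: 3 `ctail`, 3 `tail` and 1 `z_norm`). Train for 25 epochs with a batch size of 512 and a learning rate of 0.001. Save the trained model weights in a .pt file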

In [None]:
training_scene = 'playroom_scene'
# Size the input layer from the feature table itself so that the network
# cannot drift out of sync with the columns built in the dataset cell.
model = GSNN(X_train[training_scene].shape[1], 16, 32, 16, 3, device)
epochs = 25
batch = 512
lr = 0.001
train(model, X_train[training_scene], y_train[training_scene], epochs=epochs, batch_size=batch, lr=lr)

In [25]:
# The file name records the layer widths and the training hyper-parameters.
weights_file = (
    f'full_{model.in_features}_{model.s1}_{model.s2}_{model.s3}'
    f'_{model.out_features}_{epochs}_{batch}_{lr}.pt'
)
torch.save(model.state_dict(), weights_file)

# Model Evaluation 
Load model from previously saved file and enable evaluation mode

In [None]:
# Rebuild the network with the layer widths of the trained model. The
# checkpoint path below is derived from the same attributes, and
# load_state_dict rejects a network whose layer shapes differ from the
# saved weights.
gsnn = GSNN(model.in_features, model.s1, model.s2, model.s3, model.out_features, device)
gsnn.to(device)
model_path = f'full_{model.in_features}_{model.s1}_{model.s2}_{model.s3}_{model.out_features}_{epochs}_{batch}_{lr}.pt'
# map_location places the tensors on the active device even if the weights
# were saved from a different one.
gsnn.load_state_dict(torch.load(model_path, weights_only=True, map_location=device))
gsnn.eval()

## Inference
Choose 10 random images of a scene, feed their features into the model, and save the 3DGS render, the ground truth and the model output as images for side-by-side comparison

In [None]:
scene = 'truck_scene'
sample_images = 10
# Added to every denominator below so that a zero denominator cannot divide by zero.
eps = 0.00000000001
feature_dir = scene_paths[scene]['feature_path']
images_dir = scene_paths[scene]['images_path']
gsres_dir = scene_paths[scene]['gsres_path']
# Scenes flagged in scene_adjust are halved; the others keep their size.
scale_factor = 0.5 if scene_adjust[scene] else 1
width, height = scenes_size[scene]


def save_image(pixels, path):
    """Draw `pixels` without axes on a cleared figure and write it to `path`."""
    plt.clf()  # start empty so that earlier images are not stacked underneath
    plt.imshow(pixels)
    plt.axis('off')
    plt.savefig(path, dpi=250, bbox_inches='tight', pad_inches=0)


# Never request more images than the scene has (random.sample would raise).
sample_out = random.sample(range(len(scenes_files[scene])), min(sample_images, len(scenes_files[scene])))
for n in sample_out:
    image_name = scenes_files[scene][n]

    blend = pd.read_csv(f'{feature_dir}blend_{image_name}', sep=' ', header=None)
    feats = pd.read_csv(f'{feature_dir}features_{image_name}', sep=' ', header=None)

    raw = feats.iloc[:, :13].to_numpy()
    sum_aici = raw[:, 0:3]
    sum_aici_k = raw[:, 3:6]
    sum_ai = raw[:, 6:7]
    sum_ai_k = raw[:, 7:8]
    prod_1_ai = raw[:, 8:9]
    prod_1_ai_k = raw[:, 9:10]
    o_m_z = raw[:, 10:11]
    zmin = raw[:, 11:12]
    zmax = raw[:, 12:13]

    ctail = (sum_aici - sum_aici_k) / (sum_ai - sum_ai_k + eps)  # 3 columns
    prod_ai_k_n = prod_1_ai / (prod_1_ai_k + eps)                # 1 column
    tail = prod_1_ai_k * ((1.0 - prod_ai_k_n) * ctail)           # 3 columns
    z_norm = (o_m_z - zmin) / (zmax - zmin + eps)                # 1 column

    # NOTE(review): the training cell builds its inputs from ctail, tail and
    # z_norm only, whereas this cell used to prepend the blend columns as well.
    # Prepend them only when the loaded network is wide enough to expect them,
    # so the input always matches the model - confirm which layout is intended.
    columns = [ctail, tail, z_norm]
    base_width = sum(part.shape[1] for part in columns)
    if gsnn.in_features == base_width + blend.shape[1]:
        columns.insert(0, blend.to_numpy())
    inp = np.hstack(columns)
    if inp.shape[1] != gsnn.in_features:
        raise ValueError(f'Model expects {gsnn.in_features} input features but {inp.shape[1]} were built')

    # Use the notebook-wide device instead of a hard-coded 'cuda' so that the
    # cell also runs on CPU-only machines.
    input_tensor = torch.tensor(inp, dtype=torch.float32).to(device)

    with torch.no_grad():
        predictions = gsnn(input_tensor)
    output_image = predictions.cpu().numpy().reshape((height, width, 3))

    # Named gt_image/gsres_image so that the `gt` dictionary of ground-truth
    # arrays created earlier in the notebook is not overwritten.
    with Image.open(f'{images_dir}{image_name}.jpg') as image:
        gt_image = np.array(ImageOps.scale(image, scale_factor)) / 255.0
    with Image.open(f'{gsres_dir}{image_name}.png') as image:
        gsres_image = np.array(ImageOps.scale(image, scale_factor)) / 255.0

    save_image(gsres_image, f'{image_name}_3dgs.png')
    save_image(gt_image, f'{image_name}_actual.png')
    # The rows are reversed before drawing, as in show_image.
    save_image(output_image[::-1], f'{image_name}_inference.png')

In [None]:
for n in sample_out:
    image_name = scenes_files[scene][n]
    actual_png = f'{image_name}_actual.png'
    inference_png = f'{image_name}_inference.png'
    gsres_png = f'{image_name}_3dgs.png'

    # (reference image, test image, output file) for every error map.
    # The second entry used to repeat the first comparison, so
    # `_3dgs_flip.png` was a copy of `_gt_flip.png`; it now compares the 3DGS
    # render with the network output.
    # NOTE(review): confirm that the 3DGS render is the intended reference there.
    comparisons = [
        (actual_png, inference_png, f'{image_name}_gt_flip.png'),
        (gsres_png, inference_png, f'{image_name}_3dgs_flip.png'),
        (actual_png, gsres_png, f'{image_name}_gt_3dgs_flip.png'),
    ]
    for reference, test, out_path in comparisons:
        fl, _, _ = flip.evaluate(reference, test, "LDR")
        plt.clf()  # start empty so that earlier error maps are not stacked underneath
        plt.imshow(fl)
        plt.axis('off')
        plt.savefig(out_path, dpi=250, bbox_inches='tight', pad_inches=0)