# Distance Estimator
Estimates the real-world distance (in meters) to a detected object.

__Input__: Bounding box coordinates (xmin, ymin, xmax, ymax)   
__Output__: z coordinate of the object's 3D location in camera coordinates (z_loc)

## Load Modules

In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import time
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader 
from sklearn.preprocessing import StandardScaler

## Dataset

In [None]:
# Read the training and test splits from their CSV files.
train_csv, test_csv = 'dataset/data/train.csv', 'dataset/data/test.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [None]:
# Randomly hold out roughly 10% of the training rows for validation:
# each row draws a uniform number in [0, 1) and stays in `train` when it is
# below 0.9.
# NOTE: no random seed is set here, so the split differs between runs.
mask = np.random.rand(len(df_train)) < 0.9
train, valid = df_train.iloc[mask], df_train.iloc[~mask]

In [None]:
# Columns used as model inputs and as the regression target.
# NOTE(review): nine feature columns are selected here, while
# DistanceEstimator() as instantiated in this notebook expects 4 input
# features -- confirm which of the two is intended.
feature_cols = ['xmin', 'ymin', 'xmax', 'ymax', 'width', 'height', 'depth_x', 'depth_y', 'depth_min']
target_cols = ['zloc']

# Convert every split into a (features, target) pair of float32 tensors.
X_train, y_train = (torch.FloatTensor(train[cols].values) for cols in (feature_cols, target_cols))
X_valid, y_valid = (torch.FloatTensor(valid[cols].values) for cols in (feature_cols, target_cols))
X_test, y_test = (torch.FloatTensor(df_test[cols].values) for cols in (feature_cols, target_cols))

In [None]:
# Standardize the input features (zero mean, unit variance per column).
#
# * The scaler is fitted on the training features only; the same statistics
#   are then applied to the validation and test features so that all three
#   splits live on one scale.
# * scikit-learn returns NumPy arrays, so the results are converted back to
#   float32 tensors, which is what TensorDataset requires.
# * The targets are left in their original units: y_valid / y_test are never
#   scaled and the predictions are written next to `zloc` unchanged, so
#   scaling only y_train would put training and evaluation on different
#   scales. Re-fitting the same scaler on y_train would also discard the
#   feature statistics needed for the other splits.
scalar = StandardScaler()
X_train = torch.as_tensor(scalar.fit_transform(X_train.numpy()), dtype=torch.float32)
X_valid = torch.as_tensor(scalar.transform(X_valid.numpy()), dtype=torch.float32)
X_test = torch.as_tensor(scalar.transform(X_test.numpy()), dtype=torch.float32)

## Modeling

In [None]:
class DistanceEstimator(nn.Module):
  """Small fully connected regressor mapping a feature vector to one value.

  The network is four linear layers (in_features -> 6 -> 5 -> 2 -> 1) with a
  ReLU after each of the first three. The last layer is left linear so the
  output is an unbounded regression value.

  Args:
    in_features: Number of values in each input feature vector. Defaults to
      4, the width the first layer was originally hard-coded to. It must
      equal the size of the last dimension of the tensors given to
      ``forward``.
  """

  def __init__(self, in_features=4):
    super().__init__()
    self.activation = nn.ReLU()
    # The attribute names fc1..fc4 become the state_dict keys, so they are
    # kept unchanged to stay compatible with previously saved checkpoints.
    self.fc1 = nn.Linear(in_features, 6)
    self.fc2 = nn.Linear(6, 5)
    self.fc3 = nn.Linear(5, 2)
    self.fc4 = nn.Linear(2, 1)

  def forward(self, x):
    """Return predictions of shape (..., 1) for inputs of shape (..., in_features)."""
    hidden = x
    for layer in (self.fc1, self.fc2, self.fc3):
      hidden = self.activation(layer(hidden))
    # No activation on the output layer: this is a regression head.
    return self.fc4(hidden)

## Train

In [None]:
def saveModel(model, path="NetModel.pth"):
    """Save the parameters of ``model`` (its ``state_dict``) to ``path``.

    Only the state dict is written, not the module object itself, so loading
    requires constructing the model first and calling ``load_state_dict``.

    Args:
        model: Any ``torch.nn.Module``.
        path: Destination file. Defaults to ``"NetModel.pth"``, the location
            that was previously hard-coded.
    """
    torch.save(model.state_dict(), path)

In [None]:
from sklearn.metrics import mean_squared_error
def _train_one_epoch(model, dataloader, loss_fn, optimizer, device):
  """Run one optimisation pass over ``dataloader`` and return the mean batch loss."""
  # Validation switches the model to eval mode, so re-enable training mode
  # at the start of every epoch.
  model.train()
  running_loss = 0.0
  num_batches = 0
  for features, targets in dataloader:
    # The model lives on `device`; its inputs and targets must be there too.
    features, targets = features.to(device), targets.to(device)
    optimizer.zero_grad()
    loss = loss_fn(model(features), targets)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    num_batches += 1
  return running_loss / num_batches if num_batches else float("nan")


def _evaluate(model, dataloader, loss_fn, device):
  """Return ``(mean batch loss, RMSE over all samples)`` on ``dataloader``."""
  model.eval()
  running_loss = 0.0
  squared_error_sum = 0.0
  sample_count = 0
  num_batches = 0
  with torch.no_grad():
    for features, targets in dataloader:
      features, targets = features.to(device), targets.to(device)
      predictions = model(features)
      running_loss += loss_fn(predictions, targets).item()
      # Accumulate the raw squared error so the RMSE is taken over the whole
      # set; averaging per-batch RMSEs (or dividing their sum by the number
      # of samples) does not give the dataset RMSE.
      squared_error_sum += torch.sum((predictions - targets) ** 2).item()
      sample_count += targets.numel()
      num_batches += 1
  if sample_count == 0:
    # Nothing to evaluate: report NaN instead of dividing by zero.
    return float("nan"), float("nan")
  return running_loss / num_batches, (squared_error_sum / sample_count) ** 0.5


def train_model(model, train_dataloader, valid_dataloader, loss_fn, lr=1e-5, batch_size=512, epochs=100, validate=False):
  """Train ``model`` with Adam, checkpointing the best validation RMSE.

  After every epoch the model is evaluated on ``valid_dataloader``; whenever
  the validation RMSE improves on the best value seen so far, the weights are
  written with ``saveModel``. One summary line is printed per epoch.

  Args:
    model: The ``torch.nn.Module`` to optimise. It is moved to CUDA device 0
      when available, otherwise to the CPU.
    train_dataloader: Iterable of ``(features, targets)`` training batches.
    valid_dataloader: Iterable of ``(features, targets)`` validation batches.
    loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
      tensor.
    lr: Learning rate for the Adam optimizer.
    batch_size: Unused; kept for backward compatibility. The batch size is
      whatever the supplied dataloaders produce.
    epochs: Number of passes over ``train_dataloader``.
    validate: Unused; kept for backward compatibility. Validation always runs.

  Returns:
    None.
  """
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)

  # Pick the execution device and move the model's parameters/buffers to it.
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  model.to(device)

  # float("inf") instead of np.Inf, which was removed in NumPy 2.0.
  best_rmse = float("inf")
  print("Begin training...")
  for epoch in range(1, epochs + 1):
    train_loss_value = _train_one_epoch(model, train_dataloader, loss_fn, optimizer, device)
    val_loss_value, rmse = _evaluate(model, valid_dataloader, loss_fn, device)

    # Keep only the weights of the best epoch on disk. A NaN RMSE (empty
    # validation set) never compares as smaller, so nothing is saved then.
    if rmse < best_rmse:
      saveModel(model)
      best_rmse = rmse

    # Print the statistics of the epoch
    print('Epoch {0}/{1} - loss: {2:.4f} / val_loss: {3:.4f} - RMSE: {4:.4f}'.format(epoch, epochs, train_loss_value, val_loss_value,rmse))

In [None]:
# Wrap the tensors as (features, target) datasets.
# torch.as_tensor leaves tensors untouched but also accepts NumPy arrays
# (which is what scikit-learn scalers return), so the training data is valid
# for TensorDataset either way.
train_dataset = TensorDataset(
    torch.as_tensor(X_train, dtype=torch.float32),
    torch.as_tensor(y_train, dtype=torch.float32),
)
valid_dataset = TensorDataset(X_valid, y_valid)

# Reshuffle the training samples every epoch so consecutive mini-batches are
# not tied to the row order of the CSV; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=2)

In [None]:
# Fresh, untrained network plus the mean-squared-error criterion.
# No optimizer is created here: train_model builds its own Adam optimizer.
model, loss_func = DistanceEstimator(), nn.MSELoss()

In [None]:
# Train for 100 epochs.
# NOTE: train_model does not read its batch_size argument; the effective
# batch size is the one the DataLoaders were built with.
train_model(
    model=model,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    loss_fn=loss_func,
    epochs=100,
    batch_size=2048,
)

Begin training...
Epoch 1/100 - loss: 820.1153 / val_loss: 446.2309 - RMSE: 9.1343
Epoch 2/100 - loss: 404.3583 / val_loss: 413.9524 - RMSE: 8.8388
Epoch 3/100 - loss: 387.5919 / val_loss: 392.2649 - RMSE: 8.6115
Epoch 4/100 - loss: 352.8325 / val_loss: 352.4003 - RMSE: 8.1533
Epoch 5/100 - loss: 318.3038 / val_loss: 319.1914 - RMSE: 7.7451
Epoch 6/100 - loss: 288.2460 / val_loss: 288.8740 - RMSE: 7.3467
Epoch 7/100 - loss: 260.9907 / val_loss: 261.8633 - RMSE: 6.9647
Epoch 8/100 - loss: 237.7482 / val_loss: 239.5026 - RMSE: 6.6274
Epoch 9/100 - loss: 219.3085 / val_loss: 222.1979 - RMSE: 6.3532
Epoch 10/100 - loss: 205.3341 / val_loss: 209.1079 - RMSE: 6.1400
Epoch 11/100 - loss: 194.6163 / val_loss: 198.7911 - RMSE: 5.9692
Epoch 12/100 - loss: 185.7895 / val_loss: 189.9370 - RMSE: 5.8209
Epoch 13/100 - loss: 178.0124 / val_loss: 181.7656 - RMSE: 5.6797
Epoch 14/100 - loss: 170.9268 / val_loss: 174.3709 - RMSE: 5.5468
Epoch 15/100 - loss: 164.4998 / val_loss: 167.7377 - RMSE: 5.4252
E

## Predict

In [None]:
def predict(test_dataloader):
    """Load the saved checkpoint and predict one value per test sample.

    The weights are read from ``"NetModel.pth"`` into a freshly constructed
    ``DistanceEstimator``. The RMSE over all samples is printed.

    Args:
        test_dataloader: Iterable yielding ``(inputs, targets)`` pairs. Both
            batched pairs (from a ``DataLoader``) and single unbatched
            samples (from iterating a dataset directly) are accepted.

    Returns:
        A flat list of Python floats, one prediction per sample, in
        iteration order.
    """
    model = DistanceEstimator()
    path = "NetModel.pth"
    # The checkpoint may have been written from a model living on the GPU;
    # map_location lets it load on a CPU-only machine as well.
    model.load_state_dict(torch.load(path, map_location="cpu"))
    model.eval()

    pred = []
    squared_error_sum = 0.0
    total = 0

    with torch.no_grad():
        for inputs, outputs in test_dataloader:
            # A single sample taken straight from a dataset has no batch
            # dimension; add one so both cases share the same code path.
            if inputs.dim() == 1:
                inputs = inputs.unsqueeze(0)
            predicted_outputs = model(inputs).reshape(-1)
            targets = outputs.to(torch.float32).reshape(-1)

            # extend + tolist handles any batch size; float(tensor) only
            # works when the tensor holds exactly one element.
            pred.extend(predicted_outputs.tolist())
            squared_error_sum += torch.sum((predicted_outputs - targets) ** 2).item()
            total += targets.numel()

    # RMSE over the whole set: root of the mean squared error, rather than a
    # sum of per-batch RMSE values divided by the sample count.
    rmse = (squared_error_sum / total) ** 0.5 if total else float("nan")
    print('RMSE:', rmse)
    return pred

In [None]:
# Pair the test features with their ground-truth targets, then expose them
# through a loader that yields two samples per batch.
test_dataset = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=2)

In [None]:
# The TensorDataset itself (not test_dataloader) is passed here, so predict
# walks through the test set one sample at a time.
pred = predict(test_dataloader=test_dataset)

RMSE: 6.1483723454847885


In [None]:
# Attach the predictions as a new column next to the ground-truth `zloc`
# (pred must hold one value per row of df_test), then display the frame.
df_test['zloc_pred'] = pred
df_test

Unnamed: 0,filename,class,xmin,ymin,xmax,ymax,observation angle,xloc,yloc,zloc,zloc_pred
0,000003.png,Car,614.24,181.78,727.31,284.77,1.55,1.00,1.75,13.22,16.841417
1,000004.png,Car,280.38,185.10,344.90,215.59,1.96,-15.71,2.16,38.26,35.956409
2,000006.png,Car,548.00,171.33,572.40,194.42,-1.55,-2.72,0.82,48.22,51.254139
3,000007.png,Car,542.05,175.55,565.27,193.79,1.64,-4.71,1.71,60.52,56.263161
4,000008.png,Car,597.59,176.18,720.90,261.14,-1.33,1.07,1.55,14.44,18.912607
...,...,...,...,...,...,...,...,...,...,...,...
4050,007472.png,Car,560.57,179.91,582.47,196.80,1.65,-3.54,2.16,66.76,59.328232
4051,007473.png,Car,760.94,163.22,878.26,207.91,-0.02,7.22,0.90,24.06,24.770483
4052,007474.png,Car,561.93,186.85,698.62,273.77,1.89,0.55,1.80,14.99,19.802439
4053,007475.png,Car,271.08,193.61,375.66,255.15,-1.30,-7.46,1.65,19.13,23.211821
