@ author: ODD_team

# Distance Estimator
Estimates the real-world distance (unit: meters) to a detected object.

__Input__: Bounding box coordinates(xmin, ymin, xmax, ymax)   
__Output__: 3D location z in camera coordinates (z_loc)

## Load Module

In [None]:
# import module
from tqdm import tqdm
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
#import category_encoders as ce
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader 
from sklearn.preprocessing import StandardScaler
from custom_datasets import CustomDataset
from sklearn.metrics import mean_squared_error
import math
from sklearn.preprocessing import MinMaxScaler,StandardScaler, LabelEncoder

In [None]:
# create the ./weights directory (no error if it already exists); the working directory is not changed
os.makedirs('./weights', exist_ok=True)

## Dataset

In [None]:
# get data set
# load the train / validation / test splits into DataFrames
# (later cells read at least the 'zloc' and 'class' columns plus the names listed in `variable`)
df_train = pd.read_csv('../datasets/iou_train.csv')
df_valid = pd.read_csv('../datasets/iou_valid.csv')
df_test = pd.read_csv('../datasets/iou_test.csv')

In [None]:
# sort the z_loc values
df_train['zloc'].sort_values()

In [None]:
df_train.isna().sum(axis=0)

In [None]:
df_valid.isna().sum(axis=0)

In [None]:
df_test.isna().sum(axis=0)

In [None]:
#df_train = df_train[df_train['zloc'] > 0]
#df_valid = df_valid[df_valid['zloc'] > 0]
#df_test = df_test[df_test['zloc'] > 0]

In [None]:
#check the class
df_train['class'].unique()

In [None]:
# onehot encoding(dummy var)
# One-hot encode the object class (dummy variables).
# The category list is fixed from the training split so that every split gets
# exactly the same indicator columns, even when a class never occurs in the
# validation or test data (a per-split get_dummies would silently drop it).
class_dtype = pd.CategoricalDtype(sorted(df_train['class'].dropna().unique()))


def _class_dummies(frame):
    """Return one indicator column per training class for ``frame['class']``.

    Rows whose class is not among the training classes get all-zero indicators.
    """
    dummies = pd.get_dummies(frame['class'].astype(class_dtype))
    # Categorical input may yield categorical column labels; use plain strings
    # so the columns combine cleanly with the rest of the frame.
    dummies.columns = dummies.columns.astype(str)
    return dummies


df_train = pd.concat([df_train, _class_dummies(df_train)], axis=1)
df_valid = pd.concat([df_valid, _class_dummies(df_valid)], axis=1)
df_test = pd.concat([df_test, _class_dummies(df_test)], axis=1)

In [None]:
# TrVd
#df_train = pd.concat([df_train, df_valid], axis=0)

In [None]:
# encoding
# Fit the encoder once on the training classes and reuse that mapping for the
# other splits. Refitting per split can assign different integers to the same
# class whenever the splits do not contain the same set of classes.
le = LabelEncoder()
train_label = le.fit_transform(df_train['class'])
df_train['class_num'] = train_label

# transform() raises ValueError for a class that was not seen during fit,
# which surfaces a train/valid/test mismatch instead of hiding it.
valid_label = le.transform(df_valid['class'])
df_valid['class_num'] = valid_label

test_label = le.transform(df_test['class'])
df_test['class_num'] = test_label

In [None]:
# see the info of df
df_train.info()

In [None]:
# feature columns fed to the model: box coordinates/size, depth statistics and the one-hot class columns
variable = ['xmin','ymin','xmax','ymax','width', 'height','depth_mean_trim','depth_mean','depth_max','depth_median','Misc', 'bicycle', 'car', 'person', 'train', 'truck']
val_length = len(variable)  # number of input features (16); used as the model's input_dim
batch_sz = 24
# use the first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# train
# NOTE(review): CustomDataset is project-local; presumably scaler=True with train=True fits a new scaler — confirm
train_dataset = CustomDataset(df_train, variable, scaler=True, train=True, onehot=False)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_sz, shuffle=True)
# train scaler: reused for the valid/test datasets below
scaler_train = train_dataset.scaler

# valid
# NOTE(review): the train scaler is passed through the `train` argument — confirm this is what CustomDataset expects
valid_dataset = CustomDataset(df_valid, variable, True, train=scaler_train, onehot=False)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_sz, shuffle=True)

# test
# a single batch holding the whole test split, in DataFrame order (shuffle=False)
test_dataset = CustomDataset(df_test, variable, True, train=scaler_train, onehot=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=len(df_test), shuffle=False)

In [None]:
from pickle import dump

# scaler
# Persist the scaler taken from the training dataset.
# The target directory is created if missing, and the file is written inside a
# `with` block so the handle is flushed and closed even if pickling fails.
os.makedirs('../model', exist_ok=True)
with open('../model/lstm_scaler.pkl', 'wb') as scaler_file:
    dump(scaler_train, scaler_file)

In [None]:
val_length # 16 (the number of entries in `variable`)

In [None]:
# look the dataset
# print the first training batch only: inputs, their shape and dtype, then the targets
for idx, batch in enumerate(train_dataloader):
    if idx >= 1:
        break
    features, targets = batch[0], batch[1]
    for item in (features, features.shape, features.dtype, targets):
        print(item)

## Modeling

In [None]:
# zloc estimator model
class Zloc_Estimaotor(nn.Module):
    """LSTM followed by a fully connected head that regresses from the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: LSTM hidden size.
        layer_dim: number of stacked LSTM layers.
        output_dim: size of the final linear layer's output (default 1).
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim=1):
        super().__init__()

        # batch_first=True: inputs are (batch, seq_len, input_dim)
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=False)

        # Fully connected head: hidden_dim -> 306 -> 154 -> 76 -> output_dim
        layersize = [306, 154, 76]  # 294, 146, 72
        layerlist = []
        n_in = hidden_dim
        for i in layersize:
            layerlist.append(nn.Linear(n_in, i))
            layerlist.append(nn.ReLU())
            #layerlist.append(nn.BatchNorm1d(i))
            #layerlist.append(nn.Dropout(0.1))
            n_in = i
        # Honour output_dim instead of hard-coding a single output unit.
        layerlist.append(nn.Linear(n_in, output_dim))
        #layerlist.append(nn.Sigmoid())

        # Attribute names and layer order are kept so saved state_dicts still load.
        self.fc = nn.Sequential(*layerlist)

    def forward(self, x):
        """Return predictions of shape (batch, output_dim) for x of shape (batch, seq_len, input_dim)."""
        out, hn = self.rnn(x)
        # use only the LSTM output at the last time step
        output = self.fc(out[:, -1])
        return output

In [None]:
# another model(differ version)
class Zloc_Estimaotor_s(nn.Module):
    """Plain fully connected regressor (no recurrent layer).

    Layer widths: input_dim -> 32 -> 64 -> 128 -> 256 -> 128 -> 64 -> 32 -> 1,
    with ReLU and Dropout(0.1) after every hidden linear layer.
    """

    def __init__(self, input_dim):
        super().__init__()

        widths = [input_dim, 32, 64, 128, 256, 128, 64, 32]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.1)]
        # single-unit output layer
        stack.append(nn.Linear(widths[-1], 1))

        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the fully connected stack to x."""
        return self.fc(x)

## Make variables

In [None]:

import torch.nn.init as init
#def weight_init(m):
#    if isinstance(m, nn.Linear): # apply only to the weights of nn.Linear layers
#        init.kaiming_uniform_(m.weight.data) # He initialization

# variable 
input_dim = val_length  # one input per feature column
hidden_dim = 612 # 612
layer_dim = 3
        
model = Zloc_Estimaotor(input_dim, hidden_dim, layer_dim)
#model = Zloc_Estimaotor_s(input_dim)
#model.apply(weight_init)
#loss_fn = nn.MSELoss()
# L1 loss, i.e. the values reported below are mean absolute errors (MAE)
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)
# halve the learning rate after 10 epochs without improvement, never going below 5e-5
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.5,
                                                       patience = 10,
                                                       mode='min', # we expect the monitored value to decrease
                                                       verbose=True,
                                                       min_lr=5e-5)
from early_stopping import EarlyStopping
# NOTE(review): EarlyStopping is project-local; the first argument is presumably the patience in epochs — confirm
early_stopping = EarlyStopping(70, verbose=True)   

model.to(device)

In [None]:
# train parameters
def count_parameter(model):
    """Return the number of scalar parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
count_parameter(model) # 5686657

## Make Train, Valid function

In [None]:
# train function
def train(model, train_dataloader, idx_interval):
    """Run one optimisation pass over ``train_dataloader``.

    Relies on the module-level ``optimizer``, ``loss_fn``, ``device``,
    ``batch_sz`` and ``epoch`` (the latter two only for the progress message).

    Args:
        model: network to train; switched to training mode here.
        train_dataloader: iterable of batches where batch[0] holds the inputs
            and batch[1] the targets.
        idx_interval: a progress line is printed every ``idx_interval`` batches.

    Returns:
        (train_loss, train_rmse): the mean of the per-batch losses and its
        square root.
    """
    model.train()

    num_batches = len(train_dataloader)
    running_loss = 0

    for idx, batch in enumerate(train_dataloader):
        features, target = batch[0], batch[1]
        optimizer.zero_grad()

        # insert a middle axis of length 1 so the model receives (batch, 1, -1)
        inp = features.reshape(len(features), 1, -1)

        prediction = model(inp.to(device))
        loss = loss_fn(prediction, target.to(device)).cpu()

        # Backpropagation
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        if idx % idx_interval == 0:
            print("Train Epoch: {} [{}/{}] \t Train Loss(MAE): {:.4f} \t Train RMAE: {:.4f}".format(
                epoch, batch_sz*(idx+1), num_batches*batch_sz, batch_loss, np.sqrt(batch_loss)))

    train_loss = running_loss / num_batches
    train_rmse = np.sqrt(train_loss)

    return train_loss, train_rmse
#return loss and trainrmse

In [None]:
# eval function
def evaluate(model, valid_dataloader):
    """Compute the mean loss of ``model`` over every sample in ``valid_dataloader``.

    Relies on the module-level ``loss_fn`` and ``device``. Gradients are
    disabled and the model is put in evaluation mode.

    Args:
        model: network to evaluate.
        valid_dataloader: iterable of batches where batch[0] holds the inputs
            and batch[1] the targets.

    Returns:
        (valid_loss, valid_rmse): the sample-weighted mean loss and its
        square root.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    model.eval()

    total_loss = 0.0
    total_samples = 0

    with torch.no_grad():
        for batch in valid_dataloader:
            features, target = batch[0], batch[1]
            n_samples = len(features)
            # insert a middle axis of length 1 so the model receives (batch, 1, -1)
            inp = features.reshape(n_samples, 1, -1)
            predictions = model(inp.to(device))
            loss = loss_fn(predictions, target.to(device))
            # Assumes loss_fn averages over the batch (the default for
            # nn.L1Loss / nn.MSELoss). Weighting by batch size keeps a short
            # final batch from counting as much as a full one.
            total_loss += loss.item() * n_samples
            total_samples += n_samples

    if total_samples == 0:
        raise ValueError("valid_dataloader yielded no samples")

    valid_loss = total_loss / total_samples
    valid_rmse = np.sqrt(valid_loss)

    return valid_loss, valid_rmse

## Train and Validation

In [None]:
# training
Epoch = 1000  # upper bound on the number of epochs; the loop can break earlier via early stopping
best_mae = 99999  # large sentinel so the first epoch always counts as an improvement
best_train_mae = 99999

# per-epoch history, plotted in the visualization cells below
train_mae_list = []
valid_mae_list = []


for epoch in range(1,(Epoch+1)):
    # `epoch` is also read as a global inside train() for its progress message
    train_mae, train_rmae = train(model, train_dataloader, 200)
    valid_mae, valid_rmae = evaluate(model, valid_dataloader)

    print("[Epoch: {} \t Valid MAE: {:.4f}".format(epoch, valid_mae))
    print("[Epoch: {} \t Train MAE: {:.4f}".format(epoch, train_mae))
    
    # ReduceLROnPlateau monitors the validation MAE
    scheduler.step(valid_mae)       
    # Save model
    if valid_mae < best_mae:
        path = "./weights/ODD_variable16.pth"
        torch.save(model.state_dict(), path) # saves only the model weights (state_dict), not the model structure
        best_mae = valid_mae
        # train MAE of the epoch with the best validation MAE (not the lowest train MAE)
        best_train_mae = train_mae
        
    train_mae_list.append(train_mae)
    valid_mae_list.append(valid_mae)
    
    # NOTE(review): EarlyStopping is project-local; presumably it sets `early_stop` once valid_mae stops improving — confirm
    early_stopping(valid_mae, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

In [None]:
# train result of mae
print('Valid best:',best_mae)
print('Train best:',best_train_mae)

# Epoch visualization

In [None]:
# visualizing
import matplotlib.pyplot as plt

# train MAE on the left y-axis, valid MAE on a twin right y-axis, both limited to 0-5
fig, ax1 = plt.subplots(figsize=(20,10))
ax1.plot(train_mae_list, ls='-', color='blue', label='train')
ax1.set_ylim(0,5)

ax2 = ax1.twinx()
ax2.plot(valid_mae_list, ls='--', color='red', label='valid')
ax2.set_ylim(0,5)

ax1.set_title('MAE error')
# each axis draws its own legend, placed in opposite corners
ax1.legend(loc='upper right')
ax2.legend(loc='upper left')
plt.show()


In [None]:
fig = plt.figure(figsize=(20,10))
plt.plot(train_mae_list, ls='-', color='blue', label='train')
plt.title('MAE loss - train')
plt.legend(loc='best', labels=['train'])

In [None]:
# validation MAE per epoch
fig = plt.figure(figsize=(20,10))
# label the series as 'valid' (it was labelled 'train' by copy-paste)
plt.plot(valid_mae_list, ls='-', color='red', label='valid')
plt.title('MAE loss - valid')
plt.legend(loc='best', labels=['valid'])

# Best performance

In [None]:
# get weights
# rebuild the architecture, then load the checkpoint written during training
model = Zloc_Estimaotor(input_dim, hidden_dim,layer_dim)
# map_location remaps the stored tensors to the current device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine
model.load_state_dict(torch.load('./weights/ODD_variable16.pth', map_location=device))
# evaluation mode for the prediction cells below
model.eval()
model.to(device)

# Predict Train

In [None]:
# predict value
# Rebuild the loader unshuffled with one batch covering the whole split, so the
# predictions line up row-for-row with df_train.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=len(df_train), shuffle=False)
train_pred = next(iter(train_dataloader))[0]
# Inference only: without no_grad() the forward pass over the full split would
# keep the whole autograd graph in memory.
with torch.no_grad():
    predict_zloc = model(train_pred.reshape(-1,1,input_dim).to(device))

In [None]:
# get the result
df_train['predict'] = predict_zloc.cpu().detach().numpy()
df_train[['zloc','predict']].head(10)

In [None]:
# calculate
import numpy as np
abs0 = np.abs(df_train.zloc-df_train.predict)
abs0

In [None]:
# mae
sum(abs0/len(df_train))

In [None]:
# rmse
np.mean(np.square(df_train['zloc']-df_train['predict']))**(1/2)

In [None]:
# accuracy
func = np.sum(np.abs((df_train.predict - df_train.zloc))/(df_train.predict))
men = func/len(df_train)
1-men

# Predict Valid

In [None]:
# valid set
# Rebuild the loader unshuffled with one batch covering the whole validation
# split. The batch size must be len(df_valid): sizing it by len(df_train) only
# works while the training split is at least as large as the validation split.
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=len(df_valid), shuffle=False)
valid_pred = next(iter(valid_dataloader))[0]
# Inference only: no autograd graph is needed.
with torch.no_grad():
    predict_zloc = model(valid_pred.reshape(-1,1,input_dim).to(device))

In [None]:
# estimate valid
df_valid['predict'] = predict_zloc.cpu().detach().numpy()
df_valid[['zloc','predict']].head(10)

In [None]:
abs0 = np.abs(df_valid.zloc-df_valid.predict)
abs0

In [None]:
# mae
sum(abs0/len(df_valid))

In [None]:
#rmse
np.mean(np.square(df_valid['zloc']-df_valid['predict']))**(1/2)

In [None]:
# accuracy
func = np.sum(np.abs((df_valid.predict - df_valid.zloc))/(df_valid.predict))
men = func/len(df_valid)
1-men

# Predict Test

In [None]:
test_mse, test_rmse = evaluate(model, test_dataloader)
print('Test MAE: {:4f} \t Test RMAE: {:4f}'.format(test_mse, test_rmse))

In [None]:
# look dataset
# test_dataloader was built with batch_size=len(df_test) and shuffle=False, so
# its first batch is the whole test split in DataFrame order.
test_pred = next(iter(test_dataloader))[0]
# Inference only: no autograd graph is needed.
with torch.no_grad():
    predict_zloc = model(test_pred.reshape(-1,1,input_dim).to(device))

In [None]:
df_test['predict'] = predict_zloc.cpu().detach().numpy()
df_test[['zloc','predict']].head(10)

In [None]:
import numpy as np
abs0 = np.abs(df_test.zloc-df_test.predict)
abs0

In [None]:
# mae
sum(abs0/len(df_test))

In [None]:
# rmse
np.mean(np.square(df_test['zloc']-df_test['predict']))**(1/2)

In [None]:
# accuracy: 1 - mean(|predict - zloc| / predict)
# NOTE(review): the relative error is normalised by the prediction, whereas Abs_rel further down divides by zloc — confirm this is intended
func = np.sum(np.abs((df_test.predict - df_test.zloc))/(df_test.predict))
men = func/len(df_test)
1-men

In [None]:
# Compute accuracy per object class

In [None]:
df_test['class'].unique()

In [None]:
matrix = pd.DataFrame(columns=['type','RMSE','MAE','Accuracy'])
matrix

In [None]:
# truck
truck = df_test['class'].eq('truck')
df_truck = df_test.loc[truck]
truck_n = len(df_truck)

# mae
abs0 = (df_truck.zloc - df_truck.predict).abs()
truck_mae = sum(abs0/truck_n)
print(truck_mae) # 1.8629

# rmse
truck_rmse = np.mean(np.square(df_truck['zloc']-df_truck['predict']))**(1/2)
print(truck_rmse) # 3.2170

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_truck.predict - df_truck.zloc))/(df_truck.predict))
men = func/truck_n
truck_acc = 1-men
print(truck_acc) # 0.9376

# row 0 of the summary table
matrix.loc[0,'type'] = 'truck'
matrix.loc[0,'RMSE'] = round(truck_rmse,4)
matrix.loc[0,'MAE'] = round(truck_mae,4)
matrix.loc[0,'Accuracy'] = round(truck_acc,4)
matrix

In [None]:
# car
car = df_test['class'].eq('car')
df_car = df_test.loc[car]
car_n = len(df_car)

# mae
abs0 = (df_car.zloc - df_car.predict).abs()
car_mae = sum(abs0/car_n)
print(car_mae) # 1.2531

# rmse
car_rmse = np.mean(np.square(df_car['zloc']-df_car['predict']))**(1/2)
print(car_rmse) # 2.2713

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_car.predict - df_car.zloc))/(df_car.predict))
men = func/car_n
car_acc = 1-men
print(car_acc) # 0.9519

# row 1 of the summary table
matrix.loc[1,'type'] = 'car'
matrix.loc[1,'RMSE'] = round(car_rmse,4)
matrix.loc[1,'MAE'] = round(car_mae,4)
matrix.loc[1,'Accuracy'] = round(car_acc,4)

In [None]:
# person
person = df_test['class'].eq('person')
df_person = df_test.loc[person]
person_n = len(df_person)

# mae
abs0 = (df_person.zloc - df_person.predict).abs()
person_mae = sum(abs0/person_n)
print(person_mae) # 0.7012

# rmse
person_rmse = np.mean(np.square(df_person['zloc']-df_person['predict']))**(1/2)
print(person_rmse) # 1.2880

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_person.predict - df_person.zloc))/(df_person.predict))
men = func/person_n
person_acc = 1-men
print(person_acc) # 0.9529

# row 2 of the summary table
matrix.loc[2,'type'] = 'person'
matrix.loc[2,'RMSE'] = round(person_rmse,4)
matrix.loc[2,'MAE'] = round(person_mae,4)
matrix.loc[2,'Accuracy'] = round(person_acc,4)

In [None]:
# train
# Rows of the test split whose class is 'train' (the vehicle).
# Dedicated names are used here: binding `train` / `df_train` would overwrite
# the train() function and the training DataFrame that the visualization
# cells at the end of the notebook still plot.
train_mask = df_test['class']=='train'
df_train_cls = df_test[train_mask]

# mae
abs0 = np.abs(df_train_cls.zloc-df_train_cls.predict)
print(sum(abs0/len(df_train_cls)))  # 1.6821

# rmse
print(np.mean(np.square(df_train_cls['zloc']-df_train_cls['predict']))**(1/2)) # 2.3989

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_train_cls.predict - df_train_cls.zloc))/(df_train_cls.predict))
men = func/len(df_train_cls)
print(1-men) # 0.8611

# row 3 of the summary table
matrix.loc[3,'type'] = 'train'
matrix.loc[3,'RMSE'] = round(np.mean(np.square(df_train_cls['zloc']-df_train_cls['predict']))**(1/2),4)
matrix.loc[3,'MAE'] = round(sum(abs0/len(df_train_cls)),4)
matrix.loc[3,'Accuracy'] = round(1-men,4)

In [None]:
# misc
misc = df_test['class'].eq('Misc')
df_misc = df_test.loc[misc]
misc_n = len(df_misc)

# mae
abs0 = (df_misc.zloc - df_misc.predict).abs()
misc_mae = sum(abs0/misc_n)
print(misc_mae)  # 1.2972

# rmse
misc_rmse = np.mean(np.square(df_misc['zloc']-df_misc['predict']))**(1/2)
print(misc_rmse) # 1.7389

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_misc.predict - df_misc.zloc))/(df_misc.predict))
men = func/misc_n
misc_acc = 1-men
print(misc_acc) # 0.9384

# row 4 of the summary table
matrix.loc[4,'type'] = 'Misc'
matrix.loc[4,'RMSE'] = round(misc_rmse,4)
matrix.loc[4,'MAE'] = round(misc_mae,4)
matrix.loc[4,'Accuracy'] = round(misc_acc,4)

In [None]:
# BICYCLE
bicycle = df_test['class'].eq('bicycle')
df_bicycle = df_test.loc[bicycle]
bicycle_n = len(df_bicycle)

# mae
abs0 = (df_bicycle.zloc - df_bicycle.predict).abs()
bicycle_mae = sum(abs0/bicycle_n)
print(bicycle_mae)  # 1.0336

# rmse
bicycle_rmse = np.mean(np.square(df_bicycle['zloc']-df_bicycle['predict']))**(1/2)
print(bicycle_rmse) # 1.1845

# accuracy: 1 - mean(|predict - zloc| / predict)
func = np.sum(np.abs((df_bicycle.predict - df_bicycle.zloc))/(df_bicycle.predict))
men = func/bicycle_n
bicycle_acc = 1-men
print(bicycle_acc) # 0.9392

# row 5 of the summary table
matrix.loc[5,'type'] = 'bicycle'
matrix.loc[5,'RMSE'] = round(bicycle_rmse,4)
matrix.loc[5,'MAE'] = round(bicycle_mae,4)
matrix.loc[5,'Accuracy'] = round(bicycle_acc,4)

In [None]:
# DataFrame
matrix


In [None]:
matrix.set_index('type', inplace=True)
matrix.loc[['Misc','bicycle','car','person','train','truck'],['RMSE','MAE','Accuracy']]

# Metrics computed following other papers

In [None]:
performance = pd.DataFrame(index=['LSTM'])

In [None]:
# Abs Relative difference (Abs Rel)
# mean of |predict - zloc| / zloc over the test split
abs_rel_terms = (df_test['predict'] - df_test['zloc']).abs() / df_test['zloc']
Abs_rel = abs_rel_terms.sum() / len(df_test)
print('Abs_rel', Abs_rel) 
performance['Abs_rel'] = round(Abs_rel,3)

In [None]:
# Squared relative difference: mean of (predict - zloc)^2 / zloc over the test split
squa_rel_terms = (df_test['predict'] - df_test['zloc'])**2 / df_test['zloc']
Squa_rel = squa_rel_terms.sum() / len(df_test)
print('Squa_rel:',Squa_rel) 
performance['Squa_rel'] = round(Squa_rel,3)

In [None]:
# RMSE (log): sqrt(mean((log(predict) - log(zloc))^2)).
# The square root is taken of the mean; applying it to each squared term and
# then averaging gives the mean absolute log error, not an RMSE.
log_diff = np.log(df_test.predict) - np.log(df_test.zloc)
RMSE_log = np.sqrt(np.mean(log_diff**2))
print('RMSE_log', RMSE_log)
performance['RMSE_log'] = round(RMSE_log,3)

In [None]:
def threshold(delta, df=None):
    """Return the fraction of rows with max(zloc/predict, predict/zloc) < delta.

    Args:
        delta: exclusive upper bound on the ratio (e.g. 1.25, 1.25**2).
        df: DataFrame with ``zloc`` and ``predict`` columns. Defaults to the
            module-level ``df_test`` so existing ``threshold(delta)`` calls
            keep working.

    Returns:
        Number of rows under the bound divided by the number of rows.

    Raises:
        ZeroDivisionError: if ``df`` has no rows.
    """
    if df is None:
        df = df_test
    # Positional arrays keep the result independent of the DataFrame's index
    # labels (label lookups over range(len(df)) need a 0..n-1 index).
    zloc = df['zloc'].to_numpy(dtype=float)
    predict = df['predict'].to_numpy(dtype=float)
    ratio = np.maximum(zloc / predict, predict / zloc)
    return int(np.count_nonzero(ratio < delta)) / len(df)

# fraction of test rows within ratio 1.25, 1.25^2 and 1.25^3, each rounded to 3 decimals
percentage_1, percentage_2, percentage_3 = (round(threshold(1.25**power),3) for power in (1, 2, 3))
print('Delta 1.25', percentage_1)
print('Delta 1.25^2', percentage_2)
print('Delta 1.25^3', percentage_3)


# store the three values in the summary table
performance['delta_1.25'] = round(percentage_1,3)
performance['delta_1.25^2'] = round(percentage_2,3)
performance['delta_1.25^3'] = round(percentage_3,3)

In [None]:
performance

### Compute accuracy per distance range
## Divide the test set by distance range and calculate the accuracy for each

In [None]:
# accuracy for zloc < 10
# Strict upper bound: the next bin starts at zloc >= 10, so `<= 10` would
# count rows with zloc == 10 in both bins.
first = df_test[df_test['zloc']<10]
func1 = np.sum(np.abs((first.predict - first.zloc))/(first.predict))
men1 = func1/len(first)
1-men1

In [None]:
# accuracy for 10 <= zloc < 20
mask = df_test['zloc'].ge(10) & df_test['zloc'].lt(20)
second = df_test.loc[mask]
func2 = (second.predict.sub(second.zloc).abs() / second.predict).sum()
men2 = func2 / len(second)
1 - men2

In [None]:
# accuracy for 20 <= zloc < 30
mask = df_test['zloc'].ge(20) & df_test['zloc'].lt(30)
third = df_test.loc[mask]
func3 = (third.predict.sub(third.zloc).abs() / third.predict).sum()
men3 = func3 / len(third)
1 - men3

In [None]:
# accuracy for 30 <= zloc < 40
mask = df_test['zloc'].ge(30) & df_test['zloc'].lt(40)
fourth = df_test.loc[mask]
func4 = (fourth.predict.sub(fourth.zloc).abs() / fourth.predict).sum()
men4 = func4 / len(fourth)
1 - men4

In [None]:
# accuracy for 40 <= zloc < 50
mask = df_test['zloc'].ge(40) & df_test['zloc'].lt(50)
fifth = df_test.loc[mask]
func5 = (fifth.predict.sub(fifth.zloc).abs() / fifth.predict).sum()
men5 = func5 / len(fifth)
1 - men5

In [None]:
# accuracy for 50 <= zloc < 60
mask = df_test['zloc'].ge(50) & df_test['zloc'].lt(60)
sixth = df_test.loc[mask]
func6 = (sixth.predict.sub(sixth.zloc).abs() / sixth.predict).sum()
men6 = func6 / len(sixth)
1 - men6

In [None]:
# accuracy for 60 <= zloc < 70
mask = df_test['zloc'].ge(60) & df_test['zloc'].lt(70)
seventh = df_test.loc[mask]
func7 = (seventh.predict.sub(seventh.zloc).abs() / seventh.predict).sum()
men7 = func7 / len(seventh)
1 - men7

In [None]:
# Accuracy (1 - mean(|predict - zloc| / predict)) for each 10-unit zloc bin
# from [0, 10) up to [100, 110).
acc_list = []
for i in range(1,12):
    lower, upper = (i-1)*10, i*10
    mask = (df_test['zloc'] >= lower) & (df_test['zloc'] < upper)
    data = df_test[mask]
    if data.empty:
        # No samples in this range: record NaN instead of dividing by zero.
        acc_list.append(float('nan'))
        continue
    value = np.sum(np.abs((data.predict - data.zloc))/(data.predict))
    output = value/len(data)
    acc_list.append(1-output)

In [None]:
acc_list

# Visualization

In [None]:
# scatter of the 'depth_mean' column against 'zloc' for the rows currently held in df_train
df_train.plot(kind='scatter', x='zloc', y='depth_mean', marker='o', alpha=0.3, s=50, figsize=(20,10), color='blue')
plt.show()

In [None]:
# predicted vs. actual z location for the rows currently held in df_train
df_train.plot(kind='scatter', x='predict', y='zloc', marker='o', alpha=0.3, s=50, figsize=(10,10), color='blue')
plt.show()

In [None]:
# predicted vs. actual z location on the validation split
df_valid.plot(kind='scatter', x='predict', y='zloc', marker='o', alpha=0.3, s=50, figsize=(10,10), color='blue')
plt.show()

In [None]:
# predicted vs. actual z location on the test split
df_test.plot(kind='scatter', x='predict', y='zloc', marker='o', alpha=0.3, s=50, figsize=(10,10), color='blue')