In [None]:
import pandas as pd
import torch 
import numpy as np
from torch.utils.data import Dataset, DataLoader
import sys
sys.path.append("..")
from utils import pad_collate
from dataloader_comma import CommaDataset
from model import VTN
import matplotlib.pyplot as plt 
from PIL import Image
import glob
import os
from utils import * 
import re
from pathlib import Path
import argparse
import yaml

In [99]:
def rmse_loss(input, target, mask, reduction="mean"):
    """Root-mean-square error over the entries that are NOT masked out.

    Args:
        input: predictions; indexed with the boolean tensor ``~mask``.
        target: ground-truth values, indexed the same way as ``input``.
        mask: boolean tensor; entries where it is True are excluded.
        reduction: ``"mean"`` returns the scalar RMSE; any other value
            returns the per-element squared errors instead.
    """
    keep = ~mask
    squared_error = (input[keep] - target[keep]) ** 2
    if reduction != "mean":
        return squared_error
    return squared_error.mean().sqrt()

def mae_loss(input, target, mask, reduction="mean"):
    """Mean absolute error over the entries that are NOT masked out.

    Args:
        input: predictions; indexed with the boolean tensor ``~mask``.
        target: ground-truth values, indexed the same way as ``input``.
        mask: boolean tensor; entries where it is True are excluded.
        reduction: ``"mean"`` returns the scalar MAE; any other value
            returns the per-element absolute errors instead.
    """
    keep = ~mask
    # Take the absolute value BEFORE averaging: averaging the signed errors
    # first lets positive and negative errors cancel (abs(mean) != mean(abs)).
    abs_error = (input[keep] - target[keep]).abs()
    return abs_error.mean() if reduction == "mean" else abs_error

In [100]:
def argparse_namespace_constructor(loader, node):
    """Build an ``argparse.Namespace`` from a YAML mapping node.

    The mapping produced by ``loader.construct_mapping(node)`` is copied
    into the attribute dictionary of a fresh, empty namespace.
    """
    namespace = argparse.Namespace()
    vars(namespace).update(loader.construct_mapping(node))
    return namespace

# Register the custom constructor with PyYAML so that nodes tagged
# `python/object:argparse.Namespace` are rebuilt through
# `argparse_namespace_constructor` when a YAML file is loaded.
yaml.add_constructor('tag:yaml.org,2002:python/object:argparse.Namespace', argparse_namespace_constructor)


Load the checkpoint

In [None]:
from pathlib import Path
import torch
import yaml
import os
from torch.utils.data import DataLoader
from dataloader_comma import CommaDataset


def get_regular_ckpt_from_lightning_checkpoint(state_dict):
    """Strip the leading ``"model."`` prefix from the keys of a state dict.

    Only the prefix is removed; a ``"model."`` substring occurring later in
    a key (e.g. ``"model.encoder.model.weight"``) is left untouched.

    Args:
        state_dict: mapping of parameter names to values.

    Returns:
        A new dict with the prefix removed from every key that starts with
        it. If ``state_dict`` is not a mapping with string keys, it is
        returned unchanged (best-effort behaviour).
    """
    prefix = "model."
    try:
        return {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }
    except (AttributeError, TypeError):
        # Not a mapping, or keys are not strings: hand the input back as-is.
        return state_dict

def find_latest_checkpoint_and_yaml(root_dir: str):
    """Locate the most recently modified ``.ckpt`` under ``root_dir``.

    Also looks for a YAML file next to that checkpoint: first in the
    checkpoint's own directory, then in its parent directory, trying
    ``*.yaml`` before ``*.yml`` in each. The newest match of the first
    non-empty (directory, pattern) combination is used.

    Returns:
        ``(checkpoint_path, yaml_path_or_None, parsed_yaml_or_None)``.
        The parsed value is None when no YAML file was found or when
        loading it raised.

    Raises:
        FileNotFoundError: no ``.ckpt`` file exists under ``root_dir``.
    """
    def _mtime(path):
        return path.stat().st_mtime

    checkpoints = list(Path(root_dir).rglob("*.ckpt"))
    if len(checkpoints) == 0:
        raise FileNotFoundError(f"No .ckpt files found under {root_dir}")
    newest_ckpt = max(checkpoints, key=_mtime)

    # Lazily glob each (directory, pattern) pair in priority order and stop
    # at the first one that yields any file.
    search_dirs = (newest_ckpt.parent, newest_ckpt.parent.parent)
    match_groups = (
        list(folder.glob(pattern))
        for folder in search_dirs
        if folder is not None
        for pattern in ("*.yaml", "*.yml")
    )
    matches = next((group for group in match_groups if group), None)
    config_file = max(matches, key=_mtime) if matches else None

    parsed = None
    if config_file is not None:
        with config_file.open("r") as handle:
            try:
                parsed = yaml.safe_load(handle)
            except Exception:
                parsed = None

    config_path = None if config_file is None else str(config_file)
    return str(newest_ckpt), config_path, parsed

# Example: locate the newest checkpoint (and its YAML file, if any) of one experiment.
root = "/kaggle/input/comma-ckpt-seed123/ckpts_final_comma_distance_none_True_1_clip"
ckpt_path, yaml_path, yaml_data = find_latest_checkpoint_and_yaml(root)
print("Checkpoint:", ckpt_path)
print("Yaml:", yaml_path)

# Test split of the Comma dataset, one sample per batch, in file order.
dataset_comma = CommaDataset(
    dataset_type="test",
    use_transform=False,
    multitask="distance",
    ground_truth="desired",
    return_full=True,
    dataset_path="/kaggle/input/final-hdf5-files",
    dataset_fraction=1.0,
)

print(f"Dataset created with {len(dataset_comma)} samples")

dataloader_comma = DataLoader(
    dataset_comma,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

# Choose the device ONCE, before building the model, so the model, the
# checkpoint tensors and the final `.to(device)` all agree. Prefer the second
# GPU (as the original setup did), fall back to the first GPU, then to CPU.
if torch.cuda.device_count() > 1:
    device = "cuda:1"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# NOTE(review): VTN's `device` argument was previously hard-coded to 'cuda:1';
# it is assumed to accept any torch device string — confirm against model.py.
model = VTN(multitask='distance', backbone='none', concept_features=True, device=device, return_concepts=True, concept_source='clip')

# Load the checkpoint found above (`ckpt_path`), remapping its tensors onto
# `device` so a checkpoint saved on a GPU can still be opened on a CPU-only host.
ckpt = torch.load(ckpt_path, map_location=device)
state_dict = get_regular_ckpt_from_lightning_checkpoint(ckpt['state_dict'])

try:
    model.load_state_dict(state_dict)
except RuntimeError as e:
    # Key/shape mismatches surface as RuntimeError; retry leniently but say so.
    print("Load state_dict strict failed, retrying with strict=False:", e)
    model.load_state_dict(state_dict, strict=False)

model.eval()
model = model.to(device)
print("Model loaded.")

In [None]:
# Collect every checkpoints directory whose YAML hyper-parameter file defines
# `scenario_type`, then compute the MAE of each prediction CSV found there.
elems = []      # checkpoints directories that have a `scenario_type` hparam
n_feats = []    # the matching `scenario_type` value, index-aligned with `elems`
p = '/kaggle/input/comma-ckpt-seed123/'

for elem in os.listdir(p):
    l = p + elem + "/lightning_logs"
    # Skip experiment folders that are completely empty.
    if len(os.listdir(p + elem)) == 0: continue
    for version in os.listdir(l):
        k = f'{l}/{version}/checkpoints/'
        # Skip versions that never wrote a checkpoints folder.
        if 'checkpoints' not in os.listdir(f'{l}/{version}'): continue 
        for filename in os.listdir(k):
            if "yaml" in filename:
                with open(k + filename, "r") as file:
                    # NOTE(review): yaml.load with FullLoader can construct Python
                    # objects; only use it on trusted files.
                    yaml_data = yaml.load(file, Loader=yaml.FullLoader)
                    # vars() requires the loaded object to expose __dict__
                    # (presumably an argparse.Namespace — confirm).
                    data_dict = vars(yaml_data)
                if 'scenario_type' in data_dict:
                    print(k + filename)
                    elems.append(k)
                    n_feats.append(data_dict['scenario_type'])

# MAE per prediction CSV, keyed by "<csv filename><scenario_type>".
res = {}
for i, elem in enumerate(elems):
    task = []
    for filename in os.listdir(elem):
        if '.csv' in filename: 
            df = pd.read_csv(elem + filename)
            df.columns = ['preds', 'targets']
            # Mask (= exclude) rows: for non-angle runs, targets equal to 0 or above 70;
            # for angle runs, targets equal to +inf. The conditional applies to the
            # whole `A | B` expression.
            m = (df['targets'] == 0).astype(bool) | (df['targets'] > 70).astype(bool)  if "angle" not in elem  else (df['targets'] == np.inf).astype(bool)
            
            
            loss3 = mae_loss(torch.tensor(df['preds']),  torch.tensor(df['targets']), torch.tensor(m))
            task.append(round(loss3.item(), 2))
            # NOTE(review): every CSV of the same directory stores the SAME `task`
            # list object here, so `val[0]` below is always the first CSV's MAE —
            # confirm this is intended.
            res[filename + n_feats[i]] = task

# Flatten into rows, inferring task and size from substrings of the key.
rows = []
for elem in res.keys():
    task = "dist" if "dist" in elem else ("angle" if 'angle' in elem else "multitask")
    dataset = "comma"
    # First matching substring wins, checked in the order 100, 300, 24, 48.
    size = 100 if '100' in elem else (300 if '300' in elem else (24 if '24' in elem else 48 if '48' in elem else "none"))
    val = res[elem]
    # Multitask runs and runs without a recognised size are left out.
    if task == 'multitask' or size == "none": continue
    rows.append({'task': task, 'dataset': dataset, "MAE": val[0], "size":size})

In [None]:
# For every experiment folder, take the highest-numbered lightning_logs version
# and compute (MAE, RMSE) for each prediction CSV stored in its checkpoints folder.
p = '/kaggle/input/comma-ckpt-seed123/'

experiments = os.listdir(p)
res = {}        # experiment name -> list of (MAE, RMSE) tuples, one per CSV
res_abl = {}    # experiment name -> list of MAE values for CSVs with "00" in their name
res_ckpt = {}   # experiment name -> list of .ckpt paths
for elem in experiments:
    # NOTE(review): the bare "1" test skips every experiment whose folder name
    # contains the character "1" anywhere — confirm this is intended.
    if 'ablation' in elem or "0.75" in elem or '0.25' in elem or "1" in elem or '0.5' in elem: continue
    path = p + elem + "/lightning_logs/" 
    if len(os.listdir(p + elem)) == 0: continue
    vs = os.listdir(path)
    filt = []
    for elem1 in vs: 
        if 'version' in elem1:
            filt.append(elem1)
    # Numeric suffix after the last "_" of each version folder name.
    versions =[int(elem.split("_")[-1])for elem in filt]
    versions = sorted(versions)
    if len(versions) == 0: continue
    # Use the highest version number only.
    version = f"version_{versions[-1]}"
    #print(versions, version)
    

    checkpoint_path = path + version + "/checkpoints/"
    if "checkpoints" not in os.listdir(path + version): continue
    files = os.listdir(checkpoint_path)

    task = []
    ckpt = []
    task_abl = []
    files = sorted(files)

    if "hparams.yaml" in files: 
        with open(checkpoint_path + "/hparams.yaml" , "r") as file:   
            # NOTE(review): FullLoader can construct Python objects; trusted files only.
            yaml_data = yaml.load(file, Loader=yaml.FullLoader)
            data_dict = vars(yaml_data)
            # The `continue` statements below skip the whole experiment (outer loop):
            # only runs with dataset_fraction == 1 and n_scenarios == 1 are kept.
            if 'dataset_fraction' in data_dict:
                if  data_dict['dataset_fraction'] != 1:
                    print(elem)
                    continue
            if 'n_scenarios' in data_dict:
                if  data_dict['n_scenarios'] != 1 :
                    print(elem, data_dict['n_scenarios'])
                    continue
    for filename in files: 
        if filename.endswith(".csv"):
            df = pd.read_csv(checkpoint_path + filename)
            df.columns = ['preds', 'targets']
            # Mask (= exclude) rows whose target is 0 or greater than 70.
            m = (df['targets'] == 0).astype(bool) | (df['targets'] > 70).astype(bool)  #if "angle" not in elem  else (df['targets'] == np.inf).astype(bool)
            
            loss3 = mae_loss(torch.tensor(df['preds']),  torch.tensor(df['targets']), torch.tensor(m))
            loss4 = rmse_loss(torch.tensor(df['preds']),  torch.tensor(df['targets']), torch.tensor(m))
            # CSVs with "00" in their name go to the ablation results (MAE only).
            if "00" in filename:
                task_abl.append(round(loss3.item(), 2))
            else:
                task.append((round(loss3.item(), 2),round(loss4.item(), 2)))
        if filename.endswith(".ckpt"):
            ckpt.append(checkpoint_path + '/' + filename)
    res[elem] = task
    if len(task_abl) != 0:
        res_abl[elem] = task_abl
    res_ckpt[elem] = ckpt
# One row per experiment; the fields are read positionally from the folder name
# split on "_" (index 2 = dataset, 3 = task, 4 = backbone, 5 = concept if present).
rows = []
for elem in res.keys():
    splitted = elem.split("_")
    data = splitted[2]
    task = splitted[3]
    backbone = splitted[4]
    if len(splitted) > 5:
        concept = splitted[5]
    else: 
        concept = False
    # NOTE: despite its name, the "t-RMSE" field holds the list of (MAE, RMSE) tuples.
    rows.append({"Concept": concept, "Task": task, "Backbone": backbone, "Dataset": data, "t-RMSE": res[elem]})

#pd.DataFrame(rows).sort_values('size').sort_values('task')


In [None]:
# Inspect the per-experiment result rows currently held in `rows`.
rows

In [None]:
# Turn the list of per-experiment dictionaries into a table.
df = pd.DataFrame(rows)

# Pivot so each task becomes a column, with one line per
# (Dataset, Backbone, Concept). The index previously listed 'Concept' twice,
# which yields two index levels with the same name (reset_index cannot insert
# both) and leaves the 'Backbone' field of the rows unused.
# aggfunc passes the stored list through unchanged.
df_pivot = pd.pivot_table(df, values='t-RMSE', index=['Dataset', 'Backbone', 'Concept'], columns='Task', aggfunc=lambda x: x)

# Suffix every task column with "-MAE".
df_pivot.columns = [f'{col}-MAE' for col in df_pivot.columns]
# Move the index levels back into ordinary columns.
df_pivot = df_pivot.reset_index()
#df_pivot['Feat. Size'] = [512, , 512, 768, 512, , 512, 768]
#df_pivot = df_pivot[["Dataset", "Concept", 'Feat. Size', 'angle-MAE', 'distance-MAE', 'multiangle-MAE', 'multidistance-MAE']].round(2)



In [None]:
# Display the pivoted table.
df_pivot

In [None]:
# Display the un-pivoted table for comparison.
df

In [None]:
# Keep only the identifying columns and the per-task metric columns, rounded to
# two decimals. Raises KeyError if any listed column is absent from df_pivot.
df_pivot = df_pivot[["Dataset", "Concept", 'angle-MAE', 'distance-MAE', 'multitask-MAE']].round(2)
df_pivot

In [None]:

# MAE of every prediction CSV found in the Kaggle working directory.
p = '/kaggle/working/preds/'

experiments = os.listdir(p)
res = {}        # CSV filename -> MAE rounded to two decimals
res_ckpt = {}
for filename in experiments:
    if not filename.endswith(".csv"):
        continue
    df = pd.read_csv(p + filename)
    df.columns = ['preds', 'targets']
    # Choose the mask from the CSV being processed. The previous code tested
    # `elem`, a variable left over from an earlier cell, so the choice did not
    # depend on the current file (and failed on a fresh kernel).
    if "angle" not in filename:
        # Exclude targets equal to 0 or greater than 40.
        m = (df['targets'] == 0) | (df['targets'] > 40)
    else:
        # Exclude targets equal to +inf.
        m = df['targets'] == np.inf

    loss3 = mae_loss(torch.tensor(df['preds']), torch.tensor(df['targets']), torch.tensor(m))
    res[filename] = round(loss3.item(), 2)

# One row per CSV; the task label is inferred from substrings of the filename.
backbone = "clip"
data = 'comma'
rows = []
for elem in res.keys():
    print(elem)
    task = "multi" if 'multi' in elem else ("angle" if 'angle' in elem else 'distance')
    if "multi" in task:
        # Multitask files are split into "multiangle" / "multidistance".
        task = task + ("angle" if "angle" in elem else "distance")
    # NOTE: despite its name, the "t-RMSE" field holds the MAE computed above.
    rows.append({"Task": task, "Concept": backbone, "Dataset": data, "t-RMSE": round(res[elem], 2)})

In [None]:
# Tabulate the per-file rows: `data` aliases the list of dictionaries and
# `df` is the DataFrame built from it.
data = rows
df = pd.DataFrame(data)

# One line per (Dataset, Concept), one column per task; the aggregation
# function hands the stored value through unchanged.
df_pivot = df.pivot_table(
    index=['Dataset', 'Concept'],
    columns='Task',
    values='t-RMSE',
    aggfunc=lambda x: x,
)

# Suffix each task column with "-MAE", then turn the index levels back into columns.
df_pivot.columns = [f'{col}-MAE' for col in df_pivot.columns]
df_pivot = df_pivot.reset_index()
#df_pivot['Feat. Size'] = [512, , 512, 768, 512, , 512, 768]
#df_pivot = df_pivot[["Dataset", "Concept", 'Feat. Size', 'angle-MAE', 'distance-MAE', 'multiangle-MAE', 'multidistance-MAE']].round(2)



In [None]:

# Keep the identifying columns plus the four per-task metric columns, rounded to
# two decimals. Raises KeyError if any listed column is absent from df_pivot.
df_pivot = df_pivot[["Dataset", "Concept", 'angle-MAE', 'distance-MAE', 'multiangle-MAE', 'multidistance-MAE']].round(2)
df_pivot

In [None]:
# For every experiment that has prediction CSVs, list the .ckpt files of its
# highest-numbered lightning_logs version and record the smallest epoch number
# found in their file names.
p = '/kaggle/input/comma-ckpt-seed123/'

experiments = os.listdir(p)
res = {}
res_abl = {}
res_ckpt = {}   # experiment name -> list of .ckpt paths
for elem in experiments:
    path = p + elem + "/lightning_logs/" 
    if len(os.listdir(p + elem)) == 0: continue
    vs = os.listdir(path)
    filt = []
    for elem1 in vs: 
        if 'version' in elem1:
            filt.append(elem1)
    # Numeric suffix after the last "_" of each version folder name.
    versions =[int(elem.split("_")[-1])for elem in filt]
    versions = sorted(versions)
    if len(versions) == 0: continue
    # Use the highest version number only.
    version = f"version_{versions[-1]}"
    
    checkpoint_path = path + version + "/checkpoints/"
    if "checkpoints" not in os.listdir(path + version): continue
    files = os.listdir(checkpoint_path)

    task = []
    ckpt = []
    task_abl = []
    files = sorted(files)
    # Ignore versions that did not produce any prediction CSV.
    if sum([".csv" in filename for filename in files]) == 0: continue
    print(files)
    for filename in files: 
        if filename.endswith(".ckpt"):
            ckpt.append(checkpoint_path + '/' + filename)
    res_ckpt[elem] = ckpt

rows = []

for elem in res_ckpt.keys():
    # Sentinel; stays at this value when the experiment has no .ckpt file.
    minint = 1000000000
    for ckpt in res_ckpt[elem]:
        # Expects "...epoch=<N>-step=..." in the path; a name without "epoch="
        # raises IndexError here.
        ckpt_num = int(ckpt.split('epoch=')[1].split("-step=")[0])
        if minint > ckpt_num: 
            minint = ckpt_num
        

    # Fields are read positionally from the folder name split on "_"
    # (index 2 = dataset, 3 = task, 4 = backbone, 5 = concept if present).
    splitted = elem.split("_")
    data = splitted[2]
    task = splitted[3]
    backbone = splitted[4]
    if len(splitted) > 5:
        concept = splitted[5]
    else: 
        # Without an explicit concept field, Concept is True only for backbone "none".
        concept = backbone == "none"
    rows.append({"Concept": concept, "Task": task, "Concept_source": backbone, "Dataset": data, "ckpt": minint})




In [None]:
# Group the checkpoint rows by backbone. The rows built above store the
# backbone under the "Concept_source" key; there is no "Backbone" column, so
# grouping by that name raised KeyError.
# NOTE(review): this only shows the lazy GroupBy object — add an aggregation
# (e.g. `.size()` or `["ckpt"].min()`) if a table is wanted.
pd.DataFrame(rows).groupby(by='Concept_source')

In [12]:
# Materialise the current `rows` list as a DataFrame.
df = pd.DataFrame(rows)

In [17]:
# Sort by Dataset, then by Task within each dataset. Two chained single-key
# sorts do not achieve this: the second sort is not stable by default, so the
# Task ordering produced by the first one is not guaranteed to survive.
srt = df.sort_values(["Dataset", "Task"])

In [None]:
# MAE per |target| bucket for every kept experiment:
#   (0, 20], (20, 40], (40, 60] and (60, inf). Targets equal to 0 are always excluded.
# The root needs its trailing slash: paths below are built with `p + elem`.
p = '/kaggle/input/comma-ckpt-seed123/'

experiments = os.listdir(p)
res = {}        # experiment name -> list of 4-tuples of bucket MAEs, one per CSV
res_abl = {}    # experiment name -> first-bucket MAE of CSVs with "00" in their name
res_ckpt = {}   # experiment name -> list of .ckpt paths
for elem in experiments:
    # NOTE(review): the bare "1" test skips every experiment whose folder name
    # contains the character "1" anywhere — confirm this is intended.
    if 'ablation' in elem or "0.75" in elem or '0.25' in elem or "1" in elem or '0.5' in elem: continue
    path = p + elem + "/lightning_logs/"
    if len(os.listdir(p + elem)) == 0: continue

    # Use the highest-numbered "version_<N>" folder only.
    versions = sorted(int(name.split("_")[-1]) for name in os.listdir(path) if 'version' in name)
    if len(versions) == 0: continue
    version = f"version_{versions[-1]}"

    checkpoint_path = path + version + "/checkpoints/"
    if "checkpoints" not in os.listdir(path + version): continue
    files = sorted(os.listdir(checkpoint_path))

    task = []
    ckpt = []
    task_abl = []

    if "hparams.yaml" in files:
        with open(checkpoint_path + "/hparams.yaml", "r") as file:
            # NOTE(review): FullLoader can construct Python objects; trusted files only.
            yaml_data = yaml.load(file, Loader=yaml.FullLoader)
            data_dict = vars(yaml_data)
            # The `continue` statements skip the whole experiment (outer loop):
            # only runs with dataset_fraction == 1 and n_scenarios == 1 are kept.
            if 'dataset_fraction' in data_dict:
                if data_dict['dataset_fraction'] != 1:
                    print(elem)
                    continue
            if 'n_scenarios' in data_dict:
                if data_dict['n_scenarios'] != 1:
                    print(elem, data_dict['n_scenarios'])
                    continue
    for filename in files:
        if filename.endswith(".csv"):
            df = pd.read_csv(checkpoint_path + filename)
            df.columns = ['preds', 'targets']
            preds = torch.tensor(df['preds'])
            targets = torch.tensor(df['targets'])
            magnitude = df['targets'].abs()
            zero_target = df['targets'] == 0

            # Each mask marks the rows to EXCLUDE from the bucket.
            m = zero_target | (magnitude > 20)
            loss3 = mae_loss(preds, targets, torch.tensor(m))
            m = zero_target | (magnitude > 40) | (magnitude <= 20)
            loss4 = mae_loss(preds, targets, torch.tensor(m))
            m = zero_target | (magnitude > 60) | (magnitude <= 40)
            loss5 = mae_loss(preds, targets, torch.tensor(m))
            # `<= 60` (not `< 60`) so a target of exactly 60 is counted only in
            # the (40, 60] bucket instead of in both of the last two buckets.
            m = zero_target | (magnitude <= 60)
            loss7 = mae_loss(preds, targets, torch.tensor(m))
            if "00" in filename:
                task_abl.append(round(loss3.item(), 2))
            else:
                task.append((round(loss3.item(), 2), round(loss4.item(), 2), round(loss5.item(), 2), round(loss7.item(), 2)))
        if filename.endswith(".ckpt"):
            ckpt.append(checkpoint_path + '/' + filename)
    res[elem] = task
    if len(task_abl) != 0:
        res_abl[elem] = task_abl
    res_ckpt[elem] = ckpt

# One row per experiment; fields are read positionally from the folder name
# split on "_" (index 2 = dataset, 3 = task, 4 = backbone, 5 = concept if present).
rows = []
for elem in res.keys():
    splitted = elem.split("_")
    data = splitted[2]
    task = splitted[3]
    backbone = splitted[4]
    if len(splitted) > 5:
        concept = splitted[5]
    else:
        concept = False
    # NOTE: despite its name, the "t-RMSE" field holds the list of bucket-MAE tuples.
    rows.append({"Concept": concept, "Task": task, "Concept_source": backbone, "Dataset": data, "t-RMSE": res[elem]})

#pd.DataFrame(rows).sort_values('size').sort_values('task')


In [None]:
# Table of the per-experiment rows.
df = pd.DataFrame(rows)

# One line per (Dataset, Concept_source, Concept), one column per task; the
# aggregation function hands the stored list through unchanged.
df_pivot = df.pivot_table(
    index=['Dataset', 'Concept_source', 'Concept'],
    columns='Task',
    values='t-RMSE',
    aggfunc=lambda x: x,
)

# Suffix each task column with "-MAE", then turn the index levels back into columns.
df_pivot.columns = [f'{col}-MAE' for col in df_pivot.columns]
df_pivot = df_pivot.reset_index()
#df_pivot['Feat. Size'] = [512, , 512, 768, 512, , 512, 768]
#df_pivot = df_pivot[["Dataset", "Backbone", 'Feat. Size', 'angle-MAE', 'distance-MAE', 'multiangle-MAE', 'multidistance-MAE']].round(2)



In [None]:
# Display the pivoted table.
df_pivot

In [None]:
# Plot the bucketed distance MAE of every Comma experiment, one line per row.
df_pivot_comma = df_pivot[df_pivot['Dataset'] =='comma']
categories = ["[0,19]", "[20,40]", "[41,60]", "[60,]"]
# Defined before the loop so the tick calls below still work when there is no
# Comma row at all (it used to exist only after the first iteration).
x_positions = np.arange(len(categories))
for i in range(df_pivot_comma['distance-MAE'].shape[0]):
    # Each cell holds a list of per-CSV tuples; plot the first tuple and
    # linearly interpolate over any NaN bucket.
    bucket_mae = list(pd.Series(df_pivot_comma['distance-MAE'].iloc[i][0]).interpolate(method='linear'))
    plt.plot(x_positions, bucket_mae)

# Relabel the x-axis tick positions with the bucket labels.
plt.xticks(x_positions, categories, fontsize=16)
plt.yticks(fontsize=16)
plt.title("Comma2k19",fontsize=16)
# NOTE(review): the legend labels are hard-coded and assume exactly two rows in
# this order — confirm against df_pivot_comma.
plt.legend(['CLIP', "Retinanet"], fontsize=13)
plt.xlabel('Distance', fontsize=16)
plt.ylabel('MAE', fontsize=16)
plt.savefig("dist_comma.pdf")


In [None]:
# MAE per |target| bucket for every kept experiment:
#   (0, 10], (10, 20], (20, 30] and (30, 40]. Targets equal to 0 are always excluded.
p = '/data1/jessica/data/toyota/ckpts_final/'

experiments = os.listdir(p)
res = {}        # experiment name -> list of 4-tuples of bucket MAEs, one per CSV
res_abl = {}    # experiment name -> first-bucket MAE of CSVs with "00" in their name
res_ckpt = {}   # experiment name -> list of .ckpt paths
for elem in experiments:
    # NOTE(review): the bare "1" test skips every experiment whose folder name
    # contains the character "1" anywhere — confirm this is intended.
    if 'ablation' in elem or "0.75" in elem or '0.25' in elem or "1" in elem or '0.5' in elem: continue
    path = p + elem + "/lightning_logs/"
    if len(os.listdir(p + elem)) == 0: continue

    # Use the highest-numbered "version_<N>" folder only.
    versions = sorted(int(name.split("_")[-1]) for name in os.listdir(path) if 'version' in name)
    if len(versions) == 0: continue
    version = f"version_{versions[-1]}"

    checkpoint_path = path + version + "/checkpoints/"
    if "checkpoints" not in os.listdir(path + version): continue
    files = sorted(os.listdir(checkpoint_path))

    task = []
    ckpt = []
    task_abl = []

    if "hparams.yaml" in files:
        with open(checkpoint_path + "/hparams.yaml", "r") as file:
            # NOTE(review): FullLoader can construct Python objects; trusted files only.
            yaml_data = yaml.load(file, Loader=yaml.FullLoader)
            data_dict = vars(yaml_data)
            # The `continue` statements skip the whole experiment (outer loop):
            # only runs with dataset_fraction == 1 and n_scenarios == 1 are kept.
            if 'dataset_fraction' in data_dict:
                if data_dict['dataset_fraction'] != 1:
                    print(elem)
                    continue
            if 'n_scenarios' in data_dict:
                if data_dict['n_scenarios'] != 1:
                    print(elem, data_dict['n_scenarios'])
                    continue
    for filename in files:
        if filename.endswith(".csv"):
            df = pd.read_csv(checkpoint_path + filename)
            df.columns = ['preds', 'targets']
            preds = torch.tensor(df['preds'])
            targets = torch.tensor(df['targets'])
            magnitude = df['targets'].abs()
            zero_target = df['targets'] == 0

            # Each mask marks the rows to EXCLUDE from the bucket.
            m = zero_target | (magnitude > 10)
            loss3 = mae_loss(preds, targets, torch.tensor(m))
            m = zero_target | (magnitude > 20) | (magnitude <= 10)
            loss4 = mae_loss(preds, targets, torch.tensor(m))
            m = zero_target | (magnitude > 30) | (magnitude <= 20)
            loss5 = mae_loss(preds, targets, torch.tensor(m))
            m = zero_target | (magnitude > 40) | (magnitude <= 30)
            loss6 = mae_loss(preds, targets, torch.tensor(m))
            if "00" in filename:
                task_abl.append(round(loss3.item(), 2))
            else:
                # The fourth entry is the (30, 40] bucket (loss6); it used to
                # repeat loss5, leaving loss6 computed but never reported.
                task.append((round(loss3.item(), 2), round(loss4.item(), 2), round(loss5.item(), 2), round(loss6.item(), 2)))
        if filename.endswith(".ckpt"):
            ckpt.append(checkpoint_path + '/' + filename)
    res[elem] = task
    if len(task_abl) != 0:
        res_abl[elem] = task_abl
    res_ckpt[elem] = ckpt

# One row per experiment; fields are read positionally from the folder name
# split on "_" (index 2 = dataset, 3 = task, 4 = backbone, 5 = concept if present).
rows = []
for elem in res.keys():
    splitted = elem.split("_")
    data = splitted[2]
    task = splitted[3]
    backbone = splitted[4]
    if len(splitted) > 5:
        concept = splitted[5]
    else:
        concept = False
    # NOTE: despite its name, the "t-RMSE" field holds the list of bucket-MAE tuples.
    rows.append({"Concept": concept, "Task": task, "Concept_source": backbone, "Dataset": data, "t-RMSE": res[elem]})

#pd.DataFrame(rows).sort_values('size').sort_values('task')


In [None]:
# Table of the per-experiment rows.
df = pd.DataFrame(rows)

# Pivot: the index is (Dataset, Concept_source, Concept), the columns are the
# tasks, and each cell keeps the stored list as-is (identity aggregation).
df_pivot = df.pivot_table(
    index=['Dataset', 'Concept_source', 'Concept'],
    columns='Task',
    values='t-RMSE',
    aggfunc=lambda x: x,
)

# Append "-MAE" to every task column name and flatten the index into columns.
df_pivot.columns = [f'{col}-MAE' for col in df_pivot.columns]
df_pivot = df_pivot.reset_index()
#df_pivot['Feat. Size'] = [512, , 512, 768, 512, , 512, 768]
#df_pivot = df_pivot[["Dataset", "Backbone", 'Feat. Size', 'angle-MAE', 'distance-MAE', 'multiangle-MAE', 'multidistance-MAE']].round(2)


In [None]:
# Display the pivoted table.
df_pivot

In [None]:
# Plot the bucketed steering-angle MAE of every Comma experiment, one line per row.
df_pivot_comma = df_pivot[df_pivot['Dataset'] =='comma']
# Third label fixed: it read "[20,19]", a typo for the 20-29 bucket.
categories = ["[0,9]", "[10,19]", "[20,29]", "[30,40]"]
# Defined before the loop so the tick calls below still work when there is no
# Comma row at all (it used to exist only after the first iteration).
x_positions = np.arange(len(categories))
for i in range(df_pivot_comma['angle-MAE'].shape[0]):
    # Each cell holds a list of per-CSV tuples; plot the first tuple and
    # linearly interpolate over any NaN bucket.
    bucket_mae = list(pd.Series(df_pivot_comma['angle-MAE'].iloc[i][0]).interpolate(method='linear'))
    plt.plot(x_positions, bucket_mae)

# Relabel the x-axis tick positions with the bucket labels.
plt.xticks(x_positions, categories, fontsize=16)
plt.yticks(fontsize=16)
# NOTE(review): the legend labels are hard-coded and assume exactly two rows in
# this order — confirm against df_pivot_comma.
plt.legend(['CLIP', "Retinanet"],fontsize=13)
plt.xlabel('Steering Angle', fontsize=16)
plt.ylabel('MAE', fontsize=16)
plt.title("Comma2k19",fontsize=16)
plt.savefig("angle_comma.pdf")