In [None]:
import json
import pandas as pd
import numpy as np
import pickle
from cluster_plots import plot_2d, plot_silhouette
from extract_extrinsic_camera_position import download_reconstruction_data, download_images
import os
from itertools import product
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from scipy.spatial.distance import euclidean
from pathlib import Path

# Per-scene configuration: "id" and "recon" are used to build the data paths below,
# "clusters" is the number of KMeans clusters used for that scene.
scenes_dict = {
    "grossmünster": {
        "id": "311/918",
        "recon": "0",
        "clusters": 7,
    },
    "tower_bridge": {
        "id": "001/945",
        "recon": "0",
        "clusters": 20,
    },
}

# Scene analysed by the rest of the notebook.
selected_scene = "tower_bridge"

scene_id, reconstruction_id = (
    scenes_dict[selected_scene][field] for field in ("id", "recon")
)


In [None]:
# Manual switch: set to True to call download_reconstruction_data and load the
# resulting coordinates.json into `coordinates`. Left off by default.
REFRESH_RECONSTRUCTION_DATA = False

if REFRESH_RECONSTRUCTION_DATA:
    download_reconstruction_data(location_id=scene_id, reconstruction_id=reconstruction_id)

    coordinates_file = f"camera-location/{scene_id}/{reconstruction_id}/coordinates.json"
    with open(coordinates_file, "r") as handle:
        coordinates = json.load(handle)

In [None]:
coordinates_path = f"camera-location/{scene_id}/{reconstruction_id}/coordinates.json"

# The JSON is read with its keys as the index; the three value columns are named
# x/y/z and the index is moved into a regular "camera_id" column.
df = (
    pd.read_json(coordinates_path, orient="index")
    .set_axis(["x", "y", "z"], axis=1)
    .reset_index(names="camera_id")
)
df.head()

In [None]:
"""range_n_clusters = [2, 3, 4, 5, 6]

for n_clusters in range_n_clusters:
    # Initialize the clusterer with n_clusters value and a random generator
    # seed of 10 for reproducibility.
    clusterer = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = clusterer.fit_predict(x)

    # Create a subplot with 1 row and 2 columns
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(13, 5)

    plot_silhouette(axis=ax1, data=x, labels=cluster_labels, n_clusters=n_clusters)
    plot_2d(axis=ax2, data=x, model=clusterer, labels=cluster_labels, n_clusters=n_clusters)
    
    plt.show()"""

In [None]:
# Use the scene's configured cluster count; a falsy value (0, None, ...) falls back to 20.
number_of_clusters = scenes_dict[selected_scene]["clusters"] or 20

# Camera positions as a plain (n_cameras, 3) array for scikit-learn.
x = df[["x", "y", "z"]].values
model = KMeans(n_clusters=number_of_clusters, random_state=42)

cluster_labels = model.fit_predict(x)
df["assigned_label"] = cluster_labels

# One row, two columns: plot_silhouette draws on the left axis, plot_2d on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

plot_silhouette(
    axis=ax1,
    data=x,
    labels=cluster_labels,
    n_clusters=number_of_clusters,
)
plot_2d(
    axis=ax2,
    data=x,
    model=model,
    labels=cluster_labels,
    n_clusters=number_of_clusters,
    cutoff_axis=35,
)

In [None]:
def distance_to_cluster_centers(row, centers):
    """Return the Euclidean distance from one point to every cluster centre.

    Args:
        row: object exposing the point's coordinates through ``.values``
            (a pandas Series when used with ``DataFrame.apply(axis=1)``).
        centers: iterable of centre coordinates, one entry per cluster.

    Returns:
        list: one distance per entry of ``centers``, in the same order.
    """
    point = row.values
    return [euclidean(center, point) for center in centers]

# One "dist_cluster_<i>" column per fitted centre, holding each camera's distance to it.
centers = model.cluster_centers_
cluster_names = [f"dist_cluster_{i}" for i in range(len(centers))]

distances_to_centers = df[["x", "y", "z"]].apply(
    distance_to_cluster_centers,
    axis=1,
    result_type="expand",
    args=(centers,),
)
df[cluster_names] = distances_to_centers

# Keep only the first row for every index value.
df = df.loc[~df.index.duplicated(), :].copy()
df.head(5)

In [None]:
def z_scores(cluster_columns):
    """Standardise every column of ``cluster_columns`` to a z-score.

    Each column is centred on its own mean and divided by its own standard
    deviation (pandas default, i.e. sample std). The result column is named
    ``z_score_<suffix>`` where ``<suffix>`` is the part of the source column
    name after its last underscore (``dist_cluster_3`` -> ``z_score_3``).

    Args:
        cluster_columns: DataFrame whose columns are to be standardised.

    Returns:
        pd.DataFrame: one ``z_score_*`` column per input column, on the same
        index. An input without columns yields an empty frame on that index
        instead of the ``ValueError`` raised by ``pd.concat([])``.
    """
    z_columns = [
        ((series - series.mean()) / series.std()).rename(
            f"z_score_{series_name.split('_')[-1]}"
        )
        for series_name, series in cluster_columns.items()
    ]

    if not z_columns:
        # Nothing to standardise: keep the index so the result still concatenates cleanly.
        return pd.DataFrame(index=cluster_columns.index)

    return pd.concat(z_columns, axis=1)

# Pick the distance columns by their literal prefix. A glob such as 'dist_cluster*'
# must not be given to str.contains: it is interpreted as a regular expression
# ("dist_cluste" followed by any number of "r"), not as a wildcard.
distance_columns = df.loc[:, df.columns.str.startswith("dist_cluster_")]

# Append one z_score_<i> column per dist_cluster_<i> column.
df_z = pd.concat([df, z_scores(distance_columns)], axis=1)
# Keep only the first row for every index value.
df_z = df_z.loc[~df_z.index.duplicated(), :].copy()
df_z.head(5)

In [None]:
def is_outlier(row, threshold=2.5):
    """Flag a row whose z-score for its own cluster exceeds ``threshold``.

    Args:
        row: mapping-like row providing ``"assigned_label"`` and the matching
            ``"z_score_<label>"`` entry.
        threshold: absolute z-score above which the row counts as an outlier.

    Returns:
        int: 1 if ``|z_score_<label>| > threshold``, otherwise 0.
    """
    label = int(row["assigned_label"])
    own_z_score = row[f"z_score_{label}"]
    return int(np.abs(own_z_score) > threshold)

outlier_quantile = 0.95


def is_quantile_outlier(df, quantile=outlier_quantile):
    """Flag rows that lie unusually far from their own cluster centre.

    For every row the distance to its assigned cluster is taken from the
    ``dist_cluster_*`` columns. A row is an outlier when that distance is
    strictly greater than the ``quantile`` of the same distance over all rows
    assigned to the same cluster.

    Only the frame passed in as ``df`` is used; no notebook-level frame is read.

    Args:
        df: DataFrame with an integer-like ``"assigned_label"`` column and the
            distance columns (names containing ``"dist_cluster"``). The j-th
            distance column, in column order, is taken to belong to label j.
        quantile: quantile in [0, 1] used as the per-cluster cut-off.

    Returns:
        pd.Series: boolean flags on ``df.index``.
    """
    dist_names = [n for n in df.columns if "dist_cluster" in n]

    if len(df) == 0:
        # No rows: nothing can be an outlier, and a proportion would divide by zero.
        print("proportion of outliers: 0.0")  # sanity check
        return pd.Series([], index=df.index, dtype=bool)

    labels = df["assigned_label"].to_numpy().astype(int)
    # Build the distance matrix once instead of once per row.
    dist_matrix = df[dist_names].to_numpy()
    # Positional lookup: row i, column labels[i] -> distance to the row's own centre.
    own_distance = dist_matrix[np.arange(len(labels)), labels]

    # Per-cluster cut-off, indexed by the label value itself so that a label
    # without members cannot shift the thresholds of the other clusters.
    cutoff_by_label = pd.Series(own_distance).groupby(labels).quantile(quantile)
    cutoffs = cutoff_by_label.loc[labels].to_numpy()

    flags = own_distance > cutoffs
    print(f"proportion of outliers: {flags.mean()}")  # sanity check
    return pd.Series(flags, index=df.index)

# Flag outliers with the per-cluster distance quantile rule (is_quantile_outlier).
# The z-score based alternative is kept for reference only; the original author
# marked it as not working:
# df_z["is_outlier"] = df_z.apply(is_outlier, axis=1)
df_z["is_outlier"] = is_quantile_outlier(df_z)
df_z.head()

In [None]:
# Persist the camera ids of all flagged outliers as a pickled list.
outlier_list_path = Path(f"./image-paths/{scene_id}/{reconstruction_id}/outlier_list.pickle")
# Create the target folder first so a fresh checkout does not fail with FileNotFoundError.
outlier_list_path.parent.mkdir(parents=True, exist_ok=True)

with open(outlier_list_path, "wb") as file:
    pickle.dump(df_z.loc[df_z["is_outlier"], "camera_id"].tolist(), file)
print("Saved outlier list.")

In [None]:
# Download images for the selected scene/reconstruction with outlier_only=True.
# NOTE(review): presumably this restricts the download to the ids stored in
# outlier_list.pickle by the previous cell - confirm in extract_extrinsic_camera_position.
download_images(location_id=scene_id, reconstruction_id=reconstruction_id, outlier_only=True)

In [None]:
# Camera ids of every flagged row, in frame order.
outliers = df_z.loc[df_z["is_outlier"], "camera_id"].tolist()

# paths.json content for this scene/reconstruction.
paths_json = f"./image-paths/{scene_id}/{reconstruction_id}/paths.json"
with open(paths_json, "r") as paths_file:
    paths = json.load(paths_file)

In [None]:
"""fig, ax = plt.subplots()
# only your own z score
for i in range(len(model.cluster_centers_)):
    df_z[df_z["assigned_label"] == i][f"z_score_{i}"].plot.hist(ax=ax)
    ax.legend()"""

In [None]:
# Number of flagged rows per assigned cluster.
outlier_counts = df_z.groupby("assigned_label")["is_outlier"].sum()
print(outlier_counts)

# Prints one header line per cluster; nothing else is shown inside the loop.
for cluster, _center in enumerate(model.cluster_centers_):
    print(f"Showing outliers for cluster {cluster}")


In [None]:
# Cluster centres as a frame so they can be drawn as an extra plot layer.
# NOTE(review): camera_id is filled by index alignment with df_z, i.e. centre i gets
# the camera id stored at index i of df_z - confirm this pairing is intended.
cluster_df = pd.DataFrame(model.cluster_centers_, columns=["x", "y", "z"]).assign(
    color="red",
    camera_id=df_z["camera_id"],
)
cluster_df

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import Output, VBox

# One colour per cluster, sampled evenly along the Plasma scale. Slicing the palette
# list is not enough: it only holds a fixed number of colours, fewer than the 20
# clusters configured for some scenes.
n_plot_clusters = len(model.cluster_centers_)
color_clusters = px.colors.sample_colorscale(
    px.colors.sequential.Plasma,
    [i / max(n_plot_clusters - 1, 1) for i in range(n_plot_clusters)],
)
# Keys are strings because the labels are plotted as categories (see below).
color_map = {str(idx): color for idx, color in enumerate(color_clusters)}

# plotly express treats a numeric colour column as continuous and then ignores
# color_discrete_map, so the labels are cast to strings for plotting only.
plot_df = df_z.assign(assigned_label=df_z["assigned_label"].astype(str))

# Layer 1: every camera, coloured by its assigned cluster.
fig = px.scatter_3d(
    plot_df,
    x="x",
    y="y",
    z="z",
    color="assigned_label",
    opacity=0.3,
    width=800,
    height=800,
    color_discrete_map=color_map,
    # Keep the legend in numeric cluster order instead of order of appearance.
    category_orders={"assigned_label": list(color_map)},
    hover_data=["camera_id"],
)

# Use the data's own proportions for the three axes.
fig.update_scenes(aspectmode='data')
camera = dict(
    eye=dict(x=2, y=2, z=0.1)
)

fig.update_layout(scene_camera=camera)

# Layer 2: flagged outliers, drawn opaque with an open diamond marker.
fig.add_traces(
    px.scatter_3d(
        df_z[df_z["is_outlier"] == 1],
        x="x",
        y="y",
        z="z",
        opacity=1,
        symbol="is_outlier",
        symbol_sequence=['diamond-open'],
        hover_data=["camera_id"],
    ).update_traces(marker_line_width=2).data
)

# Layer 3: cluster centres as red crosses.
fig.add_traces(
    px.scatter_3d(
        cluster_df,
        x="x",
        y="y",
        z="z",
        symbol="color",
        symbol_sequence=["x"],
        color="color",
    ).data
)

fig.show()

In [None]:
def get_image_paths_by_ids(id_list:list, scene_id:str, reconstruction_id:str)->list:
    """Look up the image paths of the given camera ids.

    Reads ``./image-paths/<scene_id>/<reconstruction_id>/paths.json``, a JSON
    object whose keys are parsed with ``int()`` and whose values are joined
    onto ``./images/<scene_id>/``.

    Args:
        id_list: camera ids to look up.
        scene_id: scene identifier used in both path prefixes.
        reconstruction_id: reconstruction identifier used in the metadata path.

    Returns:
        list: one path per id of ``id_list`` that is present in the metadata,
        in the order of ``id_list`` (so the result can be zipped with the ids
        when every id is known). Ids without metadata are skipped.
    """
    path_metadata = os.path.normpath(f"./image-paths/{scene_id}/{reconstruction_id}/paths.json")
    with open(path_metadata, "r") as file:
        metadata_paths = json.load(file)

    path_prefix = f"./images/{scene_id}/"
    # Index the metadata once so each id is a constant-time lookup.
    path_by_id = {int(idx): img_path for idx, img_path in metadata_paths.items()}
    return [
        os.path.join(path_prefix, path_by_id[camera_id])
        for camera_id in id_list
        if camera_id in path_by_id
    ]

In [None]:
# Per cluster: the outlier camera ids, their image paths, and (id, path) pairs
# for the HTML template.
outliers_per_cluster = []
paths_per_cluster = []
jinja_export = []
for cluster_id in range(number_of_clusters):
    in_cluster = df_z["assigned_label"] == cluster_id
    o_per_cluster = df_z.loc[df_z["is_outlier"] & in_cluster, "camera_id"].tolist()
    p_per_cluster = get_image_paths_by_ids(
        id_list=o_per_cluster,
        scene_id=scene_id,
        reconstruction_id=reconstruction_id,
    )
    outliers_per_cluster.append(o_per_cluster)
    paths_per_cluster.append(p_per_cluster)
    # NOTE(review): the pairing assumes the paths come back in the same order as the ids.
    jinja_export.append(list(zip(o_per_cluster, p_per_cluster)))
    

In [None]:
from jinja2 import Template, Environment, FileSystemLoader

scene_config = scenes_dict[selected_scene]

# Render the outlier overview from ./html/templates/outlier-export.html.
env = Environment(loader=FileSystemLoader('./html/templates'))
template = env.get_template('outlier-export.html')
output_from_parsed_template = template.render(
        scene_id=scene_config["id"],
        scene_name=selected_scene,
        recon_id=scene_config["recon"],
        clusters=jinja_export,
    )

html_path = Path("html") / "exports"
html_path.mkdir(parents=True, exist_ok=True)

# File name: scene id without slashes plus the reconstruction id.
html_name = f'scene{scene_config["id"].replace("/", "")}-recon{scene_config["recon"]}.html'
# Write UTF-8 explicitly: scene names contain non-ASCII characters and the platform
# default encoding is not UTF-8 everywhere.
with open(html_path / html_name, "w", encoding="utf-8") as fh:
    fh.write(output_from_parsed_template)

In [None]:
"""import matplotlib.pyplot as plt
import matplotlib.image as mpimg

n_horizontal = 3

for idx, (cam_ids, image_paths) in enumerate(zip(outliers_per_cluster, paths_per_cluster)):
    print(f"For cluster: {idx}")
    plt.figure()
    
    for cam_id, image_path in zip(cam_ids, image_paths):
        image = mpimg.imread(f"{image_path}")
        plt.imshow(image)
        plt.show()"""