In [None]:
import json
from collections import defaultdict, Counter

import numpy as np
# Load the detection results; later cells iterate `output` as a mapping of
# frame number -> list of detections.
# The encoding is pinned because JSON text is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open('output.json', 'r', encoding='utf-8') as f:
    output = json.load(f)

In [None]:
# Detector labels handled by this notebook, split into racks and boxes.
rack_types = [f'rack_{index}' for index in range(1, 5)]
box_types = ['klt_box_full', 'klt_box_empty']

# Capacity per rack type; the products are kept as the author wrote them.
capacities_by_rack = dict(zip(rack_types, (
    4 * 6 * 2,              # rack_1 -> 48
    3 * 3 * 3 + 1 * 4 * 3,  # rack_2 -> 39
    3 * 1 * 2,              # rack_3 -> 6
    3 * 2 * 2 + 2 * 2,      # rack_4 -> 16
)))

# Number of shelves per rack type; used below as the KMeans cluster count.
n_shelfes = dict(zip(rack_types, (4, 4, 3, 5)))


def _new_rack_record():
    """Return a fresh, empty record for a rack id seen for the first time."""
    return {
        'label': None,
        'first_frame': None,
        'x_min_on_first_frame': None,
        'confidences': [],
        'average_confidence': None,
        'boxes': [],
        'relative_boxes': [],
        'box_on_shelf': [],
    }


# rack id -> record; a missing id is created on first access.
racks = defaultdict(_new_rack_record)
# Quick look at the raw data: pull the detections stored under frame key '5'
# and display them as the cell output.
frame_out = output['5']
frame_out

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
# Confidence a box detection must exceed to be kept (rack detections are not
# thresholded). Constant, so it is set once instead of on every frame.
box_threshold = 0.5

for frame_number, detections in output.items():
    racks_in_frame = []
    boxes_in_frame = []

    # Pass 1: split this frame's detections into racks and confident boxes.
    for detection in detections:
        object_id = detection['obj_id']
        label = detection['obj_label']
        # detector_bbox is read as [x_min, y_min, width, height].
        bbox = detection['detector_bbox']
        x_min = bbox[0]
        x_max = x_min + bbox[2]
        y_min = bbox[1]
        y_max = y_min + bbox[3]

        if label in rack_types:
            rack_record = racks[object_id]
            rack_record['label'] = label
            rack_record['confidences'].append(detection['confidence'])
            # Remember where the rack was first seen; a later cell sorts the
            # racks on exactly these two fields.
            if rack_record['first_frame'] is None:
                rack_record['first_frame'] = int(frame_number)
                rack_record['x_min_on_first_frame'] = x_min
            racks_in_frame.append((object_id, x_min, x_max, y_min, y_max))

        if label in box_types and detection['confidence'] > box_threshold:
            boxes_in_frame.append((object_id, label, x_min, x_max, y_min, y_max))

    # Pass 2: attach each box to every rack whose horizontal extent contains
    # the box's x-centroid, storing both absolute and rack-relative coordinates.
    for object_id_box, label_box, x_min_box, x_max_box, y_min_box, y_max_box in boxes_in_frame:
        box_x_centroid = 0.5 * (x_min_box + x_max_box)
        for object_id_rack, x_min_rack, x_max_rack, y_min_rack, y_max_rack in racks_in_frame:
            rack_width = x_max_rack - x_min_rack
            rack_height = y_max_rack - y_min_rack
            if rack_width <= 0 or rack_height <= 0:
                # Degenerate rack bbox: the normalisation below would divide by zero.
                continue
            if not (x_min_rack < box_x_centroid < x_max_rack):
                continue
            racks[object_id_rack]['boxes'].append((object_id_box, label_box, x_min_box, x_max_box))
            # Relative coordinates are clamped to the [0, 1] range of the rack bbox.
            racks[object_id_rack]['relative_boxes'].append((
                object_id_box,
                label_box,
                max((x_min_box - x_min_rack) / rack_width, 0),
                min((x_max_box - x_min_rack) / rack_width, 1),
                max((y_min_box - y_min_rack) / rack_height, 0),
                min((y_max_box - y_min_rack) / rack_height, 1)
            ))

    # Pass 3: for every known rack, cluster the vertical positions of all boxes
    # gathered so far into shelves and store a per-shelf box count snapshot.
    # NOTE(review): this also re-clusters racks that are not in the current
    # frame; confirm whether that is intended.
    # The loop variable stays named `rack` (a rack id) because a later cell
    # reads `racks[rack]`.
    for rack in racks:
        if len(racks[rack]['box_on_shelf']) > 10:
            # Enough snapshots collected for this rack.
            continue
        relative_boxes = racks[rack]['relative_boxes']
        if len(relative_boxes) < 5:
            # Too few boxes to cluster; 5 is also the largest shelf count in
            # n_shelfes, so KMeans never gets fewer samples than clusters.
            continue
        df = pd.DataFrame({
            'y_min': [box[4] for box in relative_boxes],
            'y_max': [box[5] for box in relative_boxes]
        })
        rack_type = racks[rack]['label']
        # One cluster per shelf of this rack type; seeded so that re-running
        # the notebook reproduces the same assignment.
        model = KMeans(
            n_clusters=n_shelfes[rack_type],
            init='k-means++',
            n_init=100,
            max_iter=1000,
            random_state=0,
        )
        # Fit to the data and predict the cluster assignment of each box.
        km_clusters = model.fit_predict(df.values)
        df['cluster'] = km_clusters
        # Number the clusters 1..n by descending mean relative y_min.
        sorted_df = df.groupby('cluster').mean().sort_values(by='y_min', ascending=False)
        sorted_df['shelf'] = range(1, sorted_df.shape[0] + 1)
        # Look the shelf up by cluster *label* (.loc). A positional .iloc
        # lookup would always return pred + 1 and ignore the ordering above.
        racks[rack]['box_on_shelf'].append(
            Counter(int(sorted_df.loc[pred, 'shelf']) for pred in km_clusters)
        )
# for rack, items in racks.items():
#     print(racks[33]['relative_boxes'])


In [None]:
 # Display the 'box_on_shelf' entry of the rack whose id is currently held in
 # `rack` (the name is left over from the last loop that iterated over `racks`).
 racks[rack]['box_on_shelf']

In [None]:
# Scratch example: Counter tallies how often each value occurs in the list.
Counter([1, 3, 3, 2, 3, 4])

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
# Cluster each rack's boxes into shelves, using every relative box position
# collected for it, and store the shelf number of each box (in the order of
# 'relative_boxes') under 'box_on_shelf'.
# The loop variable stays named `rack` (a rack id) because later cells read
# `racks[rack]`.
for rack in racks:
    relative_boxes = racks[rack]['relative_boxes']
    if len(relative_boxes) < 5:
        # Too few boxes to cluster; 5 is also the largest shelf count in
        # n_shelfes, so KMeans never gets fewer samples than clusters.
        continue
    df = pd.DataFrame({
        'y_min': [box[4] for box in relative_boxes],
        'y_max': [box[5] for box in relative_boxes]
    })
    rack_type = racks[rack]['label']
    # One cluster per shelf of this rack type; seeded so that re-running the
    # notebook reproduces the same assignment.
    model = KMeans(
        n_clusters=n_shelfes[rack_type],
        init='k-means++',
        n_init=100,
        max_iter=1000,
        random_state=0,
    )
    # Fit to the data and predict the cluster assignment of each box.
    km_clusters = model.fit_predict(df.values)
    df['cluster'] = km_clusters
    # Number the clusters 1..n by descending mean relative y_min.
    sorted_df = df.groupby('cluster').mean().sort_values(by='y_min', ascending=False)
    sorted_df['shelf'] = range(1, sorted_df.shape[0] + 1)
    # Look the shelf up by cluster *label* (.loc). A positional .iloc lookup
    # would always return pred + 1 and ignore the ordering above.
    racks[rack]['box_on_shelf'] = [int(sorted_df.loc[pred, 'shelf']) for pred in km_clusters]

In [None]:
def _first_seen_key(rack_record):
    """Sort key for a rack record: (first_frame, x_min_on_first_frame)."""
    return rack_record['first_frame'], rack_record['x_min_on_first_frame']


# Order the rack records by first_frame, then by x_min_on_first_frame.
# Both fields must be populated (not None) for the ordering to mean anything.
sorted_racks = sorted(racks.values(), key=_first_seen_key)

In [None]:
# Store the mean detection confidence on every rack record.
# The loop variable is deliberately not called `rack`: that name holds a rack
# id left by the clustering loop, and a later cell still reads `racks[rack]`.
for rack_record in sorted_racks:
    confidences = rack_record['confidences']
    # A record with no confidences gets NaN, which fails any `> threshold`
    # test later on, without numpy's empty-mean warning.
    rack_record['average_confidence'] = np.mean(confidences) if confidences else float('nan')

In [None]:
# Keep only racks with more than 90 recorded confidences whose average
# confidence is above 0.7.
sorted_and_filtered_racks = [
    rack_record
    for rack_record in sorted_racks
    if len(rack_record['confidences']) > 90 and rack_record['average_confidence'] > 0.7
]

In [None]:
# Replace each rack's raw list of box sightings with a Counter of box labels,
# keeping only boxes that were attached to the rack in more than a third of
# the rack's own detections.
# The loop variable is deliberately not called `rack`: that name holds a rack
# id left by the clustering loop, and a later cell still reads `racks[rack]`.
for rack_record in sorted_and_filtered_racks:
    sightings = rack_record['boxes']
    if isinstance(sightings, Counter):
        # Already summarised by an earlier run of this cell; running it again
        # on the Counter would produce garbage.
        continue
    min_sightings = 0.33 * len(rack_record['confidences'])
    # Count sightings per (box id, label). The stored tuples also carry
    # x_min/x_max, so counting whole tuples only groups sightings whose
    # pixel coordinates are exactly identical.
    sightings_per_box = Counter((box[0], box[1]) for box in sightings)
    rack_record['boxes'] = Counter(
        label
        for (_box_id, label), count in sightings_per_box.items()
        if count > min_sightings
    )

In [None]:
# Display the 'box_on_shelf' entry of the first rack that passed the filter.
sorted_and_filtered_racks[0]['box_on_shelf']

In [None]:
# Display the cluster labels from the most recent KMeans fit_predict call.
km_clusters

In [None]:
# Display the 'box_on_shelf' entry for the rack id held in `rack`.
# NOTE(review): `rack` must still be a rack id (a key of `racks`) here; any
# earlier loop that reuses the name `rack` for a record dict makes this lookup
# fail with "unhashable type: 'dict'".
racks[rack]['box_on_shelf']
    

In [None]:
# Scratch inspection of the cluster -> shelf mapping; only the last expression
# is shown as the cell output.
km_clusters

# NOTE(review): `.iloc` selects by row position in `sorted_df`, not by cluster
# label, so these two lookups return position + 1.
sorted_df.iloc[2]['shelf']
[int(sorted_df.iloc[pred]['shelf']) for pred in km_clusters]

In [None]:
# Display the most recently built y_min / y_max frame.
df

In [None]:
# Display the per-cluster means, ordered by descending y_min, with shelf numbers.
sorted_df

In [None]:
import pandas as pd

# Build a two-column frame (relative y_min / y_max of every box) for rack id 33.
# `racks` is a defaultdict, so this lookup creates an empty record if id 33 is
# not present.
rack_33_boxes = racks[33]['relative_boxes']
y_min_list = [relative_box[4] for relative_box in rack_33_boxes]
y_max_list = [relative_box[5] for relative_box in rack_33_boxes]

d = dict(y_min=y_min_list, y_max=y_max_list)
df = pd.DataFrame(d)

In [None]:
from sklearn.cluster import KMeans

# Create a model based on 3 centroids. It is seeded so that the cluster labels
# (and the colours in the plot below) are the same on every run.
model = KMeans(n_clusters=3, init='k-means++', n_init=100, max_iter=1000, random_state=0)
# Fit to the data and predict the cluster assignments for each data point
km_clusters = model.fit_predict(df.values)
# View the cluster assignments
km_clusters

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
def plot_clusters(samples, clusters):
    """Scatter-plot 2-D samples, styled by their cluster label.

    Parameters
    ----------
    samples : sequence of sequences
        One entry per sample; element 0 is used as x and element 1 as y.
    clusters : sequence of int
        Cluster label of each sample, in the same order as ``samples``.

    Labels 0, 1 and 2 keep their original blue/green/orange and ``*``/``x``/``+``
    styles. Further labels continue through the lists below and wrap around,
    so any number of clusters can be drawn without a KeyError.
    """
    colors = ['blue', 'green', 'orange', 'red', 'purple',
              'brown', 'pink', 'gray', 'olive', 'cyan']
    markers = ['*', 'x', '+', 'o', 's', '^', 'v', 'D', 'P', 'X']
    for sample, cluster in zip(samples, clusters):
        style_index = int(cluster) % len(colors)
        plt.scatter(
            sample[0],
            sample[1],
            color=colors[style_index],
            marker=markers[style_index],
            s=100,
        )
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')
    plt.title('Assignments')
    plt.show()

# Plot the rows of `df` (as a NumPy array) styled by their KMeans cluster label.
plot_clusters(np.array(df), km_clusters)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
%matplotlib inline

# Fit one KMeans model per cluster count (1 up to 10) and record its
# within-cluster sum of squares (WCSS), to see where adding clusters stops
# paying off. KMeans rejects more clusters than samples, so the upper bound
# is capped at the number of rows in df.
max_clusters = min(10, len(df))
cluster_counts = range(1, max_clusters + 1)

wcss = []
for i in cluster_counts:
    # n_init is set explicitly because its default differs between
    # scikit-learn versions; the seed makes the curve reproducible.
    kmeans = KMeans(n_clusters=i, n_init=10, random_state=0)
    # Fit the data points
    kmeans.fit(df.values)
    # Get the WCSS (inertia) value
    wcss.append(kmeans.inertia_)

# Plot the WCSS values onto a line graph
plt.plot(cluster_counts, wcss)
plt.title('WCSS by Clusters')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()