In [1]:
import os
import pandas
from pandas import DataFrame
import onshape.brepio as brepio
import json
from IPython.display import JSON as DJSON
from IPython.display import clear_output
import numpy as np
from utils import adjacency_list, adjacency_list_from_brepdata, connected_components, homogenize, adjacency_matrix
import sys
import time

In [2]:
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    """Notebook display helper that shows JSON as a collapsible tree via renderjson.js.

    Parameters
    ----------
    json_data : str or JSON-serializable object
        A string is taken to already be JSON text and is embedded unchanged.
        Any other value (dict, list, number, ...) is serialised with
        ``json.dumps`` so that it is valid JavaScript when spliced into the page.
        Objects that ``json.dumps`` cannot serialise are stored as-is, which
        keeps the previous behaviour for such inputs.
    """

    def __init__(self, json_data):
        if isinstance(json_data, str):
            self.json_str = json_data
        else:
            try:
                # Previously only dicts were serialised; a list was formatted with
                # its Python repr (True/None/single quotes), which is not valid JS.
                self.json_str = json.dumps(json_data)
            except (TypeError, ValueError):
                self.json_str = json_data
        # Unique DOM id so several renderings can coexist in one notebook.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit the container <div> and the JavaScript that fills it."""
        display_html('<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid), raw=True)
        # NOTE(review): the script is fetched from rawgit.com at display time;
        # confirm that host still serves this file.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)

In [3]:
# Root of the dataset; later cells read <datapath>/data/flattened_assemblies.
# NOTE(review): absolute, machine-specific path.
datapath = '/projects/grail/benjones/cadlab'
loader = brepio.Loader(datapath)

In [4]:
# Load a single flattened assembly by file name, without geometry.
# The call returns a pair that is unpacked into (occs, mates).
occs, mates = loader.load_flattened('b1962d9eb0863de9d3befdfb_0e59c1f11f56bbf035659f09_a60cbcfa67d608b28ffb10fa.json',geometry=False)

In [86]:
# Parse the raw JSON of one flattened assembly so it can be inspected directly.
path = os.path.join(
    datapath,
    'data',
    'flattened_assemblies',
    '2af72abb876be93ec0053c43_bbd975735483a4a23906294c_d2bc5f905323eb86065a54c3.json',
)
with open(path) as f:
    s = json.loads(f.read())

In [87]:
# Show the parsed JSON with the RenderJSON helper defined above.
RenderJSON(s)

### things to count:
- no mates
- disconnected components
- histogram of number of moving parts

# For every line of the list file, take the last seven '/'-separated segments
# and keep the ones at positions 0, 4 and 6.
datalistpath = os.path.join('data', 'dof_assemblies.txt')
with open(datalistpath) as f:
    datalist = [
        [segment for position, segment in enumerate(line.strip().split('/')[-7:]) if position in (0, 4, 6)]
        for line in f
    ]

## Make dataframes

In [4]:
# generate_dataframes: when True, the next cell rebuilds the assembly/mate/part
# tables from the JSON files and writes them to `name`; when False it reads the
# three tables back from `name`.
generate_dataframes=False
name = '/fast/jamesn8/assembly_data/assembly_data2.h5'

In [5]:
# Build the assembly / mate / part tables from the flattened-assembly JSON files
# and save them to `name`, or load previously saved tables from `name`.
if generate_dataframes:
    print('making filter set')
    with open('filter_list.txt') as f:
        # One file name per line; files listed here are excluded below.
        filter_set = {line.strip() for line in f}

    assembly_rows = []
    assembly_indices = []
    part_rows = []
    mate_rows = []

    j = 0  # assembly id; only advanced for assemblies that load successfully
    for entry in os.scandir(os.path.join(datapath, 'data', 'flattened_assemblies')):
        if not entry.name.endswith('.json') or entry.name in filter_set:
            continue

        try:
            occs, mates = loader.load_flattened(entry.name, geometry=False)
        except KeyError:
            # Assemblies for which the loader raises KeyError are skipped.
            continue

        if j % 500 == 0:
            clear_output(wait=True)
            display(f'num_processed: {j}')

        # The former `num_lone` computation was dropped: its value was never
        # stored, and it indexed adj[:,0] without the zero-width guard that
        # generate_statistics applies to the same kind of array.
        adj = homogenize(adjacency_list_from_brepdata(occs, mates))
        num_components = connected_components(adj)
        num_rigid = connected_components(adj, connectionType='fasten')

        assembly_rows.append([os.path.splitext(entry.name)[0], num_components, num_rigid])
        assembly_indices.append(np.int32(j))

        for mate in mates:
            # Only mates with exactly two mated entities are tabulated.
            if len(mate.matedEntities) != 2:
                continue
            axes = []
            origins = []
            for me in mate.matedEntities:
                # me[0] keys into occs; the first item of that entry is used as a
                # transform: tf[:3,:3] is applied to both the axes and the origin,
                # and tf[:3,3] is added to the origin.
                tf = occs[me[0]][0]
                rotation = tf[:3, :3]
                axes.append(rotation @ me[1][1])
                origins.append(rotation @ me[1][0] + tf[:3, 3])
            mate_rows.append([
                np.int32(j),
                mate.matedEntities[0][0],
                mate.matedEntities[1][0],
                mate.type,
                origins[0].astype(np.float32),
                axes[0].astype(np.float32),
                origins[1].astype(np.float32),
                axes[1].astype(np.float32),
            ])

        for occ in occs:
            part = occs[occ][1]
            part_rows.append([
                np.int32(j),
                occ,
                part['documentId'],
                part['documentMicroversion'],
                part['elementId'],
                part['fullConfiguration'],
                part['partId'],
            ])

        j += 1

    print('building dataframes...')
    assembly_df = DataFrame(assembly_rows, index=assembly_indices, columns=['AssemblyPath','ConnectedComponents','RigidPieces'])
    mate_df = DataFrame(mate_rows, columns=['Assembly','Part1','Part2','Type','Origin1','Axes1','Origin2','Axes2'])
    part_df = DataFrame(part_rows, columns=['Assembly','PartOccurrenceID','did','mv','eid','config','PartId'])

    print('saving dataframes...')
    # `key` is passed by keyword: pandas has deprecated positional arguments to
    # to_hdf other than the path.
    assembly_df.to_hdf(name, key='assembly')
    mate_df.to_hdf(name, key='mate')
    part_df.to_hdf(name, key='part')
    # Release the row lists; the dataframes now hold the data.
    del assembly_rows
    del part_rows
    del mate_rows
    del assembly_indices
else:
    assembly_df = pandas.read_hdf(name,'assembly')
    mate_df = pandas.read_hdf(name,'mate')
    part_df = pandas.read_hdf(name,'part')

In [13]:
# Show the assembly-level table.
assembly_df

Unnamed: 0,AssemblyPath,ConnectedComponents,RigidComponents,LoneParts
0,58aace5054540c1fba909bab_ac500a73d2324ea4fa3af...,1,40,0
1,22da7798aeb64e7fa324441b_f6b092ab676335605c8bd...,9,10,8
2,fcb368fc6fe7bc05018166a5_40bf905092772433681b9...,8,9,7
3,1cb6975490fc95c9b8bee219_6115dafce42a983d94516...,1,1,0
4,516215b6b243e909153fc811_c82f99e1fc73866dd6137...,4,20,0
...,...,...,...,...
114980,35643154ddf9fdf2beeaa0b4_38b1f115023e745b129fb...,1,3,0
114981,1a3d947712c59dc0a548809f_d0a8684c411e9ea5e7f74...,9,14,8
114982,b907cf0f75c6a518d69b63ad_3621598c6de35944a757b...,1,12,0
114983,af00672ea3bca15e6ff0dc97_4528afd393ba3d40fcb24...,4,7,2


### Generate statistics (if not already present)

In [14]:
def generate_statistics(assembly_df, part_df, mate_df):
    """Recompute per-assembly connectivity counts and store them on ``assembly_df``.

    For each value in ``assembly_df.index`` the matching rows of ``part_df`` and
    ``mate_df`` (matched on their ``Assembly`` column) are gathered, turned into
    an adjacency structure with ``adjacency_list`` and ``homogenize``, and
    reduced to three counts.

    Parameters
    ----------
    assembly_df : DataFrame
        One row per assembly. Modified in place: the columns
        ``ConnectedComponents``, ``RigidComponents`` and ``LoneParts`` are
        written (created or overwritten).
    part_df : DataFrame
        Must contain the columns ``Assembly`` and ``PartOccurrenceID``.
    mate_df : DataFrame
        Must contain ``Assembly``; the first three remaining columns are passed
        on as the mate tuples (``Part1``, ``Part2``, ``Type`` for the frames
        built in this notebook).

    Returns
    -------
    None
    """
    part_df_indexed = part_df.set_index('Assembly')
    mate_df_indexed = mate_df.set_index('Assembly')
    statistics = []

    for index in assembly_df.index:
        if index % 500 == 0:
            clear_output(wait=True)
            display(f'num_processed: {index}')

        # .loc returns a DataFrame when several rows match, a Series when exactly
        # one row matches, and raises KeyError when none do.
        try:
            parts = part_df_indexed.loc[index]
        except KeyError:
            occs = []
        else:
            if parts.ndim == 2:
                occs = list(parts['PartOccurrenceID'])
            else:
                occs = [parts['PartOccurrenceID']]

        try:
            mates = mate_df_indexed.loc[index]
        except KeyError:
            mates_tuples = []
        else:
            if mates.ndim == 2:
                mates_tuples = [(row[0], row[1], row[2]) for row in mates.itertuples(index=False)]
            else:
                # Single matching row: use explicit positional access. Integer
                # keys on a label-indexed Series are deprecated in pandas.
                mates_tuples = [(mates.iloc[0], mates.iloc[1], mates.iloc[2])]

        if len(occs) == 0:
            statistics.append([0, 0, 0])
        else:
            adj_list = adjacency_list(occs, mates_tuples)
            adj = homogenize(adj_list)
            # With a zero-width array every part is counted as lone; otherwise a
            # part is lone when its first entry is negative.
            num_lone = adj.shape[0] if adj.shape[1] == 0 else len([el for el in adj[:,0] if el < 0])
            num_components = connected_components(adj)
            num_rigid = connected_components(adj, connectionType='fasten')
            statistics.append([num_components,num_rigid,num_lone])

    # reshape keeps the array two-dimensional even when there are no assemblies,
    # so the column slices below work for an empty frame too.
    statistics_np = np.array(statistics).reshape(-1, 3)
    assembly_df['ConnectedComponents'] = statistics_np[:,0]
    assembly_df['RigidComponents'] = statistics_np[:,1]
    assembly_df['LoneParts'] = statistics_np[:,2]

# Write the tables back to HDF5. `fixedname` is the same path as `name`, so this
# overwrites the file the tables were loaded from.
fixedname = '/fast/jamesn8/assembly_data/assembly_data2.h5'

# mode='w' recreates the file, so this table must be written first. `key` and
# `mode` are passed by keyword: pandas has deprecated positional arguments to
# to_hdf other than the path.
assembly_df.to_hdf(fixedname, key='assembly', mode='w')

mate_df.to_hdf(fixedname, key='mate', mode='r+')
# NOTE(review): `part_df_fixed` is not defined in the visible cells; if it is
# missing this line raises NameError after the file has already been recreated
# without a 'part' table. Confirm which frame is meant before running.
part_df_fixed.to_hdf(fixedname, key='part', mode='r+')

### Count parts in each assembly

In [15]:
# Count mates and part occurrences per assembly and attach both counts to the
# assembly table. join='inner' keeps only index values present in all three
# inputs.
mate_groups = mate_df.groupby('Assembly')
part_groups = part_df.groupby('Assembly')

mate_counts = mate_groups['Part1'].count()
part_counts = part_groups['PartOccurrenceID'].count()
part_counts.sort_values(ascending=False, inplace=True)

joined = (
    pandas.concat([part_counts, mate_counts, assembly_df], axis=1, join='inner')
    .rename(columns={'PartOccurrenceID':'PartCount', 'Part1':'MateCount'})
)

In [16]:
# Assemblies with more than 1000 part occurrences.
joined[joined['PartCount'] > 1000]

Unnamed: 0,PartCount,MateCount,AssemblyPath,ConnectedComponents,RigidComponents,LoneParts
34578,46647,98,b1962d9eb0863de9d3befdfb_0e59c1f11f56bbf035659...,46549,46549,46500
88748,18985,2321,5cb81916135ff1e97304faa5_a62a53175bf488f00d7b6...,16664,16665,15118
113616,11155,13183,038595d4c14be05f13549193_3920cd08854308b9a5994...,1014,7098,0
8259,10180,1804,7220271e301f7f432ac33c73_3dfbeb0d1bcb97844902c...,8430,8484,8159
18250,5414,5,2741239cb1173efbe46fa076_cc71945206ba647d6e194...,5409,5409,5408
...,...,...,...,...,...,...
56533,1010,5,4154da5a354ebfc511a03049_6be6e3cd3cc5e5b7fd0c3...,1006,1008,1005
99422,1009,2,58f8a2851f2965103997314d_4c8173750ddf67e0b7173...,1007,1007,1006
58263,1007,60,458afc4b6e07ef51c4d54197_adb5fbc2aa6b15f527984...,963,971,952
23235,1005,59,c689d0fd1283250a6318e45d_0e3e5139ee11da9647588...,962,970,952


### Filter out mates that connect a part to itself

In [18]:
# Keep only the mates whose Part1 and Part2 differ.
mate_df_filtered = mate_df[mate_df['Part1'] != mate_df['Part2']]

### Find repeated mates between the same pair of parts (TODO: handle order)

In [19]:
# Key each mate by "<assembly>-<Part1>-<Part2>" and keep every row whose key
# occurs more than once. The key is order-sensitive: (A, B) and (B, A) differ.
mate_df_indexed = (
    mate_df_filtered
    .assign(MateID=lambda df: df['Assembly'].astype(str) + '-' + df['Part1'] + '-' + df['Part2'])
    .set_index('MateID')
)
is_repeated = mate_df_indexed.index.duplicated(keep=False)
mate_duplicates = mate_df_indexed[is_repeated]

In [20]:
# Show every mate whose MateID occurs more than once.
mate_duplicates

Unnamed: 0_level_0,Assembly,Part1,Part2,Type,Origin1,Axes1,Origin2,Axes2
MateID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10-MS8WYyXlytoCXG1pZ-MMb6JZl4ks61uaBwr,10,MS8WYyXlytoCXG1pZ,MMb6JZl4ks61uaBwr,PLANAR,"[-0.1109657, 0.0006012997, -0.019294163]","[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0,...","[-0.10084174, 0.0076013, -0.019294163]","[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0,..."
10-MS8WYyXlytoCXG1pZ-MMb6JZl4ks61uaBwr,10,MS8WYyXlytoCXG1pZ,MMb6JZl4ks61uaBwr,FASTENED,"[-0.10084174, -0.0063987, -0.019294163]","[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0,...","[-0.10084174, -0.0063987, -0.019294163]","[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0,..."
19-MuLlRrV+etDEQ59tI-McIwN7KZ0OTQieKHB,19,MuLlRrV+etDEQ59tI,McIwN7KZ0OTQieKHB,PLANAR,"[0.8919538, -0.21571445, -0.17983559]","[[1.0, -1.4311469e-16, 1.5065959e-15], [-1.368...","[0.8919538, -0.2250271, -0.17783557]","[[1.0, -2.8622937e-17, 8.6042284e-16], [-2.797..."
19-MuLlRrV+etDEQ59tI-McIwN7KZ0OTQieKHB,19,MuLlRrV+etDEQ59tI,McIwN7KZ0OTQieKHB,PLANAR,"[0.8919538, -0.07840066, 0.41417435]","[[1.0, 1.4155344e-15, 1.4311469e-16], [-1.3879...","[0.8919538, -0.21573521, -0.22529466]","[[1.0, 8.6042284e-16, 2.8622937e-17], [-2.7979..."
29-MHPOUswYQVDYSuina-MZcNWrWavY9JHe15v,29,MHPOUswYQVDYSuina,MZcNWrWavY9JHe15v,BALL,"[0.034226157, 0.046530746, 0.0526554]","[[-0.89259946, 0.4508505, 1.0931191e-16], [-0....","[0.034226157, 0.046530746, 0.0526554]","[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, ..."
...,...,...,...,...,...,...,...,...
114982-Mh/0tUyxn4Yxn1njz-M8mz8WNP4WultrNkB,114982,Mh/0tUyxn4Yxn1njz,M8mz8WNP4WultrNkB,REVOLUTE,"[0.41482982, -0.3779846, 0.013403496]","[[-2.6818574e-31, 1.7347235e-16, -1.0], [1.249...","[0.41482982, -0.3779846, 0.013403496]","[[-1.7347235e-16, 0.0, -1.0], [1.0, 0.0, -1.73..."
114982-MW5LQNEDrVq5McJ1y-M8mz8WNP4WultrNkB,114982,MW5LQNEDrVq5McJ1y,M8mz8WNP4WultrNkB,REVOLUTE,"[0.41482982, -0.5679846, 0.013403496]","[[-1.7347235e-16, -1.1844469e-31, -1.0], [1.0,...","[0.41482982, -0.5679846, 0.013403496]","[[-1.7347235e-16, 0.0, -1.0], [1.0, 0.0, -1.73..."
114982-MW5LQNEDrVq5McJ1y-M8mz8WNP4WultrNkB,114982,MW5LQNEDrVq5McJ1y,M8mz8WNP4WultrNkB,REVOLUTE,"[0.41482982, -0.5479846, 0.013403496]","[[-1.7347235e-16, -1.1844469e-31, -1.0], [1.0,...","[0.41482982, -0.5479846, 0.013403496]","[[0.0, 1.7347235e-16, -1.0], [0.0, -1.0, -1.73..."
114982-M3K9ryqX6vkgBFeiS-MKZHpJc11acyvWQOq,114982,M3K9ryqX6vkgBFeiS,MKZHpJc11acyvWQOq,REVOLUTE,"[0.08282983, -0.5679846, 0.013403496]","[[-1.7347235e-16, 1.0344651e-31, 1.0], [1.0, -...","[0.08282983, -0.5679846, 0.013403496]","[[-1.7347235e-16, 0.0, 1.0], [1.0, 0.0, 1.7347..."


In [None]:
# Inspect the rows that share one particular MateID.
mate_duplicates.loc['10-MS8WYyXlytoCXG1pZ-MMb6JZl4ks61uaBwr']

# Legacy statistics

In [None]:
# Legacy pass over the flattened-assembly files that collects per-assembly
# counts into plain Python lists (one entry per successfully loaded assembly).
# NOTE(review): `filter_set` is only created by the dataframe-generation cell
# when generate_dataframes is True; with it False on a fresh kernel this cell
# raises NameError.
# `skipped` counts files whose load raised KeyError or FileNotFoundError.
skipped = 0

# lone pieces per assembly (columns of the adjacency matrix that sum to zero)

c_num_lone_pieces = []

# connected components per assembly

c_num_components = []

# number of parts, only for assemblies with a single connected component

c_num_parts_connected = []

# number of rigid components, only for assemblies with a single connected component

c_num_rigid_connected = []

# the same two quantities for every assembly, without the single-component condition

c_num_parts = []

c_num_rigid = []

#for data in datalist:
# j numbers every directory entry, including ones skipped below; it only drives
# the progress print.
for j,entry in enumerate(os.scandir(os.path.join(datapath,'data','flattened_assemblies'))):
    if j % 1000 == 0:
        print(j)
    if not entry.name.endswith('.json') or entry.name in filter_set:
        continue
    try:
        #occs, mates = loader.load(data[0], data[1], data[2], geometry=False)
        # NOTE(review): this passes entry.path, while the other cells pass
        # entry.name to load_flattened; confirm the loader accepts both.
        occs, mates = loader.load_flattened(entry.path, geometry=False)
    except KeyError as e:
        #print(e)
        skipped += 1
        continue
    except FileNotFoundError as e:
        #print(e)
        skipped += 1
        continue
    #print(occs, mates)
    # NOTE(review): connected_components receives an adjacency matrix here but a
    # homogenized adjacency list elsewhere in this notebook; confirm it supports both.
    adj = adjacency_matrix(occs, mates)
    num_connections = np.sum(adj, 0)
    num_lone = len([c for c in num_connections if c == 0])
    c_num_lone_pieces.append(num_lone)
    
    num_components = connected_components(adj)
    num_rigid = connected_components(adj, connectionType='fasten')
    c_num_components.append(num_components)
    
    c_num_rigid.append(num_rigid)
    # adj.shape[0] is used as the number of parts in the assembly
    c_num_parts.append(adj.shape[0])
    if num_components == 1:
        c_num_parts_connected.append(adj.shape[0])
        c_num_rigid_connected.append(num_rigid)

    
    
print('skipped',skipped)

In [None]:
# Print summary percentages, then draw one histogram per collected statistic.
# NOTE(review): `plt` is not imported in the visible cells; this cell needs
# matplotlib.pyplot bound to `plt` before it can run.
num_with_lone_pieces = sum(1 for c in c_num_lone_pieces if c > 0)
num_fully_connected = sum(1 for c in c_num_components if c == 1)
num_processed = len(c_num_parts)
print('number of assemblies processed: %d'%num_processed)
print('assemblies with lone pieces: %d (%.2f%%)'%(num_with_lone_pieces, num_with_lone_pieces/num_processed*100))
print('assemblies fully connected: %d (%.2f%%)'%(num_fully_connected, num_fully_connected/num_processed*100))

nbins = 40
# (values, title) pairs in the order the figures are shown.
histogram_specs = [
    (c_num_parts, 'num parts'),
    (c_num_rigid, 'num rigid components'),
    (c_num_parts_connected, 'num parts in connected models'),
    (c_num_rigid_connected, 'num rigid components in connected models'),
    (c_num_lone_pieces, 'num lone pieces'),
    (c_num_components, 'num connected components'),
]
for hist_values, hist_title in histogram_specs:
    plt.hist(hist_values, nbins, range=(0, nbins+1))
    plt.title(hist_title)
    plt.show()


In [None]:
# Fully connected assemblies with at least two rigid components.
sum(1 for c in c_num_rigid_connected if c >= 2)

In [None]:
# Fully connected assemblies with exactly two parts.
sum(1 for c in c_num_parts_connected if c == 2)

In [None]:
# Fully connected assemblies with more than two rigid components: the count,
# then its percentage of all processed assemblies.
num_with_more_than_2_rigid = sum(1 for c in c_num_rigid_connected if c > 2)
print(num_with_more_than_2_rigid)
print(num_with_more_than_2_rigid/num_processed*100)

In [None]:
# Fully connected assemblies with more than two parts: the count, then its
# percentage of all processed assemblies.
num_with_more_than_2_parts = sum(1 for c in c_num_parts_connected if c > 2)
print(num_with_more_than_2_parts)
print(num_with_more_than_2_parts/num_processed*100)

In [None]:
# Assemblies with at least one lone piece: the count, then its percentage of
# all processed assemblies.
num_with_lone_parts = sum(1 for c in c_num_lone_pieces if c > 0)
print(num_with_lone_parts)
print(num_with_lone_parts/num_processed*100)

In [None]:
# NOTE(review): unfinished cell. `num_with` is not assigned in any visible cell,
# so running this raises NameError.
num_with