In [1]:
from json import load
import numpy as np
from scipy.spatial import ConvexHull
import os
import csv

In [8]:
jd_loc = r'json'
cd_loc = r'csv\data.csv'
classes = ['car', 'fish', 'house', 'tree', 'bicycle', 'guitar', 'pencil', 'clock']
DATA = []

In [9]:
data_to_remove = {'car': [70, 82, 88, 100, 106, 110, 170, 201, 225, 242, 246, 285, 308, 338, 385, 399, 403, 422, 428, 430, 442, 449, 461, 462, 477, 501, 576, 582, 601, 614, 608, 609, 616, 619, 620, 656, 657, 653, 670, 676, 673, 697], 'fish': [44, 54, 82, 100, 105, 111, 170, 201, 252, 282, 331, 337, 349, 399, 422, 428, 442, 461, 464, 462, 471, 477, 482, 501, 576, 581, 608, 609, 616, 655, 668, 678, 676, 701, 698], 'house': [42, 47, 54, 80, 85, 100, 151, 164, 170, 197, 198, 201, 203, 252, 259, 325, 331, 340, 358, 385, 399, 388, 422, 425, 442, 461, 462, 482, 484, 499, 526, 551, 550, 559, 576, 565, 604, 608, 609, 616, 619, 655, 657, 676, 686, 698, 703], 'tree': [54, 82, 170, 252, 294, 313, 331, 385, 422, 428, 433, 442, 448, 454, 461, 466, 462, 483, 500, 550, 608, 609, 619, 616, 657, 652, 670, 692, 703, 698, 699], 'bicycle': [698, 703, 47, 54, 65, 82, 100, 166, 170, 190, 287, 331, 399, 422, 428, 442, 462, 500, 584, 587, 576, 608, 609, 616, 646, 657, 670, 673, 678, 676, 692], 'guitar': [8, 9, 54, 72, 111, 170, 245, 252, 259, 358, 372, 385, 428, 427, 461, 462, 484, 576, 608, 609, 616, 618, 652, 656, 657, 653, 661, 676, 673, 678, 688, 686, 698, 706], 'pencil': [9, 27, 57, 54, 157, 161, 170, 185, 253, 344, 348, 422, 424, 442, 462, 461, 500, 576, 601, 608, 609, 619, 616, 620, 624, 650, 652, 656, 657, 670, 673, 676, 683, 678, 692], 'clock': [711, 36, 54, 70, 75, 79, 116, 142, 161, 254, 256, 258, 344, 348, 385, 399, 422, 424, 427, 442, 461, 462, 477, 500, 535, 608, 609, 616, 619, 620, 625, 628, 649, 656, 657, 673, 670, 676, 678, 693, 713]}

In [None]:
def get_n_paths(shape):
    return len(shape)

def get_n_points(shape):
    return sum( len(path) for path in shape )

def get_width(shape):
    x_min = float('inf')
    x_max = -float('inf')

    for path in shape:
        for point in path:
            x,y = point
            x_min = min(x, x_min)
            x_max = max(x, x_max)

    return x_max - x_min

def get_height(shape):
    y_min = float('inf')
    y_max = -float('inf')

    for path in shape:
        for point in path:
            x,y = point
            y_min = min(y, y_min)
            y_max = max(y, y_max)

    return y_max - y_min

def get_elongation(shape):
    w, h = get_width(shape), get_height(shape)
    return (1 + max(w, h)) / (1 + min(w, h))


def get_roundness(shape):
    points = [ [point[0], 400-point[1]] for path in shape for point in path ]
    # Exemple de points
    points = np.array(points)

    # Calculer le rectangle englobant minimal
    hull = ConvexHull(points)

    # Dimensions du rectangle englobant
    width = np.max(hull.points[hull.vertices, 0]) - np.min(hull.points[hull.vertices, 0])
    height = np.max(hull.points[hull.vertices, 1]) - np.min(hull.points[hull.vertices, 1])

    # Calculer le roundness (circularité) en utilisant l'aspect ratio
    r = (width+height)/np.pi
    roundness = (1 + width*height)/(1+ np.pi*r**2)

    return roundness

info = {
    'n_paths': get_n_paths,
    'n_points' : get_n_points,
    'width' : get_width,
    'height' : get_height,
    'elongation' : get_elongation,
    'roundness' : get_roundness,
    # class
}
DATA.append(
    list(info.keys()) + [ 'class' ]
)

In [None]:
def turn_jd_to_cd(jf_loc, jf_name):
    with open(jf_loc, 'r') as file:
        jf = load(file)
        for _class in classes:
            if int(jf_name.replace('.json','')) in data_to_remove[_class]:
                continue

            l = [] 
            shape = jf.get('drawings').get(_class)
            for _, info_func in info.items():
                l.append(info_func(shape))
            l.append(_class) # class
            DATA.append(l)

In [16]:
jf_names = os.listdir(jd_loc)
exceptions = []
for n, jf_name in enumerate(jf_names):
    jf_loc = os.path.join(jd_loc, jf_name)
    try : 
        print(f'Transforming data: {int(100*((n+1) / len(jf_names)))}% ({n+1}/{len(jf_names)})', end='\r')
        turn_jd_to_cd(jf_loc, jf_name)
    except : 
        print("Exception :", f'{n+1}/{len(jf_names)} : {jf_name}')
        exceptions.append(jf_name)

Exception : 5/716 : 101.json)
Exception : 71/716 : 161.json)
Exception : 77/716 : 167.json6)
Exception : 127/716 : 211.json6)
Exception : 643/716 : 677.json6)
Transforming data: 95% (681/716)

TypeError: remove: path should be string, bytes or os.PathLike, not NoneType

: 

In [13]:
# saving Data into a csv file
with open(cd_loc, 'w') as file:
    writer = csv.writer(file, lineterminator='\n')
    writer.writerows(DATA)