# Generate a set of data

We start by creating randomly generating a set of 4 + 4 points by applying a random transformation that stretch and sqeezes a square


In [1]:
import numpy as np
import pandas as pd

# Transform a (x,y) vector to a deformed version

Applies a non linear transformation to a vector

In [2]:
def transform(x: np.ndarray, epsilon: np.ndarray) -> np.ndarray:
    x_ = x[0]
    y_ = x[1]
    ex = epsilon[0]
    ey = epsilon[1]
    return np.array((x_ * (ex * y_ + 1), y_ * (ey * x_ + 1)))

## Some test

In [3]:
e_1 = np.array((0.1, -0.1))

transform(np.array((1, 1)), e_1)

array([1.1, 0.9])

## Generate some random data

Let's generate a serial of random deformed squares. The output of this function should have shape (4, num_squares).

In [4]:
unit_square = np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]])
print(unit_square.shape)
print([x.shape for x in unit_square])


def deformed_unit_square(epsilon: np.ndarray) -> np.ndarray:
    return np.array([transform(x, epsilon) for x in unit_square]).reshape(4, 2)


(4, 2)
[(2,), (2,), (2,), (2,)]


In [5]:
x = deformed_unit_square(np.array([0.1, -0.1]))
print(f"Result {x}\nShape: {x.shape}")

Result [[ 1.1  0.9]
 [ 0.9 -0.9]
 [-0.9 -1.1]
 [-1.1  1.1]]
Shape: (4, 2)


In [6]:
def generate_random_squares(num_squares: int) -> np.ndarray:
    epsilons = np.random.rand(num_squares, 2).reshape((-1, 2))
    epsilons = epsilons * 0.2 - 0.1
    print(epsilons.shape)

    return np.array([deformed_unit_square(e) for e in epsilons])

### Prepare bones squares

Bones are stretched on Y direction by 2.

In [7]:
NUM_SAMPLES = 1000

In [8]:
bone_data = generate_random_squares(NUM_SAMPLES) * [1, 2]
bone_data.reshape(100, -1)

(1000, 2)


array([[ 0.99250879,  2.16826872,  1.00749121, ..., -1.99295609,
        -1.06757314,  1.99295609],
       [ 0.94299066,  2.09913713,  1.05700934, ..., -1.87124618,
        -1.05288607,  1.87124618],
       [ 1.05875503,  2.1741393 ,  0.94124497, ..., -1.80235034,
        -1.08740699,  1.80235034],
       ...,
       [ 1.07349645,  1.91517966,  0.92650355, ..., -2.10072109,
        -1.08122724,  2.10072109],
       [ 0.92605266,  2.09529794,  1.07394734, ..., -2.18967206,
        -1.03129767,  2.18967206],
       [ 1.07702437,  1.88346061,  0.92297563, ..., -2.17433142,
        -0.95079799,  2.17433142]])

### Prepare teeth

Teeth are rotate by a random angle and translated by 2 units on Y direction

In [9]:
tooth_data = generate_random_squares(NUM_SAMPLES) + [0, 2]
tooth_data.reshape(100, -1)

(1000, 2)


array([[ 0.97133985,  3.08129015,  1.02866015, ...,  1.03235922,
        -1.06894926,  2.96764078],
       [ 0.96444293,  2.91255725,  1.03555707, ...,  1.06365361,
        -0.99166141,  2.93634639],
       [ 0.97142902,  2.93689316,  1.02857098, ...,  0.94273666,
        -0.92214647,  3.05726334],
       ...,
       [ 1.03687971,  3.01642181,  0.96312029, ...,  1.04326973,
        -1.05483559,  2.95673027],
       [ 0.90618518,  3.09016221,  1.09381482, ...,  1.03354936,
        -0.97536858,  2.96645064],
       [ 1.0069703 ,  2.93410301,  0.9930297 , ...,  0.93156576,
        -1.00421164,  3.06843424]])

### Find weighted center of mass

In [10]:
from typing import Callable, Tuple


def find_mass_center(square: np.ndarray,
                     weights: Callable[[np.ndarray], Tuple[float, float]]) -> np.ndarray:
    w = np.array([weights(p) for p in square])
    #print(f"w:{w}")
    s = np.sum(w, axis=0)
    #print(f"s:{s}")
    t = w * square
    #print(f"t:{t}")
    return (np.sum(t, axis=0) / s).reshape(1,-1)

In [11]:
def uniform(_p: np.ndarray) -> Tuple[float, float]:
    return 1., 1.


find_mass_center(unit_square, uniform)

array([[0., 0.]])

In [12]:
def threshold(t=0.):
    def weight(p: np.ndarray)->Tuple[float, float]:
        return (1, 2) if (p[1] <= t) else (1, 1)
    return weight

find_mass_center(unit_square, threshold())

array([[ 0.        , -0.33333333]])

In [13]:
bone_centers = np.array([ find_mass_center(s, threshold()) for s in bone_data]).reshape(-1,2)

In [14]:
bone_centers

array([[ 0.00000000e+00, -6.66666667e-01],
       [ 0.00000000e+00, -6.66666667e-01],
       [-2.77555756e-17, -6.66666667e-01],
       ...,
       [ 0.00000000e+00, -6.66666667e-01],
       [ 2.77555756e-17, -6.66666667e-01],
       [ 0.00000000e+00, -6.66666667e-01]])

In [15]:
teeth_centers = np.array([ find_mass_center(s, uniform) for s in tooth_data]).reshape(-1,2)

In [16]:
teeth_centers

array([[-2.77555756e-17,  2.00000000e+00],
       [-2.77555756e-17,  2.00000000e+00],
       [-5.55111512e-17,  2.00000000e+00],
       ...,
       [ 0.00000000e+00,  2.00000000e+00],
       [ 0.00000000e+00,  2.00000000e+00],
       [ 5.55111512e-17,  2.00000000e+00]])

In [17]:
bone_data.reshape(NUM_SAMPLES, -1)

array([[ 0.99250879,  2.16826872,  1.00749121, ..., -1.83173128,
        -0.99250879,  1.83173128],
       [ 1.06540544,  2.01985219,  0.93459456, ..., -1.98014781,
        -1.06540544,  1.98014781],
       [ 0.9053662 ,  2.08040024,  1.0946338 , ..., -1.91959976,
        -0.9053662 ,  1.91959976],
       ...,
       [ 0.99305176,  1.81433989,  1.00694824, ..., -2.18566011,
        -0.99305176,  2.18566011],
       [ 0.95452977,  2.191537  ,  1.04547023, ..., -1.808463  ,
        -0.95452977,  1.808463  ],
       [ 0.95079799,  1.82566858,  1.04920201, ..., -2.17433142,
        -0.95079799,  2.17433142]])

In [18]:
tooth_data.reshape(NUM_SAMPLES, -1)

array([[ 0.97133985,  3.08129015,  1.02866015, ...,  1.08129015,
        -0.97133985,  2.91870985],
       [ 0.95565595,  3.05962344,  1.04434405, ...,  1.05962344,
        -0.95565595,  2.94037656],
       [ 1.07667753,  3.05821188,  0.92332247, ...,  1.05821188,
        -1.07667753,  2.94178812],
       ...,
       [ 0.96192787,  2.98159883,  1.03807213, ...,  0.98159883,
        -0.96192787,  3.01840117],
       [ 0.92757421,  2.9937219 ,  1.07242579, ...,  0.9937219 ,
        -0.92757421,  3.0062781 ],
       [ 1.00421164,  2.93156576,  0.99578836, ...,  0.93156576,
        -1.00421164,  3.06843424]])

In [19]:

input_set = np.append(bone_data.reshape(NUM_SAMPLES, -1), tooth_data.reshape(NUM_SAMPLES,-1), axis=1)

In [20]:
input_set

array([[ 0.99250879,  2.16826872,  1.00749121, ...,  1.08129015,
        -0.97133985,  2.91870985],
       [ 1.06540544,  2.01985219,  0.93459456, ...,  1.05962344,
        -0.95565595,  2.94037656],
       [ 0.9053662 ,  2.08040024,  1.0946338 , ...,  1.05821188,
        -1.07667753,  2.94178812],
       ...,
       [ 0.99305176,  1.81433989,  1.00694824, ...,  0.98159883,
        -0.96192787,  3.01840117],
       [ 0.95452977,  2.191537  ,  1.04547023, ...,  0.9937219 ,
        -0.92757421,  3.0062781 ],
       [ 0.95079799,  1.82566858,  1.04920201, ...,  0.93156576,
        -1.00421164,  3.06843424]])

In [21]:
output_set = np.append(bone_centers, teeth_centers, axis=1)

In [22]:
output_set

array([[ 0.00000000e+00, -6.66666667e-01, -2.77555756e-17,
         2.00000000e+00],
       [ 0.00000000e+00, -6.66666667e-01, -2.77555756e-17,
         2.00000000e+00],
       [-2.77555756e-17, -6.66666667e-01, -5.55111512e-17,
         2.00000000e+00],
       ...,
       [ 0.00000000e+00, -6.66666667e-01,  0.00000000e+00,
         2.00000000e+00],
       [ 2.77555756e-17, -6.66666667e-01,  0.00000000e+00,
         2.00000000e+00],
       [ 0.00000000e+00, -6.66666667e-01,  5.55111512e-17,
         2.00000000e+00]])

In [23]:
import pandas as pd
input_data = pd.DataFrame(input_set)
output_data = pd.DataFrame(output_set)

In [24]:
input_data.to_pickle("input.pkl")
output_data.to_pickle("output.pkl")

In [25]:
input_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.992509,2.168269,1.007491,-2.168269,-1.007491,-1.831731,-0.992509,1.831731,0.971340,3.081290,1.028660,0.918710,-1.028660,1.081290,-0.971340,2.918710
1,1.065405,2.019852,0.934595,-2.019852,-0.934595,-1.980148,-1.065405,1.980148,0.955656,3.059623,1.044344,0.940377,-1.044344,1.059623,-0.955656,2.940377
2,0.905366,2.080400,1.094634,-2.080400,-1.094634,-1.919600,-0.905366,1.919600,1.076678,3.058212,0.923322,0.941788,-0.923322,1.058212,-1.076678,2.941788
3,0.972741,1.917176,1.027259,-1.917176,-1.027259,-2.082824,-0.972741,2.082824,0.904411,3.039489,1.095589,0.960511,-1.095589,1.039489,-0.904411,2.960511
4,1.085864,2.022899,0.914136,-2.022899,-0.914136,-1.977101,-1.085864,1.977101,0.989150,2.920222,1.010850,1.079778,-1.010850,0.920222,-0.989150,3.079778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1.054787,1.817858,0.945213,-1.817858,-0.945213,-2.182142,-1.054787,2.182142,1.095665,3.006984,0.904335,0.993016,-0.904335,1.006984,-1.095665,2.993016
996,1.027045,2.141205,0.972955,-2.141205,-0.972955,-1.858795,-1.027045,1.858795,1.067019,3.000774,0.932981,0.999226,-0.932981,1.000774,-1.067019,2.999226
997,0.993052,1.814340,1.006948,-1.814340,-1.006948,-2.185660,-0.993052,2.185660,0.961928,2.981599,1.038072,1.018401,-1.038072,0.981599,-0.961928,3.018401
998,0.954530,2.191537,1.045470,-2.191537,-1.045470,-1.808463,-0.954530,1.808463,0.927574,2.993722,1.072426,1.006278,-1.072426,0.993722,-0.927574,3.006278


In [26]:
output_data

Unnamed: 0,0,1,2,3
0,0.000000e+00,-0.666667,-2.775558e-17,2.0
1,0.000000e+00,-0.666667,-2.775558e-17,2.0
2,-2.775558e-17,-0.666667,-5.551115e-17,2.0
3,0.000000e+00,-0.666667,2.775558e-17,2.0
4,5.551115e-17,-0.666667,0.000000e+00,2.0
...,...,...,...,...
995,5.551115e-17,-0.666667,5.551115e-17,2.0
996,0.000000e+00,-0.666667,0.000000e+00,2.0
997,0.000000e+00,-0.666667,0.000000e+00,2.0
998,2.775558e-17,-0.666667,0.000000e+00,2.0
