# Phyloshape test with Gesneriaceae data

In [1]:
from pathlib import Path
import numpy as np
import k3d
from sklearn.decomposition import PCA
import phyloshape
from phyloshape.shape.src.mapper import VectorMapper
# from phyloshape.shape.src.vectors import VertexVectorMapper
# from phyloshape.vectors.transform import transform_vector_to_absolute

phyloshape.set_log_level("DEBUG")

🐞 logger_setup:set_log_level | [34m[1mphyloshape v.0.0.1 logging enabled[0m


## Load data

In [2]:
# load some test data
models = phyloshape.data.get_gesneriaceae_models()
models

{'34_HC3403-3_17': Model(nverts=420, nfaces=0),
 '58_K039170_01': Model(nverts=420, nfaces=0),
 '70_K039203_04': Model(nverts=420, nfaces=0),
 '69_K039200_02': Model(nverts=420, nfaces=0),
 '70_K039203_05': Model(nverts=420, nfaces=0)}

In [3]:
models["34_HC3403-3_17"].draw()

Plot(antialias=5, axes=['x', 'y', 'z'], axes_helper=1.0, axes_helper_colors=[16711680, 65280, 255], background…

## Create comparative Mapper

In [4]:
mapper = VectorMapper(models, num_neighbors=420, num_iterations=5)

ℹ️ mapper:__init__            | [1mVectorMapper num_models=5 num_vertices=420[0m
⚠️ mapper:_remove_duplicates  | [33m[1m9 identical vertices will be excluded: [15, 31, 47, 63, 87, 95, 103, 111, 119][0m
🐞 mapper:_set_vertex_path    | [34m[1mfinding optimal stable path visiting all Vertices[0m
ℹ️ mapper:_set_vertex_path    | [1mvertex path: [166, 270, 165, 167, 269], ... [185, 315, 288, 184, 314][0m
🐞 mapper:_set_vector_faces   | [34m[1msetting reference faces to all Vectors along path[0m


In [5]:
test = mapper.get_model_from_vectors("test", vectors=mapper.vectors["34_HC3403-3_17"], weighted=True)

🐞 mapper:get_model_from_vectors | [34m[1minferring Vertex coords from mean path accumulated relative Vectors[0m
ℹ️ mapper:get_model_from_vectors | [1mrefining iteration 0; sum diff = 1.476e+05[0m
🐞 mapper:_set_vector_faces      | [34m[1msetting reference faces to all Vectors along path[0m
ℹ️ mapper:get_model_from_vectors | [1mrefining iteration 1; sum diff = 4.6093e-11[0m
🐞 mapper:_set_vector_faces      | [34m[1msetting reference faces to all Vectors along path[0m
ℹ️ mapper:get_model_from_vectors | [1mrefining iteration 2; sum diff = 4.1738e-11[0m
🐞 mapper:_set_vector_faces      | [34m[1msetting reference faces to all Vectors along path[0m
ℹ️ mapper:get_model_from_vectors | [1mrefining iteration 3; sum diff = 3.6121e-11[0m
🐞 mapper:_set_vector_faces      | [34m[1msetting reference faces to all Vectors along path[0m
ℹ️ mapper:get_model_from_vectors | [1mrefining iteration 4; sum diff = 3.2092e-11[0m
🐞 mapper:_set_vector_faces      | [34m[1msetting reference fa

In [9]:
#mapper.vector_weights[0]

In [6]:
test.draw()

Plot(antialias=5, axes=['x', 'y', 'z'], axes_helper=1.0, axes_helper_colors=[16711680, 65280, 255], background…

### Transform relative vectors to PCs

In [25]:
# mapper.vectors["34_HC3403-3_17"]

In [26]:
# rvecs = [(i, j.relative) for i,j in mapper.vectors["34_HC3403-3_17"].items()]

In [7]:
from sklearn.decomposition import PCA

In [8]:
# fdat = mapper.get_PCs()

In [9]:
# fdat.shape

In [53]:
p.components_

array([[ 0.45896443,  0.13516665,  0.66764531,  0.40500114,  0.40162848],
       [-0.35176035,  0.01182079,  0.73477987, -0.41454717, -0.40543077],
       [-0.81552484,  0.07841716,  0.05789033,  0.38662376,  0.41945292],
       [-0.02269458, -0.44357794,  0.05121245,  0.67330106, -0.58886886],
       [-0.00452645,  0.88242825, -0.09151106,  0.24761355, -0.38937535]])

In [52]:
# Fit the PCA on the transposed matrix: each column of fdat.T is one PCA
# variable (5 columns, per the transformed shape printed below).
p = PCA(svd_solver="full")
p.fit(fdat.T)
print(p.explained_variance_ratio_.round(2))
# Project once and reuse the result instead of calling transform() twice.
pc_scores = p.transform(fdat.T)
print(pc_scores.shape)
print(pc_scores)

[0.64 0.2  0.1  0.03 0.02]
(24660, 5)
[[ 58.3562179  -10.39338414   1.75284206   1.31774007   6.36264541]
 [ 19.96182584 -25.76720589 -25.56874477  -7.07121398   9.35777205]
 [ 73.07398025  21.96754715  -4.49684279  -1.39266911  -1.57769184]
 ...
 [-25.00135796  40.20857118  -6.16075532 -29.42621443  -2.03883992]
 [121.91585525 -15.68389401   8.84796155   3.99386281 -60.53879535]
 [279.15450781  71.36777053   3.6877666   23.64482796  10.11932403]]


## Better: fit the PCA with models as rows (samples)

In [58]:
# Fit the PCA with the rows of fdat as samples (5 rows, per the transformed
# shape printed below).
p = PCA(svd_solver="full")
p.fit(fdat)
print(p.explained_variance_ratio_.round(2))
# Project once and reuse the result instead of calling transform() twice.
pc_scores = p.transform(fdat)
print(pc_scores.shape)
print(pc_scores)

[0.52 0.23 0.18 0.07 0.  ]
(5, 5)
[[-1.62136899e+03  5.62039856e+03  1.20287102e+03  8.95385585e+01
   3.33064132e-12]
 [-1.46903688e+03 -2.94476001e+03  4.37670406e+03 -1.86400348e+02
  -2.48689958e-13]
 [ 8.65831032e+03 -2.32644843e+02 -6.25183939e+02  1.22211434e+01
  -1.58983937e-12]
 [-2.78418475e+03 -1.03304412e+03 -2.66156662e+03 -2.46736787e+03
  -3.26405569e-14]
 [-2.78371970e+03 -1.40994959e+03 -2.29282451e+03  2.55200852e+03
   5.36792832e-14]]


In [59]:
p.components_

array([[ 7.91972096e-04, -1.71574459e-03,  4.46753363e-03, ...,
         3.31343831e-03,  3.18090277e-03,  1.48285743e-02],
       [ 1.77523910e-03,  4.52690109e-03,  2.42604705e-03, ...,
        -1.02026527e-03,  6.13690712e-03,  5.05816160e-03],
       [-2.60368854e-03,  1.47902100e-03, -2.28825413e-03, ...,
         4.24344417e-03, -1.43083841e-02, -9.40735456e-03],
       [-8.84492362e-04,  5.70993552e-04,  8.38107780e-04, ...,
         7.29555873e-03,  8.11402071e-03, -6.21776613e-03],
       [-4.82767780e-01, -4.96558534e-01, -9.36134499e-04, ...,
        -3.92225395e-05, -4.14562470e-03,  1.27767679e-03]])

In [62]:
p.fit_transform(fdat)

array([[-1.62136899e+03,  5.62039856e+03,  1.20287102e+03,
         8.95385585e+01,  1.25587199e-12],
       [-1.46903688e+03, -2.94476001e+03,  4.37670406e+03,
        -1.86400348e+02,  1.25587199e-12],
       [ 8.65831032e+03, -2.32644843e+02, -6.25183939e+02,
         1.22211434e+01,  1.25587199e-12],
       [-2.78418475e+03, -1.03304412e+03, -2.66156662e+03,
        -2.46736787e+03,  1.25587199e-12],
       [-2.78371970e+03, -1.40994959e+03, -2.29282451e+03,
         2.55200852e+03,  1.25587199e-12]])

In [85]:
p = PCA(svd_solver="full")
p.fit(fdat.T)
p.transform(fdat.T).shape

(12330, 5)

In [81]:
p.transform(fdat)

array([[-1.13951912e+03,  2.89304529e+03,  9.81982582e+02,
         2.37242475e+01,  3.41282558e-13],
       [-6.47480001e+02, -1.97694937e+03,  2.14931334e+03,
        -1.06364351e+02, -8.52651283e-14],
       [ 4.57880805e+03,  1.24305942e+02, -3.12483706e+02,
        -1.19706568e+00,  4.93383112e-13],
       [-1.42468701e+03, -4.36305908e+02, -1.52467866e+03,
        -1.35954288e+03,  1.44773082e-13],
       [-1.36712192e+03, -6.04095954e+02, -1.29413356e+03,
         1.44338005e+03,  4.12558876e-13]])

In [28]:
mapper.get_vectors_relative(mapper.labels[0], 2)

array([[ 35.09254987,   1.84823109, -26.95524783],
       [ 20.62345762,  -6.97518185,  14.26437289],
       [-10.00837374,  -9.36250917, -19.14650039],
       [ 71.71991148,  -8.03803101,   2.46462067],
       [ 99.82916223,  -4.23573798,   4.44893158],
       [ 11.83820417,  -6.23836955, -24.83277204],
       [126.50877949,   9.37732919,  39.01930944],
       [131.03109394,  10.18264907,  20.80133118],
       [118.3463893 ,   1.30253536,  54.05596768],
       [127.06485821,   3.13388586,   6.79978443]])

In [37]:
# np.product was deprecated in NumPy 1.25 and removed in 2.0; np.prod is the supported name.
np.prod((3, 4, 5))

60

In [29]:
# Stack the relative vectors of one model (mapper.labels[0]), one entry per id in mapper.vertex_ids.
# NOTE(review): the previous comment said "data for three models", but only labels[0] is read here — confirm intent.
z = np.array([mapper.get_vectors_relative(mapper.labels[0], i) for i in mapper.vertex_ids])

In [36]:
z.flatten().reshape((z.

array([ 39.40183396,  49.64633372,  39.96769731, ..., -55.78841887,
        24.71469287,  12.1653359 ])

In [16]:
# 159 and 79 seem to be identical
mapper.vertex_ids[211]

159

In [17]:
# Show the coordinates of two vertices of the first model side by side.
# NOTE(review): the preceding cell's comment mentions vertices 159 and 79, but this compares 159 with 78 — confirm which pair was intended.
mapper.verts[0, 159], mapper.verts[0, 78] 

(array([ 658.16,  479.37, 1097.3 ], dtype=float32),
 array([ 678.37,  489.96, 1107.8 ], dtype=float32))

In [19]:
mapper.get_vector_ids(mapper.labels[0], 159)

array([[159,  79],
       [159,  78],
       [159, 158],
       [159,   0],
       [159, 120],
       [159,  81],
       [159,  80],
       [159,   8],
       [159,  82],
       [159,  83]])

In [20]:
mapper.get_vectors_relative(mapper.labels[0], 79)

array([[  0.        ,   0.        ,   0.        ],
       [ -4.32882362,  14.4830352 ,  20.05854375],
       [ -5.30833184,  22.8074959 , -20.00675496],
       [  6.64321941,  14.19409901,  12.45107582],
       [-16.52592147,   5.55977066,  24.72728392],
       [-80.22358634, -61.46085111, -52.36223902],
       [-81.9887038 , -44.4378293 , -60.11437995],
       [-99.88630129, -43.54115984, -58.00495164],
       [-79.6701191 , -73.3484335 , -37.65485792],
       [-80.00086089, -76.81475235, -18.92297752]])

In [22]:
mapper.get_vectors_absolute(mapper.labels[0], 79)

array([[   0.  ,    0.  ,    0.  ],
       [  20.21,   10.59,   10.5 ],
       [ -21.67,   20.04,    8.8 ],
       [  -4.71,    6.6 ,   18.3 ],
       [ -20.29,  -16.04,   15.7 ],
       [ -21.25,  -51.1 ,  -99.46],
       [ -31.24,  -35.22, -100.47],
       [ -46.48,  -43.23, -105.87],
       [ -16.25,  -67.42,  -91.3 ],
       [ -17.41,  -80.07,  -77.1 ]])

In [24]:
mapper.get_vectors_relative(mapper.labels[0], 159)

array([[  0.        ,   0.        ,   0.        ],
       [ -4.32882362,  14.4830352 ,  20.05854375],
       [ -5.30833184,  22.8074959 , -20.00675496],
       [  6.64321941,  14.19409901,  12.45107582],
       [-16.52592147,   5.55977066,  24.72728392],
       [-80.22358634, -61.46085111, -52.36223902],
       [-81.9887038 , -44.4378293 , -60.11437995],
       [-99.88630129, -43.54115984, -58.00495164],
       [-79.6701191 , -73.3484335 , -37.65485792],
       [-80.00086089, -76.81475235, -18.92297752]])

In [26]:
mapper.get_vector_faces(mapper.labels[0], 159)

array([[Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=78, coords=(678.37, 489.96, 1107.8)),
        Vertex(id=158, coords=(636.49, 499.41, 1106.1))],
       [Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=158, coords=(636.49, 499.41, 1106.1)),
        Vertex(id=0, coords=(653.45, 485.97, 1115.6))],
       [Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=78, coords=(678.37, 489.96, 1107.8)),
        Vertex(id=0, coords=(653.45, 485.97, 1115.6))],
       [Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=78, coords=(678.37, 489.96, 1107.8)),
        Vertex(id=158, coords=(636.49, 499.41, 1106.1))],
       [Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=78, coords=(678.37, 489.96, 1107.8)),
        Vertex(id=158, coords=(636.49, 499.41, 1106.1))],
       [Vertex(id=159, coords=(658.16, 479.37, 1097.3)),
        Vertex(id=78, coords=(678.37, 489.96, 1107.8)),
        Vertex(id=158, coords=(636.

## DATA

In [2]:
# Directory holding the Gesneriaceae test data; a relative path, so it is
# resolved against the current working directory.
DIR = Path("../../PhyloShapeTest/data/Gesneriaceae.Gigascience.2020/")
# Landmark description table located inside DIR.
LANDMARKS = DIR / "Landmark_description_rev_1205.csv"
# presumably the label of a reference model — TODO confirm; it is not used in the visible cells.
M0 = "12_K039105_04"

{'34_HC3403-3_17': Model(nverts=420, nfaces=0),
 '58_K039170_01': Model(nverts=420, nfaces=0),
 '70_K039203_04': Model(nverts=420, nfaces=0),
 '69_K039200_02': Model(nverts=420, nfaces=0),
 '70_K039203_05': Model(nverts=420, nfaces=0)}

⚠️ 2023-05-12-14:54:21.66 | [35m      mapper.py | [0m[36m_set_init_vectors         | [0m[33m[1m9 identical vertices will be excluded: [15, 31, 47, 63, 87, 95, 103, 111, 119][0m
ℹ️ 2023-05-12-14:54:24.64 | [35m      mapper.py | [0m[36m_set_ordered_vectors      | [0m[1mbest vertex path: [166, 270, 165, 167, 269], ... [185, 315, 288, 184, 314][0m


In [36]:
np.vstack([i.relative for i in mapper.vectors['34_HC3403-3_17'][3]])

array([[ -9.06463216,   1.84823109,  26.95524783],
       [ -8.27878173,  22.04015151, -11.09711649],
       [-11.37326254,  38.02715506,  19.14650039],
       [ 55.75600112,  -8.60897693,  -2.46462067],
       [ 76.97942911, -27.42794381,  -4.44893158],
       [  5.02748257,  23.26068038,  24.83277204],
       [ 89.27040413, -42.34411299, -54.05596768],
       [ 91.54945124, -53.59723937, -39.01930944],
       [ 88.09159089, -25.06460629, -61.99556134],
       [ 94.8560865 , -56.78566082, -20.80133118]])

In [23]:
sorted(DIR.glob("samples*"))

[PosixPath('../../PhyloShapeTest/data/Gesneriaceae.Gigascience.2020/samples.manual_sum.tab')]

In [25]:
from phyloshape.shape.src.shape_alignment import ShapeAlignment

In [30]:
ali = ShapeAlignment()

In [31]:
ali.append(label="A", sample=np.array([]))

In [32]:
ali.

<phyloshape.shape.src.shape_alignment.ShapeAlignment at 0x7f46e4988310>

In [6]:
def visualize_points(*input_points, **kwargs):
    """Draw several point clouds side by side in a single k3d plot.

    Each positional argument is one point cloud. The i-th cloud is shifted
    by ``i * xgap`` along the x axis so that the clouds do not overlap.

    Parameters
    ----------
    *input_points : array-like
        Point coordinates; each must broadcast against a length-3 offset
        (presumably shape (npoints, 3) — confirm against callers).
    **kwargs
        point_size : size passed to ``k3d.points`` (default 20).
        xgap : x-axis spacing between consecutive clouds (default 2000).

    Returns
    -------
    The k3d plot holding one points object per input cloud.
    """
    point_size = kwargs.get("point_size", 20)
    xgap = kwargs.get("xgap", 2000)
    plot = k3d.plot(grid_visible=False)
    for index, _points in enumerate(input_points):
        # Build a float offset per cloud: the former integer offset array
        # silently truncated fractional gaps (xgap=0.5 became 0), which
        # stacked every cloud on top of the first one.
        offset_v = np.array([index * xgap, 0.0, 0.0])
        plot += k3d.points(
            _points + offset_v,
            point_size=point_size,
            shader="flat",
        )
    return plot

In [7]:
# PCA transformator

class VectorTrans:
    """PCA-based transformer for a collection of equally shaped samples.

    The first call to ``transform`` stores the samples, scales them by their
    global maximum and fits a PCA on the flattened samples. Later calls
    project the data they are given with that fit, and ``inverse_transform``
    maps PCA scores back to arrays with the shape of one sample.
    """

    def __init__(self):
        # Samples passed to the first transform() call, kept as given.
        self.sample_data = None
        # Number of samples used for fitting.
        self.sample_size = None
        # Shape of a single sample, used to un-flatten reconstructions.
        self.each_s_shape = None
        # Scale factor the samples are divided by before the PCA.
        self.maxs = None
        # Flattened, scaled fitting samples (one row per sample).
        self.sample_normalized = None
        # Fitted PCA object (created lazily by transform()).
        self.pca = None

    def _normalize(self, samples):
        """Flatten every sample and divide it by the stored scale factor."""
        return np.array([np.ravel(s) / self.maxs for s in samples])

    def transform(self, sample_data):
        """Return the PCA scores of ``sample_data``.

        The first call fits the scale factor and the PCA on ``sample_data``.
        Subsequent calls reuse that fit and project the samples passed in;
        previously the argument was ignored after the first call and the
        scores of the original samples were returned again.

        Parameters
        ----------
        sample_data : sequence of arrays
            Samples sharing one shape; each is flattened to a row.

        Returns
        -------
        Array of PCA scores with one row per sample in ``sample_data``.
        """
        if self.sample_data is None:
            self.sample_data = sample_data
            self.sample_size = len(self.sample_data)
            self.each_s_shape = self.sample_data[0].shape
            # Scale by the global maximum over all samples. This maps the
            # data into [0, 1] only when it is non-negative. A maximum of
            # zero would divide by zero, so fall back to no scaling.
            peak = np.max(sample_data)
            self.maxs = peak if peak != 0 else 1.0
            self.sample_normalized = self._normalize(sample_data)
            normalized = self.sample_normalized
        else:
            normalized = self._normalize(sample_data)
        if self.pca is None:
            self.pca = PCA()
            self.pca.fit(self.sample_normalized)
        return self.pca.transform(normalized)

    def inverse_transform(self, transformed_data_per_sample):
        """Map PCA scores back to the original sample space.

        Must be called after ``transform`` has fitted the PCA.

        Parameters
        ----------
        transformed_data_per_sample : array
            Scores of one sample, or a 2-D array with one row per sample.

        Returns
        -------
        An array shaped like one sample when the input holds a single
        sample, otherwise an array of shape ``(n_samples, *sample_shape)``.
        """
        reconstructed = self.pca.inverse_transform(transformed_data_per_sample)
        # Undo the scaling applied before the PCA.
        reconstructed = np.asarray(reconstructed) * self.maxs
        if reconstructed.size == int(np.prod(self.each_s_shape)):
            # Single sample: keep the original return shape.
            return reconstructed.reshape(self.each_s_shape)
        return reconstructed.reshape((-1, *self.each_s_shape))
    
    