In [1]:
import itertools

import pandas as pd
import numpy as np
from scipy.spatial.distance import squareform, cdist, pdist

import ovwmols

In [2]:
dfs = ovwmols.readFiles(['sample/test1.xyz','sample/test2.xyz'], 'XYZ')
#test1.xyzでそれぞれphenolのC2,C1,O,H
refAtomIndexes = [0,1,11,10]

In [3]:
dfs[0]

Unnamed: 0,elementSymbol,x,y,z
0,C,0.080457,-0.863302,0.000218
1,C,1.458946,-0.625066,1.1e-05
2,C,1.948032,0.685583,-0.000158
3,C,1.04999,1.757404,-9.9e-05
4,C,-0.327549,1.52762,0.00012
5,C,-0.808717,0.212371,0.000265
6,H,-0.276298,-1.888352,0.000345
7,H,3.020953,0.86802,-0.000329
8,H,1.435398,2.773253,-0.000223
9,H,-1.021912,2.362133,0.000171


In [4]:
dfs[1]

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
7,O,-1.296117,-1.181977,0.001276
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198


In [5]:
#後の原子の対応関係を調べる際に楽をするために元素記号でソートしておく
df0 = dfs[0].sort_values('elementSymbol')
df1 = dfs[1].sort_values('elementSymbol')
df0

Unnamed: 0,elementSymbol,x,y,z
0,C,0.080457,-0.863302,0.000218
1,C,1.458946,-0.625066,1.1e-05
2,C,1.948032,0.685583,-0.000158
3,C,1.04999,1.757404,-9.9e-05
4,C,-0.327549,1.52762,0.00012
5,C,-0.808717,0.212371,0.000265
6,H,-0.276298,-1.888352,0.000345
7,H,3.020953,0.86802,-0.000329
8,H,1.435398,2.773253,-0.000223
9,H,-1.021912,2.362133,0.000171


In [6]:
df1

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198
10,H,0.978066,-2.376464,-0.000788


In [7]:
#元素記号のソートの結果をrefAtomIndexesに反映する
#内容は変わらないが、df0のデータの並び順に合うようになる
refAtomIndexes = [i for i in df0.index if i in refAtomIndexes]
#df0のインデックスの順番は変わっているのでlocで取得
#refAtomIndexesを並び替えたので、df0refsも元素記号順になっている
df0refs =df0.loc[refAtomIndexes]

print(refAtomIndexes)
df0refs

[0, 1, 11, 10]


Unnamed: 0,elementSymbol,x,y,z
0,C,0.080457,-0.863302,0.000218
1,C,1.458946,-0.625066,1.1e-05
11,H,3.21043,-1.433662,-0.0002
10,O,2.288102,-1.720862,-4.7e-05


In [8]:
#
#df0refsとdf1内の原子の数を元素毎に求める
#後でdf0refsとdf1の原子間距離の比較をする際に、データを複製する必要があるのでその数を求める
#また、df0refs内に1つしかない元素を探すのも目的
#indexが元素記号、値が各元素の原子数のSeriesになる
#value_counts()では多い順に元素が並んでしまうため、sort_index()で元素記号順に並べ直す
df0refs_numEachElements = df0refs['elementSymbol'].value_counts().sort_index()
df1_numEachElements = df1['elementSymbol'].value_counts().sort_index()

# ref内で1つしかない元素を探す
onlyone_ref_symbolList = df0refs_numEachElements[df0refs_numEachElements==1].index.tolist()

print(df0refs_numEachElements)
print(df1_numEachElements)
print(onlyone_ref_symbolList)

C    2
H    1
O    1
Name: elementSymbol, dtype: int64
C    6
H    8
O    2
Name: elementSymbol, dtype: int64
['H', 'O']


In [9]:
df1_numEachElements.loc[onlyone_ref_symbolList]

H    8
O    2
Name: elementSymbol, dtype: int64

In [10]:
#onlyone_ref_symbolListに含まれる元素の内、df1内でも少ない元素を選定
ele = df1_numEachElements.loc[onlyone_ref_symbolList].idxmin()
ele

'O'

In [11]:
#df1中の各原子とele候補(df1内で1つだけとは限らない)の距離行列を計算
#df0refsの距離行列と比較することでdf1の原子を絞る
#
#df0refs内でのeleとの距離行列を計算
df0refs_eleIndex = np.where(df0refs['elementSymbol']==ele)
df0refs_dis = squareform(pdist(df0refs[['x','y','z']]))[df0refs_eleIndex]
#df1内でのele(候補)との距離行列を計算
df1_dis = cdist(df1[df1['elementSymbol']==ele][['x','y','z']], df1[['x','y','z']])
#
df0refs_dis

array([[2.36835506, 1.37414284, 0.9660087 , 0.        ]])

In [12]:
df1_dis

array([[3.68753408, 2.42718425, 1.36418849, 2.36525955, 3.64519993,
        4.17083721, 5.25635971, 0.97602038, 4.51034919, 2.56879578,
        2.6845307 , 4.56993013, 3.36628831, 3.36471049, 0.        ,
        2.83903112],
       [4.93656241, 3.59918163, 3.63193717, 4.98744471, 6.01622882,
        6.00759503, 7.01257554, 1.86650078, 7.02971372, 5.39348426,
        2.84707565, 5.31985713, 0.965995  , 0.96599867, 2.83903112,
        0.        ]])

In [13]:
df0refs

Unnamed: 0,elementSymbol,x,y,z
0,C,0.080457,-0.863302,0.000218
1,C,1.458946,-0.625066,1.1e-05
11,H,3.21043,-1.433662,-0.0002
10,O,2.288102,-1.720862,-4.7e-05


In [14]:
df1

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198
10,H,0.978066,-2.376464,-0.000788


In [15]:
#df0refs_disとdf1_disの原子対同士を比較するため、
#それぞれを列・行方向に複製し、差をとることで全組合せを比較
#→ゼロに近い=原子対がdf0とdf1とで対応
#まずはdf0refs_disについて
df0refs_disrepeat = np.repeat(df0refs_dis,
                              np.repeat(df1_numEachElements,df0refs_numEachElements), #[6,6,8,2]
                             )
df0refs_disrepeat = np.tile(df0refs_disrepeat,(2,1))
df0refs_disrepeat

array([[2.36835506, 2.36835506, 2.36835506, 2.36835506, 2.36835506,
        2.36835506, 1.37414284, 1.37414284, 1.37414284, 1.37414284,
        1.37414284, 1.37414284, 0.9660087 , 0.9660087 , 0.9660087 ,
        0.9660087 , 0.9660087 , 0.9660087 , 0.9660087 , 0.9660087 ,
        0.        , 0.        ],
       [2.36835506, 2.36835506, 2.36835506, 2.36835506, 2.36835506,
        2.36835506, 1.37414284, 1.37414284, 1.37414284, 1.37414284,
        1.37414284, 1.37414284, 0.9660087 , 0.9660087 , 0.9660087 ,
        0.9660087 , 0.9660087 , 0.9660087 , 0.9660087 , 0.9660087 ,
        0.        , 0.        ]])

In [16]:
print(df0refs_numEachElements)
print(df1_numEachElements)

C    2
H    1
O    1
Name: elementSymbol, dtype: int64
C    6
H    8
O    2
Name: elementSymbol, dtype: int64


In [17]:
#df1_disについて
df1_dis

array([[3.68753408, 2.42718425, 1.36418849, 2.36525955, 3.64519993,
        4.17083721, 5.25635971, 0.97602038, 4.51034919, 2.56879578,
        2.6845307 , 4.56993013, 3.36628831, 3.36471049, 0.        ,
        2.83903112],
       [4.93656241, 3.59918163, 3.63193717, 4.98744471, 6.01622882,
        6.00759503, 7.01257554, 1.86650078, 7.02971372, 5.39348426,
        2.84707565, 5.31985713, 0.965995  , 0.96599867, 2.83903112,
        0.        ]])

In [18]:
np.split(df1_dis, np.cumsum(df1_numEachElements), axis=1)[:-1]
#それぞれにtileをして、結合してやればいい

[array([[3.68753408, 2.42718425, 1.36418849, 2.36525955, 3.64519993,
         4.17083721],
        [4.93656241, 3.59918163, 3.63193717, 4.98744471, 6.01622882,
         6.00759503]]),
 array([[5.25635971, 0.97602038, 4.51034919, 2.56879578, 2.6845307 ,
         4.56993013, 3.36628831, 3.36471049],
        [7.01257554, 1.86650078, 7.02971372, 5.39348426, 2.84707565,
         5.31985713, 0.965995  , 0.96599867]]),
 array([[0.        , 2.83903112],
        [2.83903112, 0.        ]])]

In [19]:
df1_disrepeat = np.hstack(
    [np.tile(ar, n) for ar, n in zip(
        np.split(df1_dis, np.cumsum(df1_numEachElements), axis=1)[:-1],
        df0refs_numEachElements
    )]
)
df1_disrepeat

array([[3.68753408, 2.42718425, 1.36418849, 2.36525955, 3.64519993,
        4.17083721, 3.68753408, 2.42718425, 1.36418849, 2.36525955,
        3.64519993, 4.17083721, 5.25635971, 0.97602038, 4.51034919,
        2.56879578, 2.6845307 , 4.56993013, 3.36628831, 3.36471049,
        0.        , 2.83903112],
       [4.93656241, 3.59918163, 3.63193717, 4.98744471, 6.01622882,
        6.00759503, 4.93656241, 3.59918163, 3.63193717, 4.98744471,
        6.01622882, 6.00759503, 7.01257554, 1.86650078, 7.02971372,
        5.39348426, 2.84707565, 5.31985713, 0.965995  , 0.96599867,
        2.83903112, 0.        ]])

In [20]:
delta = np.abs(df1_disrepeat - df0refs_disrepeat)
delta

array([[1.31917902e+00, 5.88291905e-02, 1.00416657e+00, 3.09550275e-03,
        1.27684487e+00, 1.80248215e+00, 2.31339123e+00, 1.05304140e+00,
        9.95435272e-03, 9.91116711e-01, 2.27105708e+00, 2.79669437e+00,
        4.29035101e+00, 1.00116808e-02, 3.54434049e+00, 1.60278709e+00,
        1.71852200e+00, 3.60392144e+00, 2.40027961e+00, 2.39870179e+00,
        0.00000000e+00, 2.83903112e+00],
       [2.56820736e+00, 1.23082657e+00, 1.26358211e+00, 2.61908966e+00,
        3.64787376e+00, 3.63923997e+00, 3.56241957e+00, 2.22503879e+00,
        2.25779433e+00, 3.61330187e+00, 4.64208598e+00, 4.63345218e+00,
        6.04656685e+00, 9.00492080e-01, 6.06370503e+00, 4.42747556e+00,
        1.88106696e+00, 4.35384843e+00, 1.36945483e-05, 1.00231864e-05,
        2.83903112e+00, 0.00000000e+00]])

In [21]:
# trueの箇所は原子対が対応していた場所(と思われる)(閾値の関係)
( delta < 0.5 ) & ( delta > 0 )

array([[False,  True, False,  True, False, False, False, False,  True,
        False, False, False, False,  True, False, False, False, False,
        False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
         True,  True, False, False]])

In [22]:
delta == 0

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False,  True, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False,  True]])

In [23]:
matchingResult = ( delta < 0.5 )
matchingResult

array([[False,  True, False,  True, False, False, False, False,  True,
        False, False, False, False,  True, False, False, False, False,
        False, False,  True, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
         True,  True, False,  True]])

In [24]:
#O-Cペアの比較
#行:O
#列:C
matchingResult[:,0:12]

array([[False,  True, False,  True, False, False, False, False,  True,
        False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False]])

In [25]:
#[i]はdf0refs.iloc[i]のC原子
#[*][i]はdf1[df1['elementSymbol']==ele].iloc[i]のO原子
#[*][*][i]はdf1[df1['elementSymbol']=='C'].iloc[i]のC原子
#
#なので、
#ele = df1[df1['elementSymbol']==ele].iloc[0]のとき
#    df0refs.iloc[0]とはdf1[df1['elementSymbol']=='C'].iloc[1]または[3]が対応しうる ([False,  True, False,  True, False, False]より
#    df0refs.iloc[1]とはdf1[df1['elementSymbol']=='C'].iloc[2]が対応しうる ([False, False,  True, False, False, False]より
np.split(matchingResult[:,0:12],2,axis=1)

#これを一般化する

[array([[False,  True, False,  True, False, False],
        [False, False, False, False, False, False]]),
 array([[False, False,  True, False, False, False],
        [False, False, False, False, False, False]])]

In [26]:
np.split(matchingResult,
         np.cumsum(np.repeat(df1_numEachElements,df0refs_numEachElements)),
         axis=1)[:-1]
#[6,6,8,2]に分割
#それぞれ順にdf0refsの原子に対応している

[array([[False,  True, False,  True, False, False],
        [False, False, False, False, False, False]]),
 array([[False, False,  True, False, False, False],
        [False, False, False, False, False, False]]),
 array([[False,  True, False, False, False, False, False, False],
        [False, False, False, False, False, False,  True,  True]]),
 array([[ True, False],
        [False,  True]])]

In [27]:
#df1[df1['elementSymbol']==ele].iloc[0]の原子とdf0refs[df0refs['elementSymbol']==ele]の原子が対応しているとき
np.split(matchingResult[0],
         np.cumsum(np.repeat(df1_numEachElements,df0refs_numEachElements))
        )[:-1]
#[0,1,2,3,4,5],
#[0,1,2,3,4,5],
#[6,7,8,9,10,11,12,13],
#[14,15]

[array([False,  True, False,  True, False, False]),
 array([False, False,  True, False, False, False]),
 array([False,  True, False, False, False, False, False, False]),
 array([ True, False])]

In [28]:
np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1]

[array([0, 1, 2, 3, 4, 5]),
 array([ 6,  7,  8,  9, 10, 11, 12, 13]),
 array([14, 15])]

In [29]:
np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)

  return array(a, dtype, copy=False, order=order)


array([array([0, 1, 2, 3, 4, 5]), array([0, 1, 2, 3, 4, 5]),
       array([ 6,  7,  8,  9, 10, 11, 12, 13]), array([14, 15])],
      dtype=object)

In [30]:
for matchingResult_df0refsi, df1_eleiidxs in zip(
                np.split(matchingResult[0],
                         np.cumsum(np.repeat(df1_numEachElements,df0refs_numEachElements))
                        )[:-1],
                np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)
            ):
    print(df1_eleiidxs[np.where(matchingResult_df0refsi)])
    #[0] <-> [1,3] : df0refs.iloc[0]はdf1.iloc[1]かdf1.iloc[3]に対応
    #[1] <-> [2]   : df0refs.iloc[1]はdf1.iloc[2]に対応
    #[2] <-> [7]   : df0refs.iloc[2]はdf1.iloc[7]に対応
    #[3] <-> [14]  : df0refs.iloc[3]はdf1.iloc[14]に対応

[1 3]
[2]
[7]
[14]


In [31]:
for matchingResult_df0refsi, df1_eleiidxs in zip(
                np.split(matchingResult[0],
                         np.cumsum(np.repeat(df1_numEachElements,df0refs_numEachElements))
                        )[:-1],
                np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)
            ):
    print(df1_eleiidxs[np.where(matchingResult_df0refsi)])
    print(df1.iloc[df1_eleiidxs[np.where(matchingResult_df0refsi)].tolist()].index)
    print('----')

[1 3]
Int64Index([1, 3], dtype='int64')
----
[2]
Int64Index([2], dtype='int64')
----
[7]
Int64Index([8], dtype='int64')
----
[14]
Int64Index([7], dtype='int64')
----


In [32]:
df1

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198
10,H,0.978066,-2.376464,-0.000788


In [33]:
X_formOrigin = [
    [
        df1_eleiidxs[np.where(matchingResult_df0refsi)].tolist() for matchingResult_df0refsi, df1_eleiidxs in zip(
                    np.split(matchingResult[i],
                             np.cumsum(np.repeat(df1_numEachElements,df0refs_numEachElements))
                            )[:-1],
                    np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)
                )
    ] for i in range(len(matchingResult))
]
X_formOrigin
#matchingResult[1]も含めて配列化
#
#ここから、直積を生成する必要がある -> itertools.product()
#(X構築のため)(df1をdf0に合わせるように変換させるので、df1はX構築に使われる)
#今回の場合
#[1,2,7,14], [3,2,7,14]の2通り

  return array(a, dtype, copy=False, order=order)


[[[1, 3], [2], [7], [14]], [[], [], [12, 13], [15]]]

In [34]:
for i in range(len(X_formOrigin)):
    for X_formIndexes in itertools.product(*X_formOrigin[i]):
        print(X_formIndexes)
        print(df1.iloc[list(X_formIndexes)])
        print('----')
        #ilocで取得
        #indexとX_formIndexesが一致していないのは元素記号で並び替えたため（問題なし
        #この原子がdf0refsと順に対応する(可能性がある)ことになる
        #因みに正解は2番目の[3,2,7,14](X_formIndexes)、換言してdf1のインデックスで[3,2,8,7]の組

(1, 2, 7, 14)
  elementSymbol         x         y         z
1             C -0.012837  0.878221  0.001362
2             C -0.102538 -0.521387  0.000770
8             H -2.043843 -0.554664  0.001191
7             O -1.296117 -1.181977  0.001276
----
(3, 2, 7, 14)
  elementSymbol         x         y         z
3             C  1.066458 -1.294624 -0.000390
2             C -0.102538 -0.521387  0.000770
8             H -2.043843 -0.554664  0.001191
7             O -1.296117 -1.181977  0.001276
----


In [35]:
l1 = [[[1,3],[2],[7],[13,14]], [[0],[5],[7,8],[17]]]
for i in range(len(l1)):
    for formIndexes in itertools.product(*l1[i]):
        print(formIndexes)

(1, 2, 7, 13)
(1, 2, 7, 14)
(3, 2, 7, 13)
(3, 2, 7, 14)
(0, 5, 7, 17)
(0, 5, 8, 17)


In [36]:
[formIndexes for formIndexes in itertools.chain.from_iterable([itertools.product(*X_formOrigin[i]) for i in range(len(X_formOrigin))])]

[(1, 2, 7, 14), (3, 2, 7, 14)]

In [37]:
#R,tを計算し、誤差を評価する
minimumScore = np.inf
Y = df0refs[['x','y','z']].values
df1xyz = df1[['x','y','z']]
for i in range(len(X_formOrigin)):
    for X_formIndexes in itertools.product(*X_formOrigin[i]):
        X = df1xyz.iloc[list(X_formIndexes)].values
        R, t, score = ovwmols.estimate_conversionParameter(X, Y)
        print(score)
        print('----')
        if score < minimumScore:
            minimumScore = score
            minimumX_formIndexes = X_formIndexes
            minimumR = R
            minimumt = t

print('minimum score:{}'.format(minimumScore))
print('X:{}'.format(df0refs))
print('Y:{}'.format(df1.iloc[list(minimumX_formIndexes)]))
print('R:{}'.format(minimumR))
print('t:{}'.format(minimumt))

1.4889986805291908
----
0.0002586661734124043
----
minimum score:0.0002586661734124043
X:   elementSymbol         x         y         z
0              C  0.080457 -0.863302  0.000218
1              C  1.458946 -0.625066  0.000011
11             H  3.210430 -1.433662 -0.000200
10             O  2.288102 -1.720862 -0.000047
Y:  elementSymbol         x         y         z
3             C  1.066458 -1.294624 -0.000390
2             C -0.102538 -0.521387  0.000770
8             H -2.043843 -0.554664  0.001191
7             O -1.296117 -1.181977  0.001276
R:[[-9.15272526e-01  4.02835032e-01  3.74201589e-04]
 [ 4.02835020e-01  9.15272601e-01 -1.10633877e-04]
 [-3.87063663e-04  4.94813560e-05 -9.99999924e-01]]
t:[ 1.57358562e+00 -1.08523632e-01  5.21277769e-04]


In [38]:
#df1全体を変換して、df0に合わせてみる
df0

Unnamed: 0,elementSymbol,x,y,z
0,C,0.080457,-0.863302,0.000218
1,C,1.458946,-0.625066,1.1e-05
2,C,1.948032,0.685583,-0.000158
3,C,1.04999,1.757404,-9.9e-05
4,C,-0.327549,1.52762,0.00012
5,C,-0.808717,0.212371,0.000265
6,H,-0.276298,-1.888352,0.000345
7,H,3.020953,0.86802,-0.000329
8,H,1.435398,2.773253,-0.000223
9,H,-1.021912,2.362133,0.000171


In [39]:
#df1の変換前と変換後
#だいたいdf0と合っている
pd.concat([df1, df1[['x','y','z']] @ R.T + t], axis=1)

Unnamed: 0,elementSymbol,x,y,z,0,1,2
0,C,1.242635,1.492466,0.000579,1.037454,1.758066,-0.000465
1,C,-0.012837,0.878221,0.001362,1.939114,0.690117,-0.000792
2,C,-0.102538,-0.521387,0.00077,1.457403,-0.627041,-0.000235
3,C,1.066458,-1.294624,-0.00039,0.075966,-0.863851,0.000434
4,C,2.312751,-0.668607,-0.001069,-0.812551,0.211176,0.000662
5,C,2.411912,0.727578,-0.000634,-0.340877,1.529011,0.000258
6,H,3.38451,1.209893,-0.00121,-1.036777,2.362258,0.000481
8,H,-2.043843,-0.554664,0.001191,3.220821,-1.439524,9.4e-05
9,H,3.213234,-1.276807,-0.00198,-1.881743,0.017253,0.001194
10,H,0.978066,-2.376464,-0.000788,-0.278935,-1.889637,0.000813


In [40]:
#総当たり式
#意外とこれでも早い
#一応どっちも実装しておく
X_formOrigin_test = [[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], [6,7,8,9,10,11,12,13], [14,15]]
for X_formIndexes in itertools.product(*X_formOrigin_test):
    X = df1xyz.iloc[list(X_formIndexes)].values
    R, t, score = ovwmols.estimate_conversionParameter(X, Y)
    print(score)
    print('----')
    if score < minimumScore:
        minimumScore = score
        minimumX_formIndexes = X_formIndexes
        minimumR = R
        minimumt = t
        
print('minimum score:{}'.format(minimumScore))
print('X:{}'.format(df0refs))
print('Y:{}'.format(df1.iloc[list(minimumX_formIndexes)]))
print('R:{}'.format(minimumR))
print('t:{}'.format(minimumt))

13.555278044420955
----
26.297024602974645
----
3.742045768265428
----
7.745807223691062
----
9.006243878636349
----
22.551129919190963
----
4.902157494278358
----
14.99276144024698
----
4.976842702327696
----
8.938942914864368
----
12.839048132646504
----
16.64921569743836
----
9.082555984637947
----
10.564015264500824
----
9.060484534569847
----
10.541040665310936
----
14.726716603807304
----
27.189949801073926
----
1.177731345785403
----
4.500016688598768
----
9.830397925529775
----
22.694084090752106
----
4.292534932134083
----
13.470447113146491
----
4.130513204836515
----
7.380140577382016
----
13.974897671075253
----
16.743666187996233
----
5.791571681969653
----
6.225526128383811
----
5.777651263970056
----
6.213704381421495
----
17.287873176563068
----
30.538648442319477
----
1.99650943249255
----
4.508952006877565
----
10.507953719107933
----
24.09833276565314
----
3.555395126121541
----
13.387647700426308
----
6.478253441594928
----
8.619660440246816
----
16.302659523074333


18.530111169355813
----
4.976675803763515
----
13.127579703032415
----
9.13117645766631
----
18.182750449896307
----
12.456580487218648
----
18.2630159885758
----
12.429386605466593
----
18.238098324785835
----
14.744645328968932
----
26.17463731645913
----
5.260822121761779
----
14.156947682126628
----
13.864831215506713
----
30.19592380909094
----
7.7872173666028
----
20.9470232380163
----
4.919676942081196
----
12.247909195422732
----
8.905489089025254
----
17.031168392220703
----
12.917802565994682
----
18.30837498886733
----
12.88877048189983
----
18.278476207538034
----
18.517052915618322
----
28.710611840399224
----
3.829741034610839
----
10.821116113464623
----
12.219339176092356
----
26.449571074904316
----
7.21326341066074
----
18.165826472121758
----
4.125737065427021
----
9.543893268492397
----
10.425839159563958
----
16.224364932959478
----
10.133392429401441
----
13.420442264036556
----
10.108087275161218
----
13.39488625480915
----
20.51097688998607
----
30.1353169192476

In [41]:
X_formIndexes

(5, 5, 13, 15)

In [42]:
df1.iloc[list(X_formIndexes)]

Unnamed: 0,elementSymbol,x,y,z
5,C,2.411912,0.727578,-0.000634
5,C,2.411912,0.727578,-0.000634
15,H,-4.159728,0.404029,-0.776923
13,O,-3.590963,0.489482,-0.000806


In [43]:
df1.iloc[list(X_formIndexes)].index

Int64Index([5, 5, 15, 13], dtype='int64')

In [44]:
df1

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198
10,H,0.978066,-2.376464,-0.000788


In [45]:
np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)

  return array(a, dtype, copy=False, order=order)


array([array([0, 1, 2, 3, 4, 5]), array([0, 1, 2, 3, 4, 5]),
       array([ 6,  7,  8,  9, 10, 11, 12, 13]), array([14, 15])],
      dtype=object)

In [46]:
np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements).tolist()

[array([0, 1, 2, 3, 4, 5]),
 array([0, 1, 2, 3, 4, 5]),
 array([ 6,  7,  8,  9, 10, 11, 12, 13]),
 array([14, 15])]

In [47]:
list(itertools.product(*np.repeat(np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1], df0refs_numEachElements)))

[(0, 0, 6, 14),
 (0, 0, 6, 15),
 (0, 0, 7, 14),
 (0, 0, 7, 15),
 (0, 0, 8, 14),
 (0, 0, 8, 15),
 (0, 0, 9, 14),
 (0, 0, 9, 15),
 (0, 0, 10, 14),
 (0, 0, 10, 15),
 (0, 0, 11, 14),
 (0, 0, 11, 15),
 (0, 0, 12, 14),
 (0, 0, 12, 15),
 (0, 0, 13, 14),
 (0, 0, 13, 15),
 (0, 1, 6, 14),
 (0, 1, 6, 15),
 (0, 1, 7, 14),
 (0, 1, 7, 15),
 (0, 1, 8, 14),
 (0, 1, 8, 15),
 (0, 1, 9, 14),
 (0, 1, 9, 15),
 (0, 1, 10, 14),
 (0, 1, 10, 15),
 (0, 1, 11, 14),
 (0, 1, 11, 15),
 (0, 1, 12, 14),
 (0, 1, 12, 15),
 (0, 1, 13, 14),
 (0, 1, 13, 15),
 (0, 2, 6, 14),
 (0, 2, 6, 15),
 (0, 2, 7, 14),
 (0, 2, 7, 15),
 (0, 2, 8, 14),
 (0, 2, 8, 15),
 (0, 2, 9, 14),
 (0, 2, 9, 15),
 (0, 2, 10, 14),
 (0, 2, 10, 15),
 (0, 2, 11, 14),
 (0, 2, 11, 15),
 (0, 2, 12, 14),
 (0, 2, 12, 15),
 (0, 2, 13, 14),
 (0, 2, 13, 15),
 (0, 3, 6, 14),
 (0, 3, 6, 15),
 (0, 3, 7, 14),
 (0, 3, 7, 15),
 (0, 3, 8, 14),
 (0, 3, 8, 15),
 (0, 3, 9, 14),
 (0, 3, 9, 15),
 (0, 3, 10, 14),
 (0, 3, 10, 15),
 (0, 3, 11, 14),
 (0, 3, 11, 15),
 (0, 3, 12, 

In [48]:
[df1.iloc[idxs].index for idxs in np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1]]

[Int64Index([0, 1, 2, 3, 4, 5], dtype='int64'),
 Int64Index([6, 8, 9, 10, 11, 12, 14, 15], dtype='int64'),
 Int64Index([7, 13], dtype='int64')]

In [49]:
df1

Unnamed: 0,elementSymbol,x,y,z
0,C,1.242635,1.492466,0.000579
1,C,-0.012837,0.878221,0.001362
2,C,-0.102538,-0.521387,0.00077
3,C,1.066458,-1.294624,-0.00039
4,C,2.312751,-0.668607,-0.001069
5,C,2.411912,0.727578,-0.000634
6,H,3.38451,1.209893,-0.00121
8,H,-2.043843,-0.554664,0.001191
9,H,3.213234,-1.276807,-0.00198
10,H,0.978066,-2.376464,-0.000788


In [50]:
np.repeat(
            [df1.iloc[idxs].index for idxs in np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1]],
            df0refs_numEachElements)

  return array(a, dtype, copy=False, order=order)


array([Int64Index([0, 1, 2, 3, 4, 5], dtype='int64'),
       Int64Index([0, 1, 2, 3, 4, 5], dtype='int64'),
       Int64Index([6, 8, 9, 10, 11, 12, 14, 15], dtype='int64'),
       Int64Index([7, 13], dtype='int64')], dtype=object)

In [51]:
list(
    itertools.product(*np.repeat(
            [df1.iloc[idxs].index for idxs in np.split(range(len(df1)),np.cumsum(df1_numEachElements))[:-1]],
            df0refs_numEachElements)
        )
)

[(0, 0, 6, 7),
 (0, 0, 6, 13),
 (0, 0, 8, 7),
 (0, 0, 8, 13),
 (0, 0, 9, 7),
 (0, 0, 9, 13),
 (0, 0, 10, 7),
 (0, 0, 10, 13),
 (0, 0, 11, 7),
 (0, 0, 11, 13),
 (0, 0, 12, 7),
 (0, 0, 12, 13),
 (0, 0, 14, 7),
 (0, 0, 14, 13),
 (0, 0, 15, 7),
 (0, 0, 15, 13),
 (0, 1, 6, 7),
 (0, 1, 6, 13),
 (0, 1, 8, 7),
 (0, 1, 8, 13),
 (0, 1, 9, 7),
 (0, 1, 9, 13),
 (0, 1, 10, 7),
 (0, 1, 10, 13),
 (0, 1, 11, 7),
 (0, 1, 11, 13),
 (0, 1, 12, 7),
 (0, 1, 12, 13),
 (0, 1, 14, 7),
 (0, 1, 14, 13),
 (0, 1, 15, 7),
 (0, 1, 15, 13),
 (0, 2, 6, 7),
 (0, 2, 6, 13),
 (0, 2, 8, 7),
 (0, 2, 8, 13),
 (0, 2, 9, 7),
 (0, 2, 9, 13),
 (0, 2, 10, 7),
 (0, 2, 10, 13),
 (0, 2, 11, 7),
 (0, 2, 11, 13),
 (0, 2, 12, 7),
 (0, 2, 12, 13),
 (0, 2, 14, 7),
 (0, 2, 14, 13),
 (0, 2, 15, 7),
 (0, 2, 15, 13),
 (0, 3, 6, 7),
 (0, 3, 6, 13),
 (0, 3, 8, 7),
 (0, 3, 8, 13),
 (0, 3, 9, 7),
 (0, 3, 9, 13),
 (0, 3, 10, 7),
 (0, 3, 10, 13),
 (0, 3, 11, 7),
 (0, 3, 11, 13),
 (0, 3, 12, 7),
 (0, 3, 12, 13),
 (0, 3, 14, 7),
 (0, 3, 14, 13),
 