# 時刻をキーに2つのテーブルを結合し、演算する
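左から key_time(時分秒ミリ秒), key_time(マイクロ秒ナノ秒), key_label, data_1_1, data_1_2, data_2_1, data_2_2 というカラムを持つテーブルが2つある(data_X_X はそれぞれ「(時分秒ミリ秒)」と「(マイクロ秒ナノ秒)」の2カラムに分かれている)。  
これらを key_time(時分秒ミリ秒), key_time(マイクロ秒ナノ秒), key_label をキーとして結合し、data_1_2 - data_1_1、data_2_2 - data_2_1 の演算結果を2つのテーブル間で比較したい場合を考える。なお、以下のコードでは key_time の2カラムを連結した文字列を結合キーとしており、key_label をキーに含める方法はコメントで示している。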

In [263]:
import pandas as pd
import polars as pl
import numpy as np

In [264]:
# Column names, one per line. They are passed to read_csv via `names=`.
# Each data_X_Y value occupies two columns: an hour/minute/second/millisecond
# part and a microsecond/nanosecond part.
col_names = """
key_time(時分秒ミリ秒)
key_time(マイクロ秒ナノ秒)
key_label
data_1_1(時分秒ミリ秒)
data_1_1(マイクロ秒ナノ秒)
data_1_2(時分秒ミリ秒)
data_1_2(マイクロ秒ナノ秒)
data_2_1(時分秒ミリ秒)
data_2_1(マイクロ秒ナノ秒)
data_2_2(時分秒ミリ秒)
data_2_2(マイクロ秒ナノ秒)
"""
# Splitting on newlines leaves an empty string at both ends (from the line
# breaks next to the triple quotes); slice them off.
col_names = col_names.split("\n")[1:-1]

# Read every column as str so that leading zeros are not lost.
read_options = dict(names=col_names, dtype=str)
df_1 = pd.read_csv('input/time_table1.csv', **read_options)
df_2 = pd.read_csv('input/time_table2.csv', **read_options)

In [265]:
# Show the first table as loaded from input/time_table1.csv.
df_1

Unnamed: 0,key_time(時分秒ミリ秒),key_time(マイクロ秒ナノ秒),key_label,data_1_1(時分秒ミリ秒),data_1_1(マイクロ秒ナノ秒),data_1_2(時分秒ミリ秒),data_1_2(マイクロ秒ナノ秒),data_2_1(時分秒ミリ秒),data_2_1(マイクロ秒ナノ秒),data_2_2(時分秒ミリ秒),data_2_2(マイクロ秒ナノ秒)
0,93654000,123123,AAA,93654100,101000,93654350,102001,93654050,103002,93654350,102001
1,93654001,124123,BBB,93654101,101000,93654351,102001,93654051,103002,93654351,102001
2,93654003,124123,BBB,93654103,101000,93654353,102001,93654053,103002,93654353,102001
3,93654005,127123,BBB,93654105,101000,93654355,102001,93654055,103002,93654355,102001
4,93654002,125123,CCC,93654102,101000,93654352,102001,93654052,103002,93654352,102001
5,93654004,126123,AAA,93654104,101000,93654354,102001,93654054,103002,93654354,102001


In [266]:
# Show the second table as loaded from input/time_table2.csv.
df_2

Unnamed: 0,key_time(時分秒ミリ秒),key_time(マイクロ秒ナノ秒),key_label,data_1_1(時分秒ミリ秒),data_1_1(マイクロ秒ナノ秒),data_1_2(時分秒ミリ秒),data_1_2(マイクロ秒ナノ秒),data_2_1(時分秒ミリ秒),data_2_1(マイクロ秒ナノ秒),data_2_2(時分秒ミリ秒),data_2_2(マイクロ秒ナノ秒)
0,93654000,123123,AAA,93654110,101000,93654350,101001,93654150,101002,93654350,101001
1,93659999,999123,YYY,93659999,102000,93660100,102001,93661000,102002,93660100,102001
2,93654001,124123,BBB,93654111,103000,93654351,103001,93654151,103002,93654351,103001
3,93659002,999123,XXX,93659002,104000,93659802,104001,93659102,104002,93659802,104001


In [267]:
# Build the join key by concatenating the two key_time columns, then rebind
# each table to a copy sorted by that key in ascending order.
# The columns were read as str, so `+` is string concatenation.
time_hms_col = 'key_time(時分秒ミリ秒)'
time_usns_col = 'key_time(マイクロ秒ナノ秒)'
for table in (df_1, df_2):
    table['key'] = table[time_hms_col] + table[time_usns_col]

df_1 = df_1.sort_values(by='key').copy()
df_2 = df_2.sort_values(by='key').copy()

# To use key_label as part of the key as well, append it to the key as shown
# below (it would need to be done for both tables). Original author's note,
# unverified: the ascending order should presumably still come out right.
# df_1['key'] = df_1['key'] + df_1['key_label']

In [268]:
# Columns needed for the calculation: the join key followed by every data
# column (everything in col_names after the first three entries).
cols_ex = ['key', *col_names[3:]]

# Narrow both tables down to those columns.
df_1_ex = df_1[cols_ex]
df_2_ex = df_2[cols_ex]

# Preview the first row of the narrowed first table.
df_1_ex.head(1)

Unnamed: 0,key,data_1_1(時分秒ミリ秒),data_1_1(マイクロ秒ナノ秒),data_1_2(時分秒ミリ秒),data_1_2(マイクロ秒ナノ秒),data_2_1(時分秒ミリ秒),data_2_1(マイクロ秒ナノ秒),data_2_2(時分秒ミリ秒),data_2_2(マイクロ秒ナノ秒)
0,93654000123123,93654100,101000,93654350,102001,93654050,103002,93654350,102001


In [269]:
# Group the data columns into (hour/min/sec/ms column, us/ns column) tuples.
# cols_ex[1:] lists the two columns of each data_X_Y value back to back, so
# zipping one iterator with itself yields consecutive pairs. A trailing
# unpaired column, if there were one, is dropped.
it = iter(cols_ex[1:])
data_pairs = list(zip(it, it))

print(data_pairs)

[('data_1_1(時分秒ミリ秒)', 'data_1_1(マイクロ秒ナノ秒)'), ('data_1_2(時分秒ミリ秒)', 'data_1_2(マイクロ秒ナノ秒)'), ('data_2_1(時分秒ミリ秒)', 'data_2_1(マイクロ秒ナノ秒)'), ('data_2_2(時分秒ミリ秒)', 'data_2_2(マイクロ秒ナノ秒)')]


In [270]:
# Conversion factors used to express every time part in microseconds.
US_PER_MS = 1000
US_PER_MINUTE = 60 * 1000 * 1000
US_PER_HOUR = 60 * US_PER_MINUTE
NS_PER_US = 1000


def _to_microseconds(hhmmssms, usns):
    """Combine the two string columns of one timestamp into microseconds.

    Args:
        hhmmssms: Series of digit strings read as a 2-digit hour, a 2-digit
            minute, and the remaining digits as seconds followed by
            milliseconds. Assumes the strings are zero-padded so that the
            fixed slice positions line up.
        usns: Series of digit strings for the sub-millisecond part; the
            value is divided by 1000, i.e. treated as a nanosecond count.

    Returns:
        A float Series holding hours, minutes, seconds and milliseconds
        converted to microseconds, plus ``usns / 1000``.
    """
    hours = hhmmssms.str[0:2].astype('int64')
    minutes = hhmmssms.str[2:4].astype('int64')
    # Seconds and milliseconds are adjacent digits, so together they are
    # parsed as a single millisecond count.
    sec_ms = hhmmssms.str[4:].astype('int64')
    whole_us = hours * US_PER_HOUR + minutes * US_PER_MINUTE + sec_ms * US_PER_MS
    return whole_us + usns.astype('int64') / NS_PER_US


df_li = [df_1_ex.copy(), df_2_ex.copy()]
df_converted_dict = {}

for i, df in enumerate(df_li):
    # Frame that receives the converted columns; df itself stays unchanged.
    df_converted = df.copy()

    # Convert each (hour/min/sec/ms, us/ns) column pair into one microsecond
    # column.
    for data_hhmmssms, data_usns in data_pairs:
        # New column name: the text before "(" plus '_us',
        # e.g. 'data_1_1(...)' -> 'data_1_1_us'.
        new_col_name = data_hhmmssms.split("(")[0] + '_us'

        convert_result = _to_microseconds(df[data_hhmmssms], df[data_usns])

        # Insert position: directly after the pair's us/ns source column.
        insert_loc = df_converted.columns.get_loc(data_usns) + 1

        # Note that DataFrame.insert always modifies the frame in place.
        df_converted.insert(insert_loc, new_col_name, convert_result)

    # Store the frame with all converted columns under 'table:<index>'.
    df_converted_dict[f'table:{i}'] = df_converted

    

In [271]:
# Show the converted frame stored for index 0 (built from df_1_ex).
df_converted_dict['table:0']

Unnamed: 0,key,data_1_1(時分秒ミリ秒),data_1_1(マイクロ秒ナノ秒),data_1_1_us,data_1_2(時分秒ミリ秒),data_1_2(マイクロ秒ナノ秒),data_1_2_us,data_2_1(時分秒ミリ秒),data_2_1(マイクロ秒ナノ秒),data_2_1_us,data_2_2(時分秒ミリ秒),data_2_2(マイクロ秒ナノ秒),data_2_2_us
0,93654000123123,93654100,101000,34614100000.0,93654350,102001,34614350000.0,93654050,103002,34614050000.0,93654350,102001,34614350000.0
1,93654001124123,93654101,101000,34614100000.0,93654351,102001,34614350000.0,93654051,103002,34614050000.0,93654351,102001,34614350000.0
4,93654002125123,93654102,101000,34614100000.0,93654352,102001,34614350000.0,93654052,103002,34614050000.0,93654352,102001,34614350000.0
2,93654003124123,93654103,101000,34614100000.0,93654353,102001,34614350000.0,93654053,103002,34614050000.0,93654353,102001,34614350000.0
5,93654004126123,93654104,101000,34614100000.0,93654354,102001,34614350000.0,93654054,103002,34614050000.0,93654354,102001,34614350000.0
3,93654005127123,93654105,101000,34614110000.0,93654355,102001,34614360000.0,93654055,103002,34614060000.0,93654355,102001,34614360000.0


In [272]:
# Show the converted frame stored for index 1 (built from df_2_ex).
df_converted_dict['table:1']

Unnamed: 0,key,data_1_1(時分秒ミリ秒),data_1_1(マイクロ秒ナノ秒),data_1_1_us,data_1_2(時分秒ミリ秒),data_1_2(マイクロ秒ナノ秒),data_1_2_us,data_2_1(時分秒ミリ秒),data_2_1(マイクロ秒ナノ秒),data_2_1_us,data_2_2(時分秒ミリ秒),data_2_2(マイクロ秒ナノ秒),data_2_2_us
0,93654000123123,93654110,101000,34614110000.0,93654350,101001,34614350000.0,93654150,101002,34614150000.0,93654350,101001,34614350000.0
2,93654001124123,93654111,103000,34614110000.0,93654351,103001,34614350000.0,93654151,103002,34614150000.0,93654351,103001,34614350000.0
3,93659002999123,93659002,104000,34619000000.0,93659802,104001,34619800000.0,93659102,104002,34619100000.0,93659802,104001,34619800000.0
1,93659999999123,93659999,102000,34620000000.0,93660100,102001,34620100000.0,93661000,102002,34621000000.0,93660100,102001,34620100000.0


In [275]:
# Columns used for the calculation: the join key plus every column of the
# first converted table whose name ends in '_us'.
converted_1 = df_converted_dict['table:0']
converted_2 = df_converted_dict['table:1']
us_mask = converted_1.columns.str.contains('.*_us$')
target_cols = ['key', *converted_1.columns[us_mask]]

# Apply the same column selection to both converted tables.
df_1_converted = converted_1[target_cols]
df_2_converted = converted_2[target_cols]

print('-------- df_1_converted ----------- ', df_1_converted, sep='\n')
print()
print('-------- df_2_converted ----------- ', df_2_converted, sep='\n')

-------- df_1_converted ----------- 
               key   data_1_1_us   data_1_2_us   data_2_1_us   data_2_2_us
0  093654000123123  3.461410e+10  3.461435e+10  3.461405e+10  3.461435e+10
1  093654001124123  3.461410e+10  3.461435e+10  3.461405e+10  3.461435e+10
4  093654002125123  3.461410e+10  3.461435e+10  3.461405e+10  3.461435e+10
2  093654003124123  3.461410e+10  3.461435e+10  3.461405e+10  3.461435e+10
5  093654004126123  3.461410e+10  3.461435e+10  3.461405e+10  3.461435e+10
3  093654005127123  3.461411e+10  3.461436e+10  3.461406e+10  3.461436e+10

-------- df_2_converted ----------- 
               key   data_1_1_us   data_1_2_us   data_2_1_us   data_2_2_us
0  093654000123123  3.461411e+10  3.461435e+10  3.461415e+10  3.461435e+10
2  093654001124123  3.461411e+10  3.461435e+10  3.461415e+10  3.461435e+10
3  093659002999123  3.461900e+10  3.461980e+10  3.461910e+10  3.461980e+10
1  093659999999123  3.462000e+10  3.462010e+10  3.462100e+10  3.462010e+10


In [283]:
# Join the two converted tables on the time key (merge's default join type).
# Column names present in both tables get '_left' (first table) or '_right'
# (second table) appended.
df_result = pd.merge(
    df_1_converted,
    df_2_converted,
    on='key',
    suffixes=['_left', '_right'],
)
df_result

Unnamed: 0,key,data_1_1_us_left,data_1_2_us_left,data_2_1_us_left,data_2_2_us_left,data_1_1_us_right,data_1_2_us_right,data_2_1_us_right,data_2_2_us_right
0,93654000123123,34614100000.0,34614350000.0,34614050000.0,34614350000.0,34614110000.0,34614350000.0,34614150000.0,34614350000.0
1,93654001124123,34614100000.0,34614350000.0,34614050000.0,34614350000.0,34614110000.0,34614350000.0,34614150000.0,34614350000.0


In [285]:
# For each side of the join and each data group n, add a column
# diff_<n>_<side> = data_<n>_2_us_<side> - data_<n>_1_us_<side>.
# Iteration order gives: diff_1_left, diff_2_left, diff_1_right, diff_2_right.
diff_columns = {
    f'diff_{n}_{side}': (
        df_result[f'data_{n}_2_us_{side}'] - df_result[f'data_{n}_1_us_{side}']
    )
    for side in ('left', 'right')
    for n in (1, 2)
}
df_result = df_result.assign(**diff_columns)

df_result

Unnamed: 0,key,data_1_1_us_left,data_1_2_us_left,data_2_1_us_left,data_2_2_us_left,data_1_1_us_right,data_1_2_us_right,data_2_1_us_right,data_2_2_us_right,diff_1_left,diff_2_left,diff_1_right,diff_2_right
0,93654000123123,34614100000.0,34614350000.0,34614050000.0,34614350000.0,34614110000.0,34614350000.0,34614150000.0,34614350000.0,250001.000999,299998.999001,240000.000999,199999.999001
1,93654001124123,34614100000.0,34614350000.0,34614050000.0,34614350000.0,34614110000.0,34614350000.0,34614150000.0,34614350000.0,250001.000999,299998.999001,240000.000999,199999.999001
