# logチェック

In [1]:
import os
import glob
import re
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# 3桁区切りのカンマ形式での表示設定
pd.options.display.float_format = '{:,.0f}'.format
from scipy.stats import gmean

## logファイル読み込み

In [2]:
def extract_data_from_logs(log_contents):
    entries = re.split(r'(?=====================)', log_contents)
    data_list = []

    for entry in entries:
        if not entry.strip():
            continue

        # if not re.search(r'Processing input: (\d+)', entry):
        #     print(f"Skipped entry: {entry[:100]}...")  # 先頭100文字だけを表示
        #     continue

        data = {}

        data['input'] = re.search(r'Processing input: (\d+)', entry).group(1) if re.search(r'Processing input: (\d+)', entry) else None
        data['L'] = int(re.search(r'L=(\d+)', entry).group(1)) if re.search(r'L=(\d+)', entry) else None
        data['N'] = int(re.search(r'N=(\d+)', entry).group(1)) if re.search(r'N=(\d+)', entry) else None
        data['S'] = int(re.search(r'S=(\d+)', entry).group(1)) if re.search(r'S=(\d+)', entry) else None
        data['pass_flg'] = re.search(r'pass_flg=(\w+)', entry).group(1) == 'True' if re.search(r'pass_flg=(\w+)', entry) else None

        temp_search = re.search(r'max_temp(erture|erature)=(\d+)', entry)
        data['max_temperature'] = int(temp_search.group(2)) if temp_search else None

        data['interval'] = int(re.search(r'interval=(\d+)', entry).group(1)) if re.search(r'interval=(\d+)', entry) else None
        data['interval_num'] = int(re.search(r'interval_num=(\d+)', entry).group(1)) if re.search(r'interval_num=(\d+)', entry) else None

        data['Score'] = int(re.search(r'Score = (\d+)', entry).group(1)) if re.search(r'Score = (\d+)', entry) else None
        data['Number of wrong answers'] = int(re.search(r'Number of wrong answers = (\d+)', entry).group(1)) if re.search(r'Number of wrong answers = (\d+)', entry) else None
        data['Placement cost'] = int(re.search(r'Placement cost = (\d+)', entry).group(1)) if re.search(r'Placement cost = (\d+)', entry) else None
        data['Measurement cost'] = int(re.search(r'Measurement cost = (\d+)', entry).group(1)) if re.search(r'Measurement cost = (\d+)', entry) else None
        data['Measurement count'] = int(re.search(r'Measurement count = (\d+)', entry).group(1)) if re.search(r'Measurement count = (\d+)', entry) else None

        data_list.append(data)

    return data_list

In [3]:
# for path in glob.glob('./log*.txt'):
#     print(path)

In [4]:
df = pd.DataFrame()
for path in glob.glob('log*.txt'):
    # print(path)
    # サンプルのログファイルを読み込む
    with open(path, 'r') as file:
        log_contents = file.read()
    data_list = extract_data_from_logs(log_contents)
    # for data in data_list:
    #     print(data)
    _df = pd.DataFrame(data_list)
    _df['path'] = path    
    df = pd.concat([df, _df])

In [5]:
df.dropna(subset=['input'], inplace=True)
df

Unnamed: 0,input,L,N,S,pass_flg,max_temperature,interval,interval_num,Score,Number of wrong answers,Placement cost,Measurement cost,Measurement count,path
0,0000,50,95,36,,71,71,2,10583953,0,4769266,4579000,3990,log_049.txt
1,0001,35,66,289,,567,567,2,502098,0,196252750,2811600,2442,log_049.txt
2,0002,23,89,324,,636,636,2,536399,0,182234622,4094000,3560,log_049.txt
3,0003,35,61,4,,32,8,5,85393451,0,137750,933300,854,log_049.txt
4,0004,33,81,529,,1000,1000,2,150886,0,656231144,6423300,5589,log_049.txt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0095,23,94,225,,441,441,2,1066622,0,87280744,6373200,5546,log_050.txt
96,0096,26,72,900,,1000,1000,2,100538,3,497636566,11524800,10000,log_050.txt
97,0097,33,87,529,,1000,1000,2,145796,0,680991704,4802400,4176,log_050.txt
98,0098,26,84,676,,1000,1000,2,144662,1,541394928,11520800,10000,log_050.txt


## 合計集計

In [6]:
df.groupby('path')['Score'].sum().sort_values(ascending=False).head(20)

path
log_048.txt   1,350,637,143
log_046.txt   1,334,709,950
log_050.txt   1,324,992,024
log_047.txt   1,320,661,568
log_049.txt   1,310,339,840
log_045.txt   1,292,642,653
log_042.txt   1,286,659,414
log_027.txt   1,282,359,324
log_026.txt   1,275,975,391
log_040.txt   1,272,054,760
log_025.txt   1,271,808,339
log_032.txt   1,270,705,461
log_039.txt   1,270,084,562
log_038.txt   1,265,478,503
log_037.txt   1,263,911,822
log_044.txt   1,260,475,496
log_028.txt   1,254,777,625
log_030.txt   1,240,661,142
log_043.txt   1,238,350,037
log_041.txt   1,226,692,653
Name: Score, dtype: float64

In [7]:
top_path = df.groupby('path')['Score'].sum().sort_values(ascending=False)[:15].index.tolist()
top_path.append('log.txt')

In [8]:
df[df['path'].isin(top_path)].pivot_table(values='Score', index=['S'], columns='path', aggfunc='mean')

path,log.txt,log_025.txt,log_026.txt,log_027.txt,log_032.txt,log_037.txt,log_038.txt,log_039.txt,log_040.txt,log_042.txt,log_045.txt,log_046.txt,log_047.txt,log_048.txt,log_049.txt,log_050.txt
S,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,129582241,132942564,132942564,132942564,132942564,132942564,132942564,132942564,132942564,132942564,131592212,152291916,168074233,168074233,158574696,162237742
4,74523175,89301668,89301668,89301668,89301668,89301668,89301668,89301668,89301668,89301668,92289806,94349184,78501176,91432434,83038371,83038371
9,36596640,33482849,33482849,39905098,33482849,39905098,39905098,39905098,39905098,39905098,50636932,44950045,37671789,37671789,43958991,43958991
16,21196718,19571856,25480078,29514290,19571856,29514290,29514290,29514290,29514290,29514290,21476477,24660520,20398531,20398531,18059478,18059478
25,17263651,20386965,22121783,20986790,20386965,20986790,20986790,20986790,20986790,20986790,18985437,16934349,19866056,19866056,17565204,17565204
36,12673518,17004187,17479371,16170801,17004187,16170801,16170801,16170801,16170801,16170801,16270955,13993540,14181480,14523884,14714904,14714904
49,5674142,11309193,8171784,6022606,11309193,6022606,6022606,6022606,6022606,6022606,7873248,9659048,9648465,11134353,11134353,11134353
81,4414490,7804300,6643325,5369374,7804300,5114670,5271189,5481777,5595408,5369374,5808572,5423569,5333442,5333442,5275557,5275557
100,6783421,10170327,8344967,7914594,10170327,7508136,7508136,7632618,6596902,7914594,5018211,3792388,5760879,5760879,6918862,6918862
121,1218024,2690838,1956483,1491308,2690838,1875416,1875416,2017354,2214688,1491308,2327698,2596334,1930638,1930638,1930638,1930638


In [9]:
df[df['path'].isin(top_path)].pivot_table(values='Score', index=['S', 'input'], columns='path', aggfunc='sum')

Unnamed: 0_level_0,path,log.txt,log_025.txt,log_026.txt,log_027.txt,log_032.txt,log_037.txt,log_038.txt,log_039.txt,log_040.txt,log_042.txt,log_045.txt,log_046.txt,log_047.txt,log_048.txt,log_049.txt,log_050.txt
S,input,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,10,141721750,154918668,154918668,154918668,154918668,154918668,154918668,154918668,154918668,154918668,143264834,131702332,136640769,136640769,94928899,131914288
1,13,102933608,85145804,85145804,85145804,85145804,85145804,85145804,85145804,85145804,85145804,107683428,134692833,146909754,146909754,146909754,140726543
1,34,142726650,144757604,144757604,144757604,144757604,144757604,144757604,144757604,144757604,144757604,154581964,220618084,233618660,233618660,237332385,226825263
1,88,130946957,146948181,146948181,146948181,146948181,146948181,146948181,146948181,146948181,146948181,120838621,122154413,155127748,155127748,155127748,149484876
4,3,60769513,98506834,98506834,98506834,98506834,98506834,98506834,98506834,98506834,98506834,92043570,100611112,102181577,102181577,85393451,85393451
4,55,88276837,80096501,80096501,80096501,80096501,80096501,80096501,80096501,80096501,80096501,92536043,88087256,54820775,80683291,80683291,80683291
9,30,31237076,29325325,29325325,26692021,29325325,26692021,26692021,26692021,26692021,26692021,39098086,38078412,41544906,41544906,28978183,28978183
9,41,34400104,35713087,35713087,55049710,35713087,55049710,55049710,55049710,55049710,55049710,58363556,58363556,51391899,51391899,51391899,51391899
9,79,40664689,33752518,33752518,42266606,33752518,42266606,42266606,42266606,42266606,42266606,54186798,46206664,29928149,29928149,49202526,49202526
9,82,40084691,35140467,35140467,35612054,35140467,35612054,35612054,35612054,35612054,35612054,50899289,37151547,27822201,27822201,46263356,46263356


## 幾何平均順

In [10]:
df.groupby('path').agg(geo_mean=('Score', lambda x: gmean(x))).sort_values('geo_mean', ascending=False)[:10]

Unnamed: 0_level_0,geo_mean
path,Unnamed: 1_level_1
log_042.txt,1835705
log_045.txt,1778564
log_046.txt,1727189
log_044.txt,1711663
log_040.txt,1618016
log_041.txt,1611322
log_039.txt,1527734
log_047.txt,1488324
log_048.txt,1481702
log_050.txt,1476867


In [129]:
df[df['path'].isin(top_path)].pivot_table(values='Score', index=['S'], columns='path', aggfunc=gmean)

path,log.txt,log_025.txt,log_026.txt,log_027.txt,log_028.txt,log_032.txt,log_037.txt,log_038.txt,log_039.txt,log_040.txt,log_042.txt,log_044.txt,log_045.txt,log_046.txt,log_047.txt
S,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,164231699,129424993,129424993,129424993,119599205,129424993,129424993,129424993,129424993,129424993,129424993,147597970,130290654,147867270,164231699
4,74844327,88825969,88825969,88825969,96680496,88825969,88825969,88825969,88825969,88825969,88825969,94528091,92289478,94141153,74844327
9,36514970,33384585,33384585,38564058,42067735,33384585,38564058,38564058,38564058,38564058,38564058,24802492,50087068,44195243,36514970
16,19729804,19547911,25121616,28211018,31866745,19547911,28211018,28211018,28211018,28211018,28211018,21277326,21277326,24168832,19729804
25,19522830,20350175,21510750,20268172,19602945,20350175,20268172,20268172,20268172,20268172,20268172,18922137,18922137,16528192,19522830
36,13971519,15891384,16246197,14705034,12993872,15891384,14705034,14705034,14705034,14705034,14705034,15064922,15064922,13315529,13971519
49,9648465,11309193,8171784,6022606,4978829,11309193,6022606,6022606,6022606,6022606,6022606,7873248,7873248,9659048,9648465
81,5260177,7412595,6147895,4855837,3794762,7412595,4707026,5010868,5230058,5393938,4855837,4526667,5664485,5162515,5260177
100,5755211,10151942,8343472,7817976,6575736,10151942,7425971,7425971,7544670,6491912,7817976,4904877,4904877,3788782,5755211
121,1930283,2571026,1814716,1397122,1048301,2571026,1756258,1756258,1888155,2059628,1397122,2251872,2251872,2576459,1930283


## Group集計

In [160]:
df.groupby('S')[['Score', 'Number of wrong answers', 'Placement cost', 'Measurement cost', 'Measurement count']].agg(['count', 'mean', 'std', 'min', 'max'])

Unnamed: 0_level_0,Score,Score,Score,Score,Score,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Placement cost,Placement cost,Placement cost,Placement cost,Placement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement count,Measurement count,Measurement count,Measurement count,Measurement count
Unnamed: 0_level_1,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max
S,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
1,164,129170451,41184426,49178716,246935531,164,0,1,0,2,164,368952,217005,108814,1520600,164,355307,136962,130000,640200,164,337,121,130,582
4,82,72216709,25203419,5981184,112639505,82,0,0,0,2,82,957977,1911386,87568,16356800,82,732770,279457,262300,1478400,82,676,240,244,1260
9,164,27133250,12071271,932359,60191361,164,0,0,0,2,164,4063774,12626943,80104,106636500,164,1736110,918730,322500,4953600,164,1504,694,300,3840
16,123,15919545,9734451,207111,42423430,123,0,1,0,6,123,9473736,45857517,74574,482342400,123,3629112,2053990,391300,12130300,123,3113,1546,364,8372
25,123,13794248,6853884,146540,28558358,123,0,1,0,8,123,16254059,85230619,311352,681968750,123,4057003,2114031,339700,9322000,123,3626,2078,316,9322
36,328,10534699,7063857,1238,29986051,328,1,4,0,40,328,15720741,83989623,136264,925538400,328,5549917,3183149,351000,13843200,328,5087,3283,325,10000
49,41,6724631,2276391,61641,11959703,41,1,4,0,24,41,30146216,137435542,3461356,888370000,41,5694005,3494794,448800,10000000,41,5319,3667,408,10000
81,287,3362948,3266298,1,11533736,287,13,23,0,97,287,21928476,74853451,0,848665350,287,6488428,3595238,0,14729400,287,5812,3630,0,10000
100,82,4362982,4032057,1,10852902,82,33,46,0,98,82,16732305,38502614,0,217000000,82,3843232,4004472,0,13939200,82,2848,2844,0,9504
121,82,1154179,1256356,1,4678292,82,31,44,0,93,82,98596504,195958657,0,842589550,82,3365151,3606111,0,13832000,82,2515,2556,0,9373


In [161]:
df[df['path']=='log.txt'].groupby('S')[['Score', 'Number of wrong answers', 'Placement cost', 'Measurement cost', 'Measurement count']].agg(['count', 'mean', 'std', 'min', 'max'])

Unnamed: 0_level_0,Score,Score,Score,Score,Score,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Placement cost,Placement cost,Placement cost,Placement cost,Placement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement count,Measurement count,Measurement count,Measurement count,Measurement count
Unnamed: 0_level_1,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max
S,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
1,4,132942564,32162255.0,85145804,154918668,4,0,0.0,0,0,4,263144,141139.0,137000,440856,4,434675,146793.0,279500,633600,4,401,131.0,260,576
4,2,89301668,13018071.0,80096501,98506834,2,0,0.0,0,0,2,191526,143447.0,90094,292958,2,840300,308440.0,622200,1058400,2,736,265.0,549,924
9,4,39905098,11943640.0,26692021,55049710,4,0,0.0,0,0,4,310862,162073.0,104040,440038,4,2273375,677862.0,1612500,3206400,4,1866,528.0,1350,2592
16,3,29514290,10458707.0,18924448,39836734,3,0,0.0,0,0,3,919128,862521.0,241246,1889970,3,2698267,565555.0,2169000,3294200,3,2196,421.0,1800,2639
25,3,20986790,6854921.0,15033909,28481233,3,0,0.0,0,0,3,2410255,990004.0,1283084,3138830,3,2591533,713202.0,2128000,3412800,3,2086,521.0,1748,2686
36,8,16170801,7278773.0,7700381,27664492,8,0,0.0,0,0,8,5031236,3758781.0,629534,10473372,8,2373525,649483.0,1625600,3475200,8,1933,505.0,1344,2784
49,1,6022606,,6022606,6022606,1,0,,0,0,1,14776910,,14776910,14776910,1,1727200,,1727200,1727200,1,1428,,1428,1428
81,7,5369374,2439889.0,2366887,9106998,7,0,0.0,0,0,7,20359427,12333962.0,6509768,40010690,7,2531471,862877.0,1904000,4370800,7,2057,651.0,1564,3430
100,2,7914594,1743596.0,6681686,9147503,2,0,0.0,0,0,2,9210315,2382906.0,7525346,10895284,2,3638800,469802.0,3306600,3971000,2,2904,327.0,2673,3135
121,2,1491308,737630.0,969724,2012891,2,0,0.0,0,0,2,73404701,37947438.0,46571810,100237592,2,2896300,157968.0,2784600,3008000,2,2360,120.0,2275,2444


In [162]:
df.groupby(['S', 'input'])[['Score', 'Number of wrong answers', 'Placement cost', 'Measurement cost', 'Measurement count']].agg(['count', 'mean', 'std', 'min', 'max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Score,Score,Score,Score,Score,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Number of wrong answers,Placement cost,Placement cost,Placement cost,Placement cost,Placement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement cost,Measurement count,Measurement count,Measurement count,Measurement count,Measurement count
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max,count,mean,std,min,max
S,input,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2
1,10,41,122048800,31239192,65661499,154918668,41,1,1,0,2,41,318277,222622,108814,684696,41,356159,107237,190000,627000,41,340,95,190,570
1,13,41,97098560,25299111,49178716,144780658,41,0,0,0,2,41,539029,210771,352700,1520600,41,439332,167077,192000,633600,41,413,143,192,576
1,34,41,165480704,47137005,70400226,246935531,41,0,0,0,0,41,305711,150017,148964,1040950,41,249378,75534,130000,429000,41,238,66,130,390
1,88,41,132053740,24653327,82689048,159782696,41,0,0,0,0,41,312792,189190,138378,624046,41,376361,110129,194000,640200,41,359,96,194,582
4,3,41,84252317,19261345,5981184,104744508,41,0,0,0,0,41,885971,2483867,271208,16356800,41,593300,199686,262300,1311500,41,550,170,244,1098
4,55,41,60181100,24866665,17421603,112639505,41,0,0,0,2,41,1029983,1103587,87568,5085600,41,872239,279989,401000,1478400,41,803,234,401,1260
9,30,41,19887977,8452993,932359,37675086,41,0,0,0,2,41,5749161,16404223,362690,106636500,41,2212429,1167322,518400,4953600,41,1902,874,480,3840
9,41,41,32943797,14679592,3821411,60191361,41,0,0,0,0,41,2479653,4175448,80104,25745850,41,1493524,744094,322500,2820000,41,1299,554,300,2250
9,79,41,29144016,10679749,1634587,44950609,41,0,0,0,2,41,3397238,9350049,216424,60737850,41,1611146,754919,339700,2638600,41,1401,558,316,2133
9,82,41,26557210,9994658,933840,42530135,41,0,1,0,2,41,4629044,16416266,358740,106636500,41,1627339,795197,348300,3045600,41,1414,596,324,2430


In [163]:
df[df['path']=='log.txt'].groupby(['S', 'input'])[['Score', 'Number of wrong answers', 'Placement cost', 'Measurement cost', 'Measurement count']].agg(['mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Score,Number of wrong answers,Placement cost,Measurement cost,Measurement count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean
S,input,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,10,154918668,0,137000,408500,380
1,13,85145804,0,440856,633600,576
1,34,144757604,0,311310,279500,260
1,88,146948181,0,163412,417100,388
4,3,98506834,0,292958,622200,549
4,55,80096501,0,90094,1058400,924
9,30,26692021,0,440038,3206400,2592
9,41,55049710,0,104040,1612500,1350
9,79,42266606,0,259334,2006600,1659
9,82,35612054,0,440038,2268000,1863


## チューニング

In [44]:
df_params = pd.read_csv('ahc022_best_params.csv')

In [45]:
df_params['sample'] = df_params['sample'].apply(lambda x: f"{x:04d}")
df_params.rename(columns={'sample': "input"}, inplace=True)

In [46]:
df_params.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   input         100 non-null    object 
 1   L             100 non-null    int64  
 2   N             100 non-null    int64  
 3   S             100 non-null    int64  
 4   best_value    100 non-null    float64
 5   interval      100 non-null    int64  
 6   interval_num  100 non-null    int64  
dtypes: float64(1), int64(5), object(1)
memory usage: 5.6+ KB


In [47]:
df_params.sum()

input           0000000100020003000400050006000700080009001000...
L                                                            2910
N                                                            8005
S                                                           34501
best_value                                          1,111,152,372
interval                                                    19668
interval_num                                                10082
dtype: object

In [60]:
df_params.groupby(['S', 'input'])[['L', 'N', 'best_value', 'interval', 'interval_num']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,L,N,best_value,interval,interval_num
S,input,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,10,22,95,125232620,7,5
1,13,43,96,50518369,13,2
1,34,36,65,60577425,1,57
1,88,24,97,95412018,1,58
4,3,35,61,75017179,14,2
4,55,19,84,84648919,15,3
9,30,40,96,30653784,12,5
9,41,19,75,68234049,14,5
9,79,30,79,43100030,2,30
9,82,40,81,37645500,18,3


In [61]:
df[df['path']=="log.txt"].groupby(['S', 'input'])[['L', 'N', 'Score']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,L,N,Score
S,input,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,10,22,95,154918668
1,13,43,96,85145804
1,34,36,65,144757604
1,88,24,97,146948181
4,3,35,61,98506834
4,55,19,84,80096501
9,30,40,96,26692021
9,41,19,75,55049710
9,79,30,79,42266606
9,82,40,81,35612054


In [62]:
pd.concat([df_params.groupby(['S', 'input'])[['L', 'N', 'interval', 'interval_num', 'best_value']].sum(), df[df['path']=="log.txt"].groupby(['S', 'input'])['Score'].sum()], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,L,N,interval,interval_num,best_value,Score
S,input,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,10,22,95,7,5,125232620,154918668
1,13,43,96,13,2,50518369,85145804
1,34,36,65,1,57,60577425,144757604
1,88,24,97,1,58,95412018,146948181
4,3,35,61,14,2,75017179,98506834
4,55,19,84,15,3,84648919,80096501
9,30,40,96,12,5,30653784,26692021
9,41,19,75,14,5,68234049,55049710
9,79,30,79,2,30,43100030,42266606
9,82,40,81,18,3,37645500,35612054
