# Extraction of DGEMM raw data and computation of regression coefficients

First, we download the HDF5 file (warning: this is a large file, several
gigabytes).

In [1]:
!test -f /tmp/data.db || wget https://gitlab.in2p3.fr/cornebize/g5k_data_non_regression/-/raw/master/data.db? -O /tmp/data.db
!du -sh /tmp/data.db

12G	/tmp/data.db


According to the [changelog](https://gitlab.in2p3.fr/cornebize/g5k_data_non_regression/-/blob/master/exp_changelog.org):
- the cooling issue started on `2019-09-01`
- we changed the protocol on `2019-10-18`
- the cooling issue was fixed on `2019-11-27`
- there was a BIOS upgrade on `2020-04-01`

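So, we will use measurements made after the cooling fix (`2019-11-27`) and before the BIOS upgrade (`2020-04-01`). In practice, the query below stops at `2020-02-01`, which is well before that upgrade.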

In [2]:
import pandas
import datetime

import cashew
# Print the cashew release and git commit used for this run, so that the
# regression results below can be traced back to an exact version of the library.
print(cashew.__version__)
print(cashew.__git_version__)
# `lr` provides the regression helpers used in the cells below.
from cashew import linear_regression as lr

0.0.0
f6e1abe82ebba1eec668652189985c631c10b5b5


In [3]:
def to_epoch(date_s, tz=None):
    """Convert a 'YYYY-MM-DD' date string to a Unix timestamp in whole seconds.

    Parameters
    ----------
    date_s : str
        Date formatted as '%Y-%m-%d'. The time of day is midnight.
    tz : datetime.tzinfo, optional
        Time zone in which `date_s` is expressed. With the default (None) the
        date is interpreted in the local time zone of the machine running the
        notebook, so the returned value can differ between machines. Pass
        e.g. `datetime.timezone.utc` to get a machine-independent result.

    Returns
    -------
    int
        Number of seconds since the Unix epoch.

    Raises
    ------
    ValueError
        If `date_s` does not match the '%Y-%m-%d' format.
    """
    midnight = datetime.datetime.strptime(date_s, '%Y-%m-%d')
    if tz is not None:
        # Attach the requested zone; a naive datetime would be read as local time.
        midnight = midnight.replace(tzinfo=tz)
    return int(midnight.timestamp())

# Selection criteria for the query: a window on `start_time` (compared against
# epoch timestamps produced by `to_epoch`) and a single cluster.
conditions = [f'start_time > {to_epoch("2019-11-27")}',
              f'start_time < {to_epoch("2020-02-01")}',
              # The cluster name is quoted so that it is parsed as a string literal
              # and can never be mistaken for a Python variable named `dahu`.
              'cluster == "dahu"']
print(conditions)
# Pass the criteria as `where` so the rows are filtered when the file is read.
df = pandas.read_hdf('/tmp/data.db', where=conditions)
print(len(df))
df.head()

['start_time > 1574809200', 'start_time < 1580511600', 'cluster == dahu']
12696320


Unnamed: 0,function,m,n,k,timestamp,duration,core,node,cluster,jobid,cpu,start_time,index,expfile_hash
0,dgemm,1517,460,3339,222.583498,0.187202,0,1,dahu,1895763,0,1574848720,0,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...
1,dgemm,587,755,1492,222.770729,0.054655,0,1,dahu,1895763,0,1574848720,1,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...
2,dgemm,1834,1269,1435,222.825393,0.251805,0,1,dahu,1895763,0,1574848720,2,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...
3,dgemm,725,133,10427,223.077209,0.092584,0,1,dahu,1895763,0,1574848720,3,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...
4,dgemm,3517,3560,372,223.169803,0.339271,0,1,dahu,1895763,0,1574848720,4,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...


In [4]:
def compute_lin_reg(df):
    """Regress `duration` on `mnk` for one set of measurements.

    The input frame is copied first, so the caller's frame is not modified.
    On top of what `lr.compute_full_reg` returns, two entries are added:
    `avg_gflops` (total flop, counted as 2 * mnk per row, divided by the total
    duration and scaled by 1e-9) and `function` (from `lr.get_unique`).

    NOTE(review): `lr.compute_variable_products` presumably adds the `mnk`
    column to the frame in place -- confirm against the cashew sources.
    """
    data = df.copy()
    lr.compute_variable_products(data, 'mnk')
    result = lr.compute_full_reg(data, 'duration', ['mnk'])
    flop_count = data['mnk'].mul(2).sum()
    elapsed = data['duration'].sum()
    # Keep this insertion order: it determines the column order downstream.
    result['avg_gflops'] = flop_count / elapsed * 1e-9
    result['function'] = lr.get_unique(data, 'function')
    return result

# Run `compute_lin_reg` through cashew's regression driver and collect the
# results in a DataFrame.
# NOTE(review): presumably one row per (node, cpu, job) group -- confirm in cashew.
regression_rows = lr.regression(df, compute_lin_reg)
reg = pandas.DataFrame(regression_rows)
print(len(reg))
reg.head()

1456


Unnamed: 0,intercept,mnk,tvalue_mnk,intercept_residual,mnk_residual,tvalue_mnk_residual,avg_gflops,function,cluster,node,expfile_hash,cpu,jobid,start_time
0,3e-06,7.905501e-11,183.379783,3.714144e-07,2.797882e-12,9.519797,25.210995,dgemm,dahu,1,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...,0,1895763,1574848720
1,2e-06,7.494961e-11,176.674498,4.099831e-07,2.891201e-12,10.136128,26.590512,dgemm,dahu,1,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...,1,1895763,1574848720
2,3e-06,7.755325e-11,175.939304,3.993891e-07,2.751326e-12,9.183373,25.694303,dgemm,dahu,2,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...,0,1895763,1574848720
3,2e-06,7.4035e-11,170.876087,3.37138e-07,2.762763e-12,9.066612,26.912353,dgemm,dahu,2,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...,1,1895763,1574848720
4,2e-06,7.68782e-11,178.076505,2.655408e-07,2.828755e-12,9.438278,25.928341,dgemm,dahu,3,b71f533e61cf2879a4fe26294df73d40c2229b52b41fda...,0,1895763,1574848720


In [5]:
# Save the regression results as CSV; `index=False` leaves the row index out of the file.
reg.to_csv('/tmp/dgemm_calibration.csv', index=False)