# Best OS metrics

We want to select potential candidates for features in our models from OS metrics. There are many of them (1962), so we need to do it automatically.

We'll fit a simple linear regression model for each metric, using that metric as the only feature, and compare the resulting R-squared (and AIC) values.

We'll start with the Redis YCSB vs Redis YCSB experiment.

In [1]:
import pandas as pd

from helpers.load_data import (
    get_data_with_metrics,
    get_experiments_paths,
    trim_experiment,
)
from helpers.regression import fit_regression

# Base directory handed to get_experiments_paths() when looking up an experiment by name.
experiments_path = '../../data/basic'

In [2]:
def get_rsquared_for_features(df: pd.DataFrame, target: str, features: list):
    """Fit one single-feature regression per candidate and collect its scores.

    For every name in ``features`` the formula ``'<target> ~ <feature>'`` is
    passed to ``fit_regression`` together with ``df``.  Any exception raised
    while fitting or reading the scores is printed to stdout and the loop
    moves on to the next feature.  A progress marker is printed after every
    20th feature.

    Args:
        df: Data handed to ``fit_regression``.
        target: Left-hand side of the regression formula.
        features: Candidate names, each used alone as the right-hand side.

    Returns:
        A pair ``(rsquared_list, aic_list)``: lists of ``(rsquared, feature)``
        and ``(aic, feature)`` tuples, in the order the features were fitted.
    """
    r2_pairs, aic_pairs = [], []

    for done, name in enumerate(features, start=1):
        try:
            fit = fit_regression(data=df, formula=f'{target} ~ {name}')
            r2_pairs.append((fit.rsquared, name))
            aic_pairs.append((fit.aic, name))
        except Exception as err:
            # Best effort: one failing metric must not abort the whole scan.
            print(f'\nError on feature {name}!')
            print(str(err))

        # Lightweight progress indicator, emitted on a single output line.
        if done % 20 == 0:
            print(f'{done}/{len(features)}, ', end='', flush=True)

    return r2_pairs, aic_pairs


def print_rsquared_list(rsquared_list, limit):
    """Print the ``limit`` best entries of ``rsquared_list``, highest first.

    ``rsquared_list`` holds ``(rsquared, feature)`` tuples.  It is sorted in
    place in descending order, so the caller's list is reordered as well.
    Each line shows the R-squared with six decimals followed by the feature.
    """
    rsquared_list.sort(reverse=True)
    best = rsquared_list[:limit]

    for score, name in best:
        print(f'{score:.6f} {name}')
        

def print_aic_list(aic_list, limit):
    """Print the ``limit`` best entries of ``aic_list``, lowest AIC first.

    ``aic_list`` holds ``(aic, feature)`` tuples.  It is sorted in place in
    ascending order, so the caller's list is reordered as well.  Each line
    shows the AIC with four decimals followed by the feature.
    """
    aic_list.sort()
    best = aic_list[:limit]

    for score, name in best:
        print(f'{score:.4f} {name}')
        
def print_joint(rsquared_list, aic_list, limit):
    """Print the top ``limit`` features by R-squared alongside their AIC.

    Both lists are sorted in place (R-squared descending, AIC ascending), so
    the caller's lists are left in that order.  Each printed row has the form
    ``rank | rsquared | aic | feature``, where ``rank`` is the 1-based
    position in the R-squared ordering.  A feature that has no entry in
    ``aic_list`` is not printed, but still occupies its rank.

    Args:
        rsquared_list: List of ``(rsquared, feature)`` tuples.
        aic_list: List of ``(aic, feature)`` tuples.
        limit: Number of leading R-squared entries to consider.
    """
    rsquared_list.sort(reverse=True)
    aic_list.sort(reverse=False)

    # Index AIC by feature once instead of scanning aic_list for every row.
    # setdefault keeps the first (lowest) AIC when a feature occurs more than
    # once, which is what a first-match scan of the sorted list would pick.
    aic_by_feature = {}
    for aic, feature in aic_list:
        aic_by_feature.setdefault(feature, aic)

    for rank, (rsquared, feature) in enumerate(rsquared_list[:limit], 1):
        if feature not in aic_by_feature:
            continue
        aic = aic_by_feature[feature]
        print(f'{rank} | {rsquared:.4f} | {aic:.2f} | {feature}')


## Redis YCSB vs Redis YCSB

In [3]:
# Experiment parameters.
instances_n = 12
experiment_name = 'redis_ycsb_1_redis_ycsb_11'
perf_metric = 'app_latency'
trim = 8

# Take the first path returned for this experiment and load its data.
path = get_experiments_paths(experiment_name, experiments_path)[0]
df = get_data_with_metrics(path, instances_n=instances_n, cpu_window=30)

# 'cbtool_time' is interpreted as seconds and exposed as a datetime column.
df['time'] = pd.to_datetime(df['cbtool_time'], unit='s')
df = trim_experiment(df, trim)

In [4]:
# Candidates: three named columns plus df columns from position 9 up to,
# but not including, the last one.
features = ['instances_n', 'cpu', 'memory', *df.columns[9:-1]]
rsquared_redis_ycsb, aic_redis_ycsb = get_rsquared_for_features(
    df=df,
    target=perf_metric,
    features=features,
)

20/1615, 40/1615, 60/1615, 80/1615, 100/1615, 120/1615, 140/1615, 160/1615, 180/1615, 200/1615, 220/1615, 240/1615, 260/1615, 280/1615, 300/1615, 320/1615, 340/1615, 360/1615, 380/1615, 400/1615, 420/1615, 440/1615, 460/1615, 480/1615, 500/1615, 520/1615, 540/1615, 560/1615, 580/1615, 600/1615, 620/1615, 640/1615, 660/1615, 680/1615, 700/1615, 720/1615, 740/1615, 760/1615, 780/1615, 800/1615, 820/1615, 840/1615, 860/1615, 880/1615, 900/1615, 920/1615, 940/1615, 960/1615, 980/1615, 1000/1615, 1020/1615, 1040/1615, 1060/1615, 1080/1615, 1100/1615, 1120/1615, 1140/1615, 1160/1615, 1180/1615, 1200/1615, 1220/1615, 1240/1615, 1260/1615, 1280/1615, 1300/1615, 1320/1615, 1340/1615, 1360/1615, 1380/1615, 1400/1615, 1420/1615, 1440/1615, 1460/1615, 1480/1615, 1500/1615, 1520/1615, 1540/1615, 1560/1615, 1580/1615, 1600/1615, 

In [5]:
# Show the 30 best features by R-squared together with their AIC.
print_joint(
    rsquared_list=rsquared_redis_ycsb,
    aic_list=aic_redis_ycsb,
    limit=30,
)

1 | 0.9875 | -1632.21 | node_memory_Committed_AS_bytes
2 | 0.9870 | -1625.90 | instances_n
3 | 0.9864 | -1619.87 | node_memory_MemFree_bytes
4 | 0.9863 | -1618.54 | node_memory_SUnreclaim_bytes
5 | 0.9852 | -1607.11 | node_filesystem_free_bytes_device__dev_mapper_ubuntu__vg_ubuntu__lvfstype_ext4mountpoint___
6 | 0.9852 | -1607.11 | node_filesystem_avail_bytes_device__dev_mapper_ubuntu__vg_ubuntu__lvfstype_ext4mountpoint___
7 | 0.9850 | -1604.83 | node_cpu_seconds_total_cpu_6mode_iowait_
8 | 0.9846 | -1601.09 | node_memory_Cached_bytes
9 | 0.9844 | -1599.24 | node_cpu_seconds_total_cpu_11mode_iowait_
10 | 0.9841 | -1596.79 | node_cpu_seconds_total_cpu_14mode_iowait_
11 | 0.9841 | -1596.76 | node_memory_Buffers_bytes
12 | 0.9841 | -1596.48 | node_cpu_seconds_total_cpu_8mode_iowait_
13 | 0.9839 | -1594.70 | node_cpu_seconds_total_cpu_10mode_iowait_
14 | 0.9838 | -1593.60 | node_cpu_seconds_total_cpu_9mode_iowait_
15 | 0.9837 | -1592.72 | node_cpu_seconds_total_cpu_19mode_iowait_
16 | 0.98

## Linpack vs Linpack

In [6]:
# Experiment parameters.
instances_n = 12
experiment_name = 'linpack_1_linpack_11'
perf_metric = 'app_throughput_inv'
trim = 5

# Take the first path returned for this experiment and load its data.
path = get_experiments_paths(experiment_name, experiments_path)[0]
df = get_data_with_metrics(path, instances_n=instances_n, cpu_window=30)

# 'cbtool_time' is interpreted as seconds and exposed as a datetime column.
df['time'] = pd.to_datetime(df['cbtool_time'], unit='s')
df = trim_experiment(df, trim)

In [7]:
# Candidates: three named columns plus df columns from position 9 up to,
# but not including, the last one.
features = ['instances_n', 'cpu', 'memory', *df.columns[9:-1]]
rsquared_linpack, aic_linpack = get_rsquared_for_features(
    df=df,
    target=perf_metric,
    features=features,
)

20/1953, 40/1953, 60/1953, 80/1953, 100/1953, 120/1953, 140/1953, 160/1953, 180/1953, 200/1953, 220/1953, 240/1953, 260/1953, 280/1953, 300/1953, 320/1953, 340/1953, 360/1953, 380/1953, 400/1953, 420/1953, 440/1953, 460/1953, 480/1953, 500/1953, 520/1953, 540/1953, 560/1953, 580/1953, 600/1953, 620/1953, 640/1953, 660/1953, 680/1953, 700/1953, 720/1953, 740/1953, 760/1953, 780/1953, 800/1953, 820/1953, 840/1953, 860/1953, 880/1953, 900/1953, 920/1953, 940/1953, 960/1953, 980/1953, 1000/1953, 1020/1953, 1040/1953, 1060/1953, 1080/1953, 1100/1953, 1120/1953, 1140/1953, 1160/1953, 1180/1953, 1200/1953, 1220/1953, 1240/1953, 1260/1953, 1280/1953, 1300/1953, 1320/1953, 1340/1953, 1360/1953, 1380/1953, 1400/1953, 1420/1953, 1440/1953, 1460/1953, 1480/1953, 1500/1953, 1520/1953, 1540/1953, 1560/1953, 1580/1953, 1600/1953, 1620/1953, 1640/1953, 1660/1953, 1680/1953, 1700/1953, 1720/1953, 1740/1953, 1760/1953, 1780/1953, 1800/1953, 1820/1953, 1840/1953, 1860/1953, 1880/1953, 1900/1953, 1920/195

In [8]:
# Show the 30 best features by R-squared together with their AIC.
print_joint(
    rsquared_list=rsquared_linpack,
    aic_list=aic_linpack,
    limit=30,
)

1 | 0.8834 | -1088.90 | node_pressure_io_waiting_seconds_total
2 | 0.8833 | -1088.83 | node_pressure_io_stalled_seconds_total
3 | 0.8718 | -1082.18 | node_cpu_seconds_total_cpu_18mode_iowait_
4 | 0.8645 | -1078.23 | node_cpu_seconds_total_cpu_17mode_iowait_
5 | 0.8550 | -1073.44 | node_netstat_Icmp6_OutMsgs
6 | 0.8514 | -1071.66 | node_cpu_seconds_total_cpu_15mode_iowait_
7 | 0.8501 | -1071.08 | node_memory_Active_anon_bytes
8 | 0.8501 | -1071.05 | node_memory_Active_bytes
9 | 0.8500 | -1071.03 | node_memory_AnonPages_bytes
10 | 0.8499 | -1070.97 | node_memory_MemAvailable_bytes
11 | 0.8498 | -1070.91 | node_memory_MemFree_bytes
12 | 0.8490 | -1070.53 | cpu
13 | 0.8486 | -1070.37 | node_memory_Committed_AS_bytes
14 | 0.8482 | -1070.15 | instances_n
15 | 0.8470 | -1069.62 | node_memory_PageTables_bytes
16 | 0.8467 | -1069.48 | node_memory_Inactive_anon_bytes
17 | 0.8464 | -1069.32 | node_cpu_seconds_total_cpu_23mode_iowait_
18 | 0.8455 | -1068.89 | node_cpu_seconds_total_cpu_7mode_iowai

## Sysbench vs Sysbench

In [9]:
# Experiment parameters.
instances_n = 12
experiment_name = 'sysbench_1_sysbench_11'
perf_metric = 'app_latency'

# Take the first path returned for this experiment and load its data.
path = get_experiments_paths(experiment_name, experiments_path)[0]
df = get_data_with_metrics(path, instances_n=instances_n, cpu_window=30)

# 'cbtool_time' is interpreted as seconds and exposed as a datetime column.
# NOTE(review): unlike the Redis YCSB and Linpack sections, no
# trim_experiment() step is applied here - confirm this is intentional.
df['time'] = pd.to_datetime(df['cbtool_time'], unit='s')

In [10]:
# Candidates: three named columns plus df columns from position 9 up to,
# but not including, the last one.
features = ['instances_n', 'cpu', 'memory', *df.columns[9:-1]]
rsquared_sysbench, aic_sysbench = get_rsquared_for_features(
    df=df,
    target=perf_metric,
    features=features,
)

20/1954, 40/1954, 60/1954, 80/1954, 100/1954, 120/1954, 140/1954, 160/1954, 180/1954, 200/1954, 220/1954, 240/1954, 260/1954, 280/1954, 300/1954, 320/1954, 340/1954, 360/1954, 380/1954, 400/1954, 420/1954, 440/1954, 460/1954, 480/1954, 500/1954, 520/1954, 540/1954, 560/1954, 580/1954, 600/1954, 620/1954, 640/1954, 660/1954, 680/1954, 700/1954, 720/1954, 740/1954, 760/1954, 780/1954, 800/1954, 820/1954, 840/1954, 860/1954, 880/1954, 900/1954, 920/1954, 940/1954, 960/1954, 980/1954, 1000/1954, 1020/1954, 1040/1954, 1060/1954, 1080/1954, 1100/1954, 1120/1954, 1140/1954, 1160/1954, 1180/1954, 1200/1954, 1220/1954, 1240/1954, 1260/1954, 1280/1954, 1300/1954, 1320/1954, 1340/1954, 1360/1954, 1380/1954, 1400/1954, 1420/1954, 1440/1954, 1460/1954, 1480/1954, 1500/1954, 1520/1954, 1540/1954, 1560/1954, 1580/1954, 1600/1954, 1620/1954, 1640/1954, 1660/1954, 1680/1954, 1700/1954, 1720/1954, 1740/1954, 1760/1954, 1780/1954, 1800/1954, 1820/1954, 1840/1954, 1860/1954, 1880/1954, 1900/1954, 1920/195

In [15]:
# Show the 500 best features by R-squared together with their AIC.
print_joint(
    rsquared_list=rsquared_sysbench,
    aic_list=aic_sysbench,
    limit=500,
)

1 | 0.9437 | 2283.30 | node_netstat_Tcp_CurrEstab
2 | 0.9350 | 2332.39 | node_sockstat_sockets_used
3 | 0.9331 | 2341.94 | node_filefd_allocated
4 | 0.9318 | 2348.91 | node_sockstat_TCP_inuse
5 | 0.9277 | 2368.57 | node_memory_Inactive_anon_bytes
6 | 0.9222 | 2393.53 | node_filesystem_files_free_device_tmpfsfstype_tmpfsmountpoint__run_snapd_ns_
7 | 0.9222 | 2393.55 | node_filesystem_files_free_device_tmpfsfstype_tmpfsmountpoint__run_
8 | 0.9211 | 2398.37 | node_sockstat_UDP6_inuse
9 | 0.9197 | 2404.34 | node_load1
10 | 0.9185 | 2409.27 | node_memory_Shmem_bytes
11 | 0.9181 | 2411.20 | node_filesystem_free_bytes_device_tmpfsfstype_tmpfsmountpoint__run_snapd_ns_
12 | 0.9181 | 2411.20 | node_filesystem_free_bytes_device_tmpfsfstype_tmpfsmountpoint__run_
13 | 0.9181 | 2411.20 | node_filesystem_avail_bytes_device_tmpfsfstype_tmpfsmountpoint__run_snapd_ns_
14 | 0.9181 | 2411.20 | node_filesystem_avail_bytes_device_tmpfsfstype_tmpfsmountpoint__run_
15 | 0.9176 | 2413.26 | node_sockstat_TCP_al