# WLTests Results
Analyses and visualises results generated by a wltest

In [None]:
# Initialise logging for this notebook via the LisaLogging helper.
# NOTE(review): presumably this installs the LISA logging configuration
# used by the logging.info() calls in the cells below - confirm in `conf`.
from conf import LisaLogging
LisaLogging.setup()

In [None]:
import logging
from IPython.display import display

from wa_results_collector import WaResultsCollector
import pandas as pd

%pylab inline

## Results analysis and metrics collection

In [None]:
# Build the results collector used by all the cells below.
# Edit the values to match your local setup before running this cell.
collector = WaResultsCollector(

    # Kernel tree used for the tests
    kernel_repo_path='/path/to/your/linux/sources/tree',

    # WLTests results folder:
    base_dir='../../results/wltests/',  # base path of your results folders
    #wa_dirs='(substring_to_match)',    # uncomment to parse only the folders matching this regexp

    # Results to collect:
    # enable trace parsing only to get more metrics
    # NOTE: results generation will take more time when it is enabled
    parse_traces=False,
)

## Collected metrics

In [None]:
# Keep a handle on the collected results; `df` is reused by later cells.
df = collector.results_df

# List each distinct metric name once
logging.info("Metrics available for plots and analysis:")
for metric_name in df['metric'].unique().tolist():
    logging.info("   %s", metric_name)

# Jankbench

## Total Frame Duration

In [None]:
# One report of the total frame duration for each Jankbench test,
# sorted by increasing 99th percentile.
for test_name in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", test_name)
    # Wrap the name in ^...$ anchors
    # (presumably `test` is matched as a regexp, so this selects only
    # the exact test name - confirm against WaResultsCollector)
    exact_test = "^{}$".format(test_name)
    collector.report(
        workload='jankbench',
        metric='frame_total_duration',
        test=exact_test,
        sort_on='99%',
        ascending=True,
    )

## Energy

In [None]:
# One report of the total device energy for each Jankbench test,
# sorted by increasing mean.
for test_name in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", test_name)
    # Wrap the name in ^...$ anchors
    # (presumably `test` is matched as a regexp, so this selects only
    # the exact test name - confirm against WaResultsCollector)
    exact_test = "^{}$".format(test_name)
    collector.report(
        workload='jankbench',
        metric='device_total_energy',
        test=exact_test,
        sort_on='mean',
        ascending=True,
    )

## Frames Duration CDF

In [None]:
# One CDF plot of the total frame duration for each Jankbench test.
for test_name in collector.tests(workload='jankbench'):
    logging.info("Results for: %s", test_name)
    # Wrap the name in ^...$ anchors
    # (presumably `test` is matched as a regexp, so this selects only
    # the exact test name - confirm against WaResultsCollector)
    exact_test = "^{}$".format(test_name)
    collector.plot_cdf(
        workload='jankbench',
        metric='frame_total_duration',
        test=exact_test,
        # NOTE(review): 16 looks like the frame budget in ms at 60 FPS - confirm
        threshold=16,
    )

# Exoplayer

## Dropped Frames

In [None]:
# One report of the dropped frames for each Exoplayer test,
# sorted by increasing 99th percentile.
# NOTE(review): unlike the Jankbench cells, the test name is passed as-is
# (not wrapped in ^...$) - confirm this is intended.
for test_name in collector.tests(workload='exoplayer'):
    logging.info("Results for: %s", test_name)
    collector.report(
        workload='exoplayer',
        metric='exoplayer_dropped_frames',
        test=test_name,
        sort_on='99%',
        ascending=True,
    )

## Energy

In [None]:
# One report of the total device energy for each Exoplayer test,
# sorted by increasing mean.
# NOTE(review): unlike the Jankbench cells, the test name is passed as-is
# (not wrapped in ^...$) - confirm this is intended.
for test_name in collector.tests(workload='exoplayer'):
    logging.info("Results for: %s", test_name)
    collector.report(
        workload='exoplayer',
        metric='device_total_energy',
        test=test_name,
        sort_on='mean',
        ascending=True,
    )

# Homescreen

In [None]:
# One report of the total device energy for each Homescreen test,
# sorted by increasing mean.
# NOTE(review): unlike the Jankbench cells, the test name is passed as-is
# (not wrapped in ^...$) - confirm this is intended.
for test_name in collector.tests(workload='homescreen'):
    logging.info("Results for: %s", test_name)
    collector.report(
        workload='homescreen',
        metric='device_total_energy',
        test=test_name,
        sort_on='mean',
        ascending=True,
    )

# Geekbench

## Overall scores

In [None]:
# Report the two overall Geekbench scores, best (highest) results first.
for score_metric in ('Single-Core_score', 'Multi-Core_score'):
    collector.report(
        workload='geekbench',
        metric=score_metric,
        sort_on='99%',
        ascending=False,
    )

## Detailed scores

In [None]:
# Select the Geekbench rows from the collected results.
# NOTE(review): the rows are selected on the `test` column, whereas the
# PCMark cells select on `workload` - confirm `test` is the intended column.
df = collector.results_df
gb_scores_db = df[df['test'] == 'geekbench']

# One group for each (test, tag, kernel, metric) combination
grouped_df = gb_scores_db.groupby(['test', 'tag', 'kernel', 'metric'])

# Summary statistics of each group, including the 95th and 99th percentiles.
# After reset_index() the fifth index level, which has no name, shows up as
# a 'level_4' column: it is renamed to 'stats'.
# NOTE(review): this relies on describe() returning the statistic names as
# an additional index level - confirm with the pandas version in use.
stats_df = pd.DataFrame(
    grouped_df.describe(percentiles=[0.95, 0.99])
).reset_index().rename(columns={'level_4': 'stats'})

### Single Core Scores

In [None]:
# Build the table of detailed Single-Core scores from stats_df
# (stats_df is generated by the "Detailed scores" cell above).

# Keep only the Single-Core metrics and the columns needed for the table.
# An explicit copy is taken so that the 'metric' column can be rewritten
# below without assigning into a slice of stats_df, which would raise
# pandas' SettingWithCopyWarning.
single_score_df = stats_df[stats_df.metric.str.match('Single.*')]
single_score_df = single_score_df[['metric', 'kernel', 'stats', 'value']].copy()

# Shorten the metric names by dropping the 'Single-Core_' and '_score' parts
single_score_df['metric'] = single_score_df.metric.apply(
    lambda s: s.replace('Single-Core_', '').replace('_score', ''))

# Index by (metric, kernel, stats), then pivot the innermost level ('stats')
# into columns
single_score_df = single_score_df.set_index(['metric', 'kernel', 'stats']).unstack()
logging.info("Detailed SINGLE core scores:")
single_score_df

### Multi Core Scores

In [None]:
# Build the table of detailed Multi-Core scores from stats_df
# (stats_df is generated by the "Detailed scores" cell above).

# Keep only the Multi-Core metrics and the columns needed for the table.
# An explicit copy is taken so that the 'metric' column can be rewritten
# below without assigning into a slice of stats_df, which would raise
# pandas' SettingWithCopyWarning.
multi_score_df = stats_df[stats_df.metric.str.match('Multi.*')]
multi_score_df = multi_score_df[['metric', 'kernel', 'stats', 'value']].copy()

# Shorten the metric names by dropping the 'Multi-Core_' and '_score' parts
multi_score_df['metric'] = multi_score_df.metric.apply(
    lambda s: s.replace('Multi-Core_', '').replace('_score', ''))

# Index by (metric, kernel, stats), then pivot the innermost level ('stats')
# into columns
multi_score_df = multi_score_df.set_index(['metric', 'kernel', 'stats']).unstack()
# This cell reports the multi-core scores: the message used to say "SINGLE"
logging.info("Detailed MULTI core scores:")
multi_score_df

# PCMark Scores

## Overall Scores

In [None]:
# Select the PCMark results and the metrics whose name starts with 'pcmark_'
pm_df = df[df['workload'] == 'pcmark']
pm_scores = [
    name
    for name in pm_df['metric'].unique().tolist()
    if name.startswith('pcmark_')
]

# One report for each PCMark score, best (highest) results first
for score_metric in pm_scores:
    collector.report(
        workload='pcmark',
        metric=score_metric,
        sort_on='99%',
        ascending=False,
    )

## Detailed Scores

In [None]:
# Select the PCMark rows from the collected results
df = collector.results_df
pm_scores_db = df[df['workload'] == 'pcmark']

# One group for each (test, tag, kernel, metric) combination
grouped_df = pm_scores_db.groupby(['test', 'tag', 'kernel', 'metric'])

# Summary statistics of each group, including the 95th and 99th percentiles.
# After reset_index() the fifth index level, which has no name, shows up as
# a 'level_4' column: it is renamed to 'stats'.
# NOTE(review): this relies on describe() returning the statistic names as
# an additional index level - confirm with the pandas version in use.
stats_df = pd.DataFrame(
    grouped_df.describe(percentiles=[0.95, 0.99])
).reset_index().rename(columns={'level_4': 'stats'})

In [None]:
# Build the table of detailed PCMark scores from stats_df
# (stats_df is generated by the previous cell).

# Keep only the 'pcmark_*' metrics and the columns needed for the table.
# An explicit copy is taken so that the 'metric' column can be rewritten
# below without assigning into a slice of stats_df, which would raise
# pandas' SettingWithCopyWarning.
pm_score_df = stats_df[stats_df.metric.str.match('pcmark_.*')]
pm_score_df = pm_score_df[['metric', 'kernel', 'stats', 'value']].copy()

# Shorten the metric names by dropping the 'pcmark_' part
pm_score_df['metric'] = pm_score_df.metric.apply(lambda s: s.replace('pcmark_', ''))

# Index by (metric, kernel, stats), then pivot the innermost level ('stats')
# into columns
pm_score_df = pm_score_df.set_index(['metric', 'kernel', 'stats']).unstack()
logging.info("Detailed scores:")
pm_score_df

# Generic comparison plots
`plot_comparisons` can be used to automatically discover metrics that changed between different kernel versions or tags. 

In [None]:
# List the distinct kernels found in the collected results
logging.info("Here is the list of kernels available:")
available_kernels = df['kernel'].unique().tolist()
logging.info("  %s", ', '.join(available_kernels))

In [None]:
# Select the baseline kernel for the comparisons.
# By default the kernel of the first row of the results is used:
kernel_baseline = df['kernel'].iloc[0]
# Alternatively, uncomment the line below and set one of the kernels
# reported above as the baseline for the comparisons:
# kernel_baseline = "PutHereYourKernelName"

logging.info("Comparing against baseline kernel: %s", kernel_baseline)
collector.plot_comparisons(
    by='kernel',
    base_id=kernel_baseline,
)