In [None]:
%matplotlib inline

# Python 2.x / 3.x compatibility
from __future__ import division, print_function

#Import modules
import pandas as pd
import numpy as np
import os
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import re

#import csv
import glob as gb

#import pathlib

import datetime
import sqlite3

from df2gspread import df2gspread as d2g

mpl.rcParams['figure.figsize'] = (16, 9)
pd.options.display.max_rows = 200

In [None]:
import regression_analysis
# from imp import reload
# reload(regression_analysis)

# Parse compatibility Matrix

In [None]:
compat_matrix = regression_analysis.parse_compatibility_matrix()

In [None]:
# Force a new version (don't want to be prompted each time I parse df_files).
# The forced row is a copy of the first row of the matrix with only the
# 'OpenStudio' value overridden.
FORCED_OS_VERSION = "2.4.2"

# Guard so that re-running this cell does not add the same row a second time.
if FORCED_OS_VERSION not in compat_matrix['OpenStudio'].values:
    new_version = compat_matrix.iloc[0].copy()
    new_version['OpenStudio'] = FORCED_OS_VERSION
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported replacement. The transposed Series is an
    # all-object frame, so infer_objects() restores the column dtypes
    # (e.g. the boolean 'Has_Docker' column used for masking below).
    compat_matrix = (pd.concat([compat_matrix, new_version.to_frame().T])
                       .infer_objects()
                       .sort_values('OpenStudio', ascending=False)
                       .reset_index(drop=True))

In [None]:
compat_matrix.head()

In [None]:
compat_matrix['Has_Docker'].value_counts()

In [None]:
compat_matrix[compat_matrix['Has_Docker']].tail()

In [None]:
# Count Number of OpenStudio versions within each E+ version
compat_matrix.groupby('E+')['OpenStudio'].count()

In [None]:
# Number of OpenStudio versions per E+ version, computed once and reused
# for both the bars and the x-axis limit.
versions_per_eplus = compat_matrix.groupby('E+')['OpenStudio'].count()

fig, ax = plt.subplots(figsize=(12, 6))
versions_per_eplus.plot(kind='barh', ax=ax)
ax.set_xlim(0, versions_per_eplus.max())
ax.set_title('Number of OpenStudio version for each E+ version')
ax.set_xlabel('Number of OpenStudio Versions')
plt.show()

In [None]:
# Export to CSV
# compat_matrix.to_csv('compat_matrix.csv')

# Fix permissions and skin down the fuelcell OSW

In [None]:
# Skinning it down is done in the model_tests.rb now
help(regression_analysis.cleanup_bloated_osws)

The permissions stuff is done in the launch docker shell scripts

If you want to do it manually

Need to do:
    
    sudo chown -R $USER * 
    sudo find . -type f -exec chmod 664 {} \;

# Parse out.osw files

In [None]:
# `imp` is deprecated since Python 3.4 and was removed in Python 3.12;
# importlib.reload is its replacement. On Python 2 the import fails and the
# builtin reload() is used instead.
try:
    from importlib import reload
except ImportError:
    pass
reload(regression_analysis)

## Without custom tags

In [None]:
df_files = regression_analysis.find_info_osws(compat_matrix=compat_matrix)

## With custom Tags

In [None]:
# df_files = regression_analysis.find_info_osws_with_tags(compat_matrix=compat_matrix)

## Output the test status: Fail/Success/Blank

In [None]:
def background_colors(val):
    """Return the CSS background rule for one cell of the status table.

    'Fail' cells get a red tint, empty-string cells get a sand tint, and any
    other value gets no styling (an empty string).
    """
    if val == 'Fail':
        colour = '#F4C7C3'
    elif val == '':
        colour = '#f2e2c1'
    else:
        return ''
    return 'background-color: {}'.format(colour)

def hover(hover_color="#ffff99"):
    """Build the table-style entry that highlights a row under the mouse.

    Returns a dict with a "tr:hover" selector and a single background-color
    property set to `hover_color`.
    """
    highlight = ("background-color", "%s" % hover_color)
    return {"selector": "tr:hover", "props": [highlight]}

styles = [
    hover(),
    dict(selector="td", props=[#("font-size", "150%"),
                               ("text-align", "center")]),
    dict(selector="caption", props=[("caption-side", "bottom")])
]

In [None]:
# Prepare the dataframe
success = regression_analysis.success_sheet(df_files)

In [None]:
success[success[('8.8.0', '2.4.2')] == 'Fail']

In [None]:
(success.style.applymap(background_colors).set_table_styles(styles)
          .set_caption("Test Success"))

### Filter for a few tests only

#### Only those where some are missing or failed

In [None]:
filt = success[(success == '').any(axis=1) |
               (success == 'Fail').any(axis=1)].index.get_level_values(0).unique().tolist()

(success.loc[filt].style
          .applymap(background_colors)
          .set_table_styles(styles)
          .set_caption("Test Success"))

#### Other examples

In [None]:
# Filter on a single containing string
filt = success.index.get_level_values(0).str.contains('exterior_equi')

# Filter on a pattern
#filt = success.index.get_level_values(0).str.match(r'(exterior_equipment)|(meters)|(plant_op_schemes)|(avms_temp)')

(success.loc[filt].style.applymap(background_colors).set_table_styles(styles)
          .set_caption("Test Success"))

In [None]:
success_filt = success.loc[success[(success == '').sum(axis=1) >= 1].index.get_level_values(0).tolist()]
filt = success_filt[success_filt[('8.8.0', '2.4.1')] == 'Fail'].index.get_level_values(0).tolist()

(success.loc[filt].style.applymap(background_colors).set_table_styles(styles)
        .set_caption("Test Success"))

### Export to Google

In [None]:
spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'Test_Status'
d2g.upload(success.T.reset_index().T.reset_index(),
           gfile=spreadsheet, wks_name=wks_name,
           row_names=False, col_names=False)

## Output Missing tests: ruby versus osm

In [None]:
test_impl = regression_analysis.test_implemented_sheet(df_files=df_files, success=success,
                                   only_for_mising_osm=False)

In [None]:
test_impl[~test_impl['osm']]

In [None]:
spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'Tests_Implemented'
d2g.upload(test_impl,
           gfile=spreadsheet, wks_name=wks_name,
           row_names=True, col_names=True)

## Output the total_site_energy (kBTU)

In [None]:
site_kbtu = df_files.applymap(regression_analysis.parse_total_site_energy)

In [None]:
spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'SiteKBTU'
d2g.upload(site_kbtu.T.reset_index().T.reset_index().fillna(''),
           gfile=spreadsheet, wks_name=wks_name,
           # Skip first row
           start_cell='A1',
           row_names=False, col_names=False)

## Output the rolling percent difference of total kBTU from one version to the next

In [None]:
site_kbtu_change = site_kbtu.pct_change(axis=1)

In [None]:
site_kbtu.loc[site_kbtu_change.index.get_level_values(0).str.contains('flat_plate')]

In [None]:
#site_kbtu_change.loc['pv_and_storage_facilityexcess']
site_kbtu_change.loc[site_kbtu_change.index.get_level_values(0).str.contains('flat_plate')]

### Heatmap > 1% change

In [None]:
regression_analysis.heatmap_sitekbtu_pct_change(site_kbtu=site_kbtu,
                            row_threshold=0.01, display_threshold=0.001, 
                            savefig=False, show_plot=True)

### Heatmap > 0.5% change

In [None]:
regression_analysis.heatmap_sitekbtu_pct_change(site_kbtu=site_kbtu,
                            row_threshold=0.005, display_threshold=0.001, 
                            savefig=False, show_plot=True)

In [None]:
spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'SiteKBTU_Percent_Change'
d2g.upload(site_kbtu.pct_change(axis=1).T.reset_index().T.reset_index().fillna(''),
           gfile=spreadsheet, wks_name=wks_name,
           row_names=False, col_names=False)

<p style="font-size: 40px; color:red;">ANYTHING PAST THIS POINT NEEDS CLEANING</p>

## Difference in end use

In [None]:
over_5pct = (site_kbtu.pct_change(axis=1).abs() > 0.005).sum(axis=0).to_frame()
over_5pct.columns = ['Count (ABS(pct_diff) > 0.5%)']

In [None]:
site_kbtu.pct_change(axis=1).abs().describe()

In [None]:
over_5pct

In [None]:
over_5pct = (site_kbtu.pct_change(axis=1).abs() > 0.005).sum(axis=1).to_frame()
over_5pct.columns = ['Count (ABS(pct_diff) > 0.005)']

In [None]:
over_5pct.replace(0, np.nan).dropna().sort_values('Count (ABS(pct_diff) > 0.005)', ascending=False)

In [None]:
version_1 = '2.1.0'
version_2 = '2.2.1'


def _try_parse_end_use(row, version):
    """Parse the end-use table of `row[version]`.

    Returns a (value, ok) pair: the result of
    regression_analysis.parse_end_use and True on success, or
    ('Failed', False) when the column lookup or the parsing raises.
    """
    try:
        return regression_analysis.parse_end_use(row[version]), True
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit still stop the loop.
        return 'Failed', False


all_diffs = {}
failed = {}
# Drop the first column level so that a row can be indexed directly by the
# version_1 / version_2 labels.
for index, row in df_files.T.reset_index(level=0, drop=True).T.iterrows():
    cleaned_end_use_2, ok2 = _try_parse_end_use(row, version_2)
    cleaned_end_use_1, ok1 = _try_parse_end_use(row, version_1)

    if ok1 and ok2:
        # Relative change from version_1 to version_2
        pct_diff = (cleaned_end_use_2 - cleaned_end_use_1) / cleaned_end_use_1

        all_diffs[index] = {version_1: cleaned_end_use_1,
                            version_2: cleaned_end_use_2,
                            'diff': pct_diff}
    else:
        # Record which of the two versions could be parsed (True) or not (False)
        failed[index] = {version_1: ok1,
                         version_2: ok2}

df_failed = pd.DataFrame(failed).T

In [None]:
# See the ones that changed: False means it fails, True means it worked
df_failed[df_failed[version_1] != df_failed[version_2]]

In [None]:
# For each test: largest and smallest entry of the percent-difference table,
# plus the relative change of the summed ('Total', 'kBtu') column.
max_diffs = {}
for test, d in all_diffs.items():
    total_1 = d[version_1][('Total', 'kBtu')].sum()
    total_2 = d[version_2][('Total', 'kBtu')].sum()
    max_diffs[test] = {
        'Max': d['diff'].max().max(),
        'Min': d['diff'].min().min(),
        'Total Diff': (total_2 - total_1) / total_1,
    }

df_diffs = pd.DataFrame(max_diffs).T

In [None]:
df_diffs[~(df_diffs == 0).all(axis=1)].style.format("{:.2%}")

In [None]:
from matplotlib.ticker import FuncFormatter

In [None]:
test = ('coolingtowers', 'rb')

fig, ax = plt.subplots(figsize=(16,9))

fmt = lambda x,pos: '{:.0%}'.format(x)

sns.heatmap(all_diffs[test]['diff'].dropna(how='all', axis=0).dropna(how='all', axis=1).abs(),
            ax=ax, cmap='YlOrRd',
            vmin=0, vmax=1,
            cbar_kws={'format': mpl.ticker.FuncFormatter(fmt)},
            annot=all_diffs[test]['diff'].dropna(how='all', axis=0).dropna(how='all', axis=1), fmt='.1%')
ax.set_title("Percent difference in End Use By Fuel for test '{}' between {} and {}".format(test, version_2, version_1))
plt.show()

# Find missing tests: Map tests to Cpp classes

## Grep in ruby and osm tests

In [None]:
os.chdir('/home/julien/Software/Others/OpenStudio-resources/model/simulationtests/')

# Grep in ruby test for Model:: statements
grep = !/bin/grep "Model::" *.rb
objs = pd.DataFrame([x.split(':', maxsplit=1 ) for x in grep], columns=['file', 'grepped_line'])

# Grep in ruby test for Model:: statements
grep_lib = !/bin/grep "Model::" ./lib/*.rb
objs_lib = pd.DataFrame(grep_lib, columns=['grepped_line'])
objs_lib['file'] = 'lib/baseline_model.rb'

# Find all Model namespace Classes by getting name from the cpp files
os_classes = !ls /home/julien/Software/Others/OpenStudio/openstudiocore/src/model/*.cpp
os_classes = [os.path.split(os.path.splitext(p)[0])[1] for p in os_classes]

os.chdir('/home/julien/Software/Others/OpenStudio-resources')

In [None]:
# Captures whatever sits between "OpenStudio::Model::" and the first ".new"
model_object_pat = re.compile(r'OpenStudio::Model::(.*?)\.new')


def parse_model_object(s):
    """Extract the text between 'OpenStudio::Model::' and '.new' from a line.

    Returns the captured text, or None (after printing a 'Cannot match'
    message) when the line does not contain the pattern.
    """
    found = model_object_pat.search(s)
    if found is None:
        print('Cannot match {}'.format(s))
        return None
    return found.group(1)
    
objs['ModelObject'] = objs['grepped_line'].apply(parse_model_object)
objs_lib['ModelObject'] = objs_lib['grepped_line'].apply(parse_model_object)

# Concat both
objs = pd.concat([objs, objs_lib])

In [None]:
set(objs['ModelObject']) - set(os_classes) 

In [None]:
# set(os_classes) - set(objs['ModelObject'])

In [None]:
df_os_classes = pd.DataFrame(index=os_classes)
df_os_classes['In Ruby Test'] = False
df_os_classes = df_os_classes.join(objs.groupby('ModelObject')['file'].apply(list))
df_os_classes.loc[df_os_classes['file'].notnull(),
                  'file'] = df_os_classes.loc[df_os_classes['file'].notnull(),
                                              'file'].apply(np.unique)
df_os_classes.loc[df_os_classes['file'].notnull(), 'In Ruby Test'] = True
df_os_classes = df_os_classes.rename(columns={'file': 'files'})

In [None]:
df_os_classes['In Ruby Test'].value_counts()

In [None]:
#df_os_classes.to_csv('Mapping_ruby_test_to_cpp_classes.csv')

## Get comments dict from the google sheet

In [None]:
from df2gspread import gspread2df as g2d

spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'Mapping_ruby_test_to_cpp_classes'

df = g2d.download(spreadsheet, wks_name, col_names = True, row_names = True)
#comments_dict = df['IsNormal'].to_dict()
comments_dict = df.loc[df['IsNormal'] != '', 'IsNormal'].to_dict()
comments_dict

In [None]:
s = pd.Series(comments_dict)
s = s[s.str.lower().str.contains('added')].str.split(':', expand=True)[1].str.strip().sort_values()

In [None]:
n_tot_obj = 0
n_tot_tests = 0
for index, val in s.reset_index().groupby(1)['index'].apply(list).items():
    if index == 'pv_and_storage_facilityexcess.rb':
        test = 'pv_and_storage_facilityexcess.rb and pv_and_storage_demandleveling.rb'
        n_tot_tests += 1
    else:
        test = index
    n_tot_tests += 1
    n_tot_obj += len(val)
    print("**{}** ({})".format(test, len(val)))
    print()
    for x in val:
        print("* {}".format(x))
    print("\n")
print("\n**Total Added: {} objects in {} tests**".format(n_tot_obj, n_tot_tests))

In [None]:
comments_dict

In [None]:
#comments.set_index('Test')['IsNormal'].to_dict()

In [None]:
# Merge comments
comments = pd.Series(comments_dict, name='IsNormal')
df_os_classes = df_os_classes.join(comments)
df_os_classes = df_os_classes[['In Ruby Test', 'IsNormal', 'files']]

## Find objects in the osm tests

In [None]:
# Compile a regex capturing the object type of a line starting with "OS:"
os_class_pattern = re.compile(r'OS:(.*?),')

# Initialize a column of empty lists (one distinct list per row)
df_os_classes['osms'] = np.empty((len(df_os_classes), 0)).tolist()

# Loop on all osms, and find OS objects
for osm_path in sorted(gb.glob('*.osm')):
    with open(osm_path) as f:
        # A set, so each file is recorded at most once per class even when it
        # contains many objects of that class.
        classnames = {m.group(1).replace(':', '')
                      for m in map(os_class_pattern.match, f) if m}
    for classname in classnames:
        # Test membership against the frame that is written to. The original
        # checked `df.index` (the downloaded Google sheet), which raises a
        # KeyError for classes missing from df_os_classes and breaks once
        # `df` is reassigned elsewhere in the notebook.
        if classname in df_os_classes.index:
            df_os_classes.loc[classname, 'osms'].append(osm_path)

In [None]:
df_os_classes.loc[df_os_classes['osms'].apply(len) == 0, 'osms'] = None

In [None]:
filt1 = ~df_os_classes['In Ruby Test']
filt2 = df_os_classes['IsNormal'].isnull()
filt3 = df_os_classes['osms'].isnull()
df_os_classes[filt1 & filt2 & filt3] # .apply(lambda x: print(x.name), axis=1)

In [None]:
df_os_classes.fillna('')

## Upload to Google

In [None]:
spreadsheet = '/EffiBEM&NREL-Regression-Test_Status'
wks_name = 'Mapping_ruby_test_to_cpp_classes'
d2g.upload(df_os_classes.fillna(''),
           gfile=spreadsheet, wks_name=wks_name,
           row_names=True, col_names=True)

# Test convergence

In [None]:
# Path to the OpenStudio CLI executable launched by the run loops below.
# NOTE: the second assignment overrides the first, so the local build is not
# used; comment out the line you do not want.
OSCLI = '/home/julien/Software/Others/OS-build/Products/openstudio'
OSCLI= '/usr/bin/openstudio-2.4.1'
# Number of times each simulation is repeated
RUN_N_TIMES = 10

## Running the same in.OSW

In [None]:
os.chdir('/home/julien/Software/Others/OpenStudio-resources/testruns/evaporative_cooling.osm/')

In [None]:
!ls

In [None]:
import subprocess
    
r = {}
o = {}
e = {}
for i in range(0, RUN_N_TIMES):
    process = subprocess.Popen([OSCLI, 'run', '-w', 'in.osw'], shell=False,
                           stdout=subprocess.PIPE, 
                           stderr=subprocess.PIPE)

    # wait for the process to terminate
    out, err = process.communicate()
    o[i] = out
    e[i] = err
    errcode = process.returncode
    r[i] = regression_analysis.parse_total_site_energy('out.osw')
    print("{} - {:,.0f}".format(i, r[i]))
    
# Say to user
!echo "THIS IS DONE" | espeak

In [None]:
result = pd.Series(r)
result

In [None]:
result.describe()

In [None]:
os.chdir('/home/julien/Software/Others/OpenStudio-resources')

## Running the same test (measure)

In [None]:
os.chdir('/home/julien/Software/Others/OpenStudio-resources')

In [None]:
!ls

In [None]:
diffs = over_5pct.replace(0, np.nan).dropna()['Count (ABS(pct_diff) > 0.005)'].sort_values(ascending=False)

In [None]:
# Build the command that runs, in one go, every test flagged more than once
# in `diffs`, using a single regex name filter: -n '/(test_a)|(test_b)|.../'
tests = ["test_{}_{}".format(test, ext)
         for (test, ext) in diffs[diffs > 1].index.tolist()]

# '|'.join puts the separators in the right places without tracking the last index
s = ("openstudio model_tests.rb -n '/"
     + "|".join("({})".format(test_name) for test_name in tests)
     + "/'")

In [None]:
s

In [None]:
len(diffs[diffs > 1])

In [None]:
# Results of the repeated runs, keyed by "<test>.<ext>"; each value holds the
# three per-run dicts 'r', 'o' and 'e' filled in below.
out_dict = {}

import subprocess

# NOTE: copyfile is only referenced by the commented-out copy steps in this
# cell; the "Rename out.osw" cell further down also calls it.
from shutil import copyfile

# List of (test name, extension) pairs to run
#test_exts = diffs[diffs > 1].index.tolist()
test_exts =  [
    #('surface_properties', 'rb'),
    #('fan_on_off', 'rb'),
    #('exterior_equipment', 'rb'),
    ('generator_microturbine', 'rb'),
]
for (test, ext) in test_exts:
    print(test)
 
    # Directory from which out.osw is read after each run
    #base_path = '/home/julien/Software/Others/OpenStudio-resources/testruns/availability_managers.rb/'
    base_path = '/home/julien/Software/Others/OpenStudio-resources/testruns/{}.{}/'.format(test, ext)


    r = {}  # run index -> value parsed from out.osw (successful runs only)
    o = {}  # run index -> captured stdout
    e = {}  # run index -> captured stderr
    for i in range(0, RUN_N_TIMES):
        # Run model_tests.rb restricted to this test through the -n name filter
        process = subprocess.Popen([OSCLI, 'model_tests.rb', '-n',
                                    '/{}_{}/'.format(test, ext)], 
                                   shell=False,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)

        # wait for the process to terminate
        out, err = process.communicate()
        o[i] = out
        e[i] = err
        errcode = process.returncode
        
        # Only parse out.osw when the process exited with code 0
        if errcode != 0:
            print("Problem with {}.{}, run {}".format(test, ext, i))
        else:
            r[i] = regression_analysis.parse_total_site_energy(os.path.join(base_path, 'out.osw'))
            print("{} - run {} - {:,.0f}".format(test, i, r[i]))


            # cp the osm somewhere else
            #src_path = os.path.join(base_path, 'in.osm')
            #dst_path = os.path.join(base_path, '../{t}.{e}_{i}.osm'.format(t=test, e=ext, i=i))
            #copyfile(src_path, dst_path)  
            
            #src_path = os.path.join(base_path, 'run/in.idf')
            #dst_path = os.path.join(base_path, '../{t}.{e}_{i}.idf'.format(t=test, e=ext, i=i))
            #copyfile(src_path, dst_path)
    
    out_dict["{}.{}".format(test, ext)] = {'r':r, 'o':o, 'e':e}

# Audible notification that the runs are finished
!echo "THIS IS DONE" | espeak

In [None]:
r

In [None]:
print(out)

In [None]:
print(err)

In [None]:
# One frame per test, indexed by (test, run), with the 'r'/'o'/'e' columns
# taken from out_dict. Built without a module-level `df` so that the `df`
# downloaded from the Google sheet earlier is not overwritten, and without
# inplace mutation so the cell can be re-run safely.
all_dfs = [
    pd.DataFrame(results)
      .assign(test=test_name, run=lambda frame: frame.index)
      .set_index(['test', 'run'])
    for test_name, results in out_dict.items()
]

In [None]:
df_all = pd.concat(all_dfs)

In [None]:
df_all['r'].unstack(0).pct_change().max().sort_values(ascending=False)*100

In [None]:
df_all['r'].unstack(0)['surface_properties.rb']

In [None]:
df_all['r'].unstack(0)['fan_on_off.rb']

In [None]:
# Tests inspected individually in the cells above: surface_properties.rb, fan_on_off.rb
# (this was a bare expression, which raises NameError when the cell runs)

In [None]:
 set([".".join([x[0], x[1]]) for x in diffs[diffs > 1].index]) - set(df_all['r'].unstack(0).pct_change().columns)

In [None]:
result = pd.Series(r)
result

In [None]:
100*(result - result.iloc[0])/result.iloc[0]

In [None]:
# shell=False: with shell=True and an argument *list*, POSIX runs only the
# first item as the shell command and hands the remaining items to the shell
# itself, so OSCLI would be started without 'model_tests.rb -n /availability/'.
process = subprocess.Popen([OSCLI, 'model_tests.rb', '-n', '/availability/'], shell=False,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
# Wait for termination and collect both streams
out, err = process.communicate()

In [None]:
out

In [None]:
r

In [None]:
r

In [None]:
o

In [None]:
e

# Compare With Custom Tags

In [None]:
keep_only_runs = ['Ubuntu_run1', 'Ubuntu_run2', 'Windows_run1', 'Windows_run2']

In [None]:
df_files = regression_analysis.find_info_osws_with_tags(compat_matrix=compat_matrix)
subset_files = df_files[[x for x in df_files.columns
                         if x[1] == '2.4.1' 
                         and x[2] in keep_only_runs
                        ]]
subset_success = regression_analysis.success_sheet(subset_files)

In [None]:
subset_success[subset_success['n_fail+missing']>0]

## First filter only tests that have some variations in site kBTU

I check for tests where the min across runs isn't equal to the max

In [None]:
site_kbtu = df_files.applymap(regression_analysis.parse_total_site_energy)

In [None]:
# Restrict to our version of interest, drop rows with all nan
site_kbtu_241 = site_kbtu['8.8.0']['2.4.1'].dropna(how='all')

# Keep only the custom tagged ones
site_kbtu_241 = site_kbtu_241[[x for x in site_kbtu_241.columns if x in keep_only_runs]]

# Filter on rows where the min is not the max.
# The pandas reductions skip NaN. The builtin min()/max() used previously
# depend on *where* a NaN sits in the row: a NaN in first position always
# flagged the row as varying, while a NaN elsewhere was ignored.
row_min = site_kbtu_241.min(axis=1)
row_max = site_kbtu_241.max(axis=1)
# notnull(): a row with no value at all in the kept runs has nothing to compare
site_kbtu_241 = site_kbtu_241[row_min.notnull() & (row_min != row_max)]

# Make a multiindex: 'Ubuntu_run1' -> ('Ubuntu', 'run1')
site_kbtu_241.columns = pd.MultiIndex.from_tuples([x.split('_') for x in site_kbtu_241.columns],
                                                 names=['Platform', 'Run'])

For these tests where we have variations, we can visualize the deviation for each platform/run using a boxplot:

In [None]:
fig, ax = plt.subplots(figsize=(16,9))
site_kbtu_241.boxplot(ax=ax, grid=False)
ax.set_title('Boxplot of tests that have variations, by platform and run')
ax.set_ylabel('Total site kBTU')
plt.show()

In [None]:
site_kbtu_241.plot(kind='kde')

## Check biggest differences by looking at CV

Second, I calculate the coefficient of variation ($CV$) for each test = standard deviation ($\sigma$) divided by mean ($\mu$)

$$CV = \frac{\sigma}{\mu}$$

I then use a set tolerance to filter out tests that have a CV that isn't above or equal to the tolerance.

In [None]:
# Coefficient of variation: standard deviation divided by mean
cv = site_kbtu_241.std(axis=1) / site_kbtu_241.mean(axis=1)
cv.name = 'Coefficient of Variation'

In [None]:
cv_tol = 0.00001
print("Setting CV Tolerance to {:.3%}".format(cv_tol))

In [None]:
sns.set_style('white')
sns.set_palette('Set2')

In [None]:
ax = cv[cv >= cv_tol].sort_values(ascending=True).plot(kind='barh', figsize=(16,9))
# Show the x axis as percentages through a formatter instead of fixed tick
# labels: labels stay in sync with the tick positions if the limits change,
# and matplotlib does not warn about set_xticklabels without set_xticks.
ax.xaxis.set_major_formatter(
    mpl.ticker.FuncFormatter(lambda x, pos: '{:3.2f}%'.format(x*100)))
ax.set_title('Coefficient of Variation for tests that are above cv_tol={:.3%}'.format(cv_tol))
plt.show()

**Using the same tests, we can visualize the total site kBTU for each:**

In [None]:
fig, ax = plt.subplots()
site_kbtu_241.reindex(index=cv[cv >= cv_tol].index).plot(kind='bar', ax=ax)
ax.set_title('Total site kBTU for tests that are above the CV tolerance')
ax.set_ylabel('Total Site kBTU')
plt.show()

## Could Ruby test just be unstable regardless of platform?

1) First, **the big differences are mostly in the ruby tests** (except 2.). I've already mentioned that I fixed a bunch of instabilities in the ruby tests, but there are some I couldn't fix yet: **could the ruby tests in question just be unstable regardless of platform?**

I plot the entire heatmap (all OS versions) of these tests which have a CV >= cv_tol:

In [None]:
toplot = site_kbtu.reindex(index=cv[cv >= cv_tol].index)
toplot = toplot[[x for x in toplot.columns if x[2] in ([''] + keep_only_runs)]]

In [None]:
regression_analysis.heatmap_sitekbtu_pct_change(site_kbtu=toplot,
                            row_threshold=0.0000, display_threshold=0.0001, 
                            savefig=False, show_plot=True, figsize=(16,6))

The following tests are unstable regardless of platform:
    
* airloop_and_zonehvac.rb
* evaporative_cooling.rb
* surface_properties.rb 
* unitary_system_performance_multispeed.rb (edited)

The big unknown is **what the heck happened in Windows Run 1 for unitary_vav_bypass**?

## One OSM test produces different results on different platforms

----

2) One very important exception to (1) above is the `evaporative_cooling.osm` test: **it seems to be stable on both platforms, but it doesn't produce the same numbers on Ubuntu versus Windows! Further investigation is warranted.**

Note: You might say it's hard to tell if the OSM is stable on a given platform with two runs. I ran it 10 times on Ubuntu, and it is stable.

    count    1.000000e+01
    mean     7.632714e+06
    std      9.817002e-10
    min      7.632714e+06
    25%      7.632714e+06
    50%      7.632714e+06
    75%      7.632714e+06
    max      7.632714e+06
    dtype: float64

## Run N more times on a given machine to have more info

In [None]:
torun

In [None]:
# Tests whose coefficient of variation reaches the tolerance
torun = (cv[cv >= cv_tol].index.tolist())

# One "(test_<name>_<ext>)" alternative per test, OR-ed into a single
# regex name filter wrapped as '/.../'
alternatives = ["({})".format("test_{}_{}".format(test, ext))
                for (test, ext) in torun]
s = "'/" + "|".join(alternatives) + "/'"
print("ruby model_tests.rb -n {}".format(s))

## Reload with more runs

In [None]:
df_files = regression_analysis.find_info_osws_with_tags(compat_matrix=compat_matrix)
subset_files = df_files[[x for x in df_files.columns
                         if x[1] == '2.4.1' 
                         #and x[2] != 'Ubuntu_run2'
                        ]]
# Keep only those that I run more than twice
subset_files = subset_files.loc[subset_files[('8.8.0', '2.4.1', 'Ubuntu_run3')].notnull()]

# Parse site_kbtu
site_kbtu = subset_files.applymap(regression_analysis.parse_total_site_energy)

# Restrict to our version of interest, drop rows with all nan
site_kbtu_241 = site_kbtu['8.8.0']['2.4.1'].dropna(how='all')

# Keep only the custom tagged ones
site_kbtu_241 = site_kbtu_241[[x for x in site_kbtu_241.columns if x != '']]

# Make a multiindex 
site_kbtu_241.columns = pd.MultiIndex.from_tuples([x.split('_') for x in site_kbtu_241.columns],
                                                 names=['Platform', 'Run'])

In [None]:
site_kbtu_241.groupby(level='Platform', axis=1).mean().plot(kind='bar')

In [None]:
def heatmap_from_pct_diff(toplot, display_threshold=0.001,
                          title=None):
    """Draw a three-layer annotated heatmap of fractional differences.

    The same axes receive three seaborn heatmaps, each masked to a different
    set of cells:

    1. ``abs(value) > display_threshold``: coloured by magnitude on the
       'YlOrRd' scale (0 to 0.5) and annotated with the signed value ('.2%').
    2. non-zero with ``abs(value) <= display_threshold``: flat grey, annotated
       in italics with more decimals ('.4%').
    3. exactly zero: flat green, no annotation.

    Parameters
    ----------
    toplot : pandas.DataFrame
        Signed fractional differences (0.01 is displayed as 1%).
    display_threshold : float
        Magnitude above which a cell is coloured by value.
    title : str, optional
        Axes title; no title is set when falsy.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    # Two custom single-colour cmaps. ListedColormap expects a *sequence* of
    # colours; a bare string only worked by accident and produced a map with
    # len('#f7f7f7') == 7 identical entries.
    grey_cmap = mpl.colors.ListedColormap(['#f7f7f7'])
    green_cmap = mpl.colors.ListedColormap(['#f0f7d9'])

    # Magnitudes are needed by every mask below: compute them once
    magnitude = toplot.abs()

    # Fixed width; height scales with the rows/columns ratio of the table
    w = 16
    h = w * toplot.shape[0] / (3 * toplot.shape[1])

    fig, ax = plt.subplots(figsize=(w, h))

    # Colorbar tick formatter, same as: fmt = lambda x,pos: '{:.1%}'.format(x)
    def fmt(x, pos): return '{:.1%}'.format(x)

    # Plot with colors, for those that are above the display_threshold
    sns.heatmap(magnitude, mask=magnitude <= display_threshold,
                ax=ax, cmap='YlOrRd',  # cmap='Reds', 'RdYlGn_r'
                vmin=0, vmax=0.5,
                cbar_kws={'format': mpl.ticker.FuncFormatter(fmt)},
                annot=toplot, fmt='.2%', linewidths=.5)

    # Plot a second heatmap on top, only for those that are below
    sns.heatmap(toplot, mask=((magnitude > display_threshold) |
                              (magnitude == 0)),
                cbar=False,
                annot=True, fmt=".4%", annot_kws={"style": "italic"},
                ax=ax, cmap=grey_cmap)

    # Plot a third heatmap on top, only for those that are zero,
    # no annot just green
    sns.heatmap(toplot, mask=(magnitude != 0),
                cbar=False,  # linewidths=.5, linecolor='#cecccc',
                annot=False,
                ax=ax, cmap=green_cmap)

    if title:
        ax.set_title(title)

    plt.show()


In [None]:
# % from the mean siteKBTU of the test
toplot = ((site_kbtu_241.T - site_kbtu_241.T.mean())/(site_kbtu_241.T.mean())).T

heatmap_from_pct_diff(toplot, title='Percentage difference from mean of test (both_versions)',
                     display_threshold=0.001)

In [None]:
# % from the mean siteKBTU of the test
mean_ubuntu = site_kbtu_241['Ubuntu'].mean(axis=1)
toplot = ((site_kbtu_241.T - mean_ubuntu)/(mean_ubuntu)).T

heatmap_from_pct_diff(toplot, title='Percentage difference from mean of test for Ubuntu platform',
                     display_threshold=0.0001)

# Delete previous runs

In [None]:
files = gb.glob('./test/plant_op_schemes_*')
files

In [None]:
for f in files:
    os.remove(f)

# Look at out.osw

In [None]:
!ls test/*generator*

In [None]:
data = regression_analysis.load_osw('test/generator_microturbine.rb_2.0.4_out.osw')

In [None]:
data

# Rename out.osw

In [None]:
# Copy all tagged runs into a 'Tagged' directory for zipping
# Imported here so that the cell does not depend on an import made in an
# unrelated cell much earlier in the notebook.
from shutil import copyfile

tagged_dir = os.path.join('test', 'Tagged')
# os.mkdir raises if the directory already exists; guard so that the cell
# can be re-run.
if not os.path.isdir(tagged_dir):
    os.mkdir(tagged_dir)

for f in gb.glob('test/*.osw'):
    if 'out_' in f:
        print(f)
        # Build the destination from the file name rather than by replacing
        # 'test/' in the path, which assumes '/' separators.
        dst_path = os.path.join(tagged_dir, os.path.basename(f))
        copyfile(f, dst_path)
    

In [None]:
# Zip in one go...
import zipfile
import glob as gb
with zipfile.ZipFile("Tagged.zip", "w") as z:
    for f in gb.glob('test/*.osw'):
        if 'out_' in f:
            z.write(f)

In [None]:
# Verify it worked: list the archive contents. The context manager closes
# the file even if printdir() raises, as in the cell that writes the archive.
with zipfile.ZipFile("Tagged.zip") as z:
    z.printdir()