In [1]:
import seaborn as sns

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from osprey.config import Config



## Get Trial Databases

In [3]:
data_dir = '/Users/robert_arbon/Google Drive/Research/Hyperparameter_Optimisation/Code/Trial Data/DHFR'

In [4]:
from sqlalchemy.types import (TypeDecorator, Text, Float, Integer, Enum,
                              DateTime, String, Interval)
import json
from osprey.trials import Trial

In [5]:
class JSONEncoded(TypeDecorator):
    impl = Text

    def process_bind_param(self, value, dialect):
        if value is not None:
            value = json.dumps(value)
        return value

    def process_result_value(self, value, dialect):
        if value is not None:
            value = json.loads(value)
        return value

In [6]:
def trials_to_dict(trials, columns):
    for trial in trials:
        d = {}
        for i, item in enumerate(columns.items()):
            key, val = item
            new_val = trial[i]
            if isinstance(val.type, JSONEncoded):
                new_val = json.load(StringIO(new_val))
            d[key] = new_val
        yield d

In [7]:
def trials_dataframe(config):
    db = config.trials()
    columns = Trial.__table__.columns
    table_name = Trial.__tablename__

    cmd = 'SELECT * FROM %s' % table_name

    query = db.execute(cmd)
    results = query.fetchall()
    trial_dict = trials_to_dict(results, columns)
    return pd.DataFrame(trial_dict)

In [8]:
config = Config(data_dir+'/Random-var-GMRQ-2/alpha_angle.yaml')
df1 = trials_dataframe(config)
df1['Strategy'] = 'GMRQ(2)'
df = df1
# config = Config(data_dir+'/Random-var-GMRQ-CSE/alpha_angle.yaml')
# df2 = trials_dataframe(config)
# df2['Strategy'] = 'GMRQ(CSP)'

# df = pd.concat([df1, df2])


Loading config file:     /Users/robert_arbon/Google Drive/Research/Hyperparameter_Optimisation/Code/Trial Data/DHFR/Random-var-GMRQ-2/alpha_angle.yaml...
Loading trials database: sqlite:///osprey-trials.db...


In [9]:
print(df.shape)
df.head()
df.rename(columns={'project_name': 'Feature', 'status': 'Status'}, inplace=True)
df.columns


(5159, 18)


Index(['completed', 'config_sha1', 'elapsed', 'host', 'id', 'mean_test_score',
       'mean_train_score', 'n_test_samples', 'n_train_samples', 'parameters',
       'Feature', 'started', 'Status', 'test_scores', 'traceback',
       'train_scores', 'user', 'Strategy'],
      dtype='object')

In [10]:
rename = {'alpha_angle': r'$\alpha$', 
          'kappa_angle': r'$\kappa$',
          'psi-o_tor': r'($\psi,\omega$)', 
         'pp_tor': r'($\psi,\phi$)', 
         'psi_tor': r'$\psi$', 
         'phi-o_tor': r'($\phi,\omega$)',
         'phi_tor': r'$\phi$', 
         'omega_tor': r'$\omega$'}

df['Feature'] = df['Feature'].apply(lambda x: rename[x])

rename_status ={'SUCCEEDED': 'Succeeded', 'FAILED':'Failed'}
df['Status'] = df['Status'].apply(lambda x: rename_status[x])

In [11]:
df.drop_duplicates(subset='parameters', inplace=True)

## Chart 

In [13]:
df.head()

Unnamed: 0,completed,config_sha1,elapsed,host,id,mean_test_score,mean_train_score,n_test_samples,n_train_samples,parameters,Feature,started,Status,test_scores,traceback,train_scores,user,Strategy
0,2017-08-18 20:16:49.812477,8fd40cd83d5f530a53c05fada83967b19a248199,1970-01-01 00:07:29.210559,compute308.bc4.acrc.priv,1,2.842569,2.996021,"[200000, 200000, 200000, 200000, 200000]","[200000, 200000, 200000, 200000, 200000]","{""variance_cut__threshold"": 0.0206044928662746...",$\alpha$,2017-08-18 20:09:20.601918,Succeeded,"[2.977152429810875, 2.9909674944247566, 2.9662...",,"[2.988860204149737, 2.9987766501032995, 2.9960...",ra15808,GMRQ(2)
1,2017-08-18 20:54:14.571878,8fd40cd83d5f530a53c05fada83967b19a248199,1970-01-01 00:44:53.918217,compute308.bc4.acrc.priv,2,2.954952,2.997943,"[200000, 200000, 200000, 200000, 200000]","[200000, 200000, 200000, 200000, 200000]","{""variance_cut__threshold"": 0.0017826497151386...",$\alpha$,2017-08-18 20:09:20.653661,Succeeded,"[2.953804573511941, 2.946792661204334, 2.97513...",,"[2.9998938576712026, 2.9995053860291425, 2.999...",ra15808,GMRQ(2)
2,2017-08-18 20:17:13.574895,8fd40cd83d5f530a53c05fada83967b19a248199,1970-01-01 00:07:52.835159,compute308.bc4.acrc.priv,3,2.943055,2.997429,"[200000, 200000, 200000, 200000, 200000]","[200000, 200000, 200000, 200000, 200000]","{""variance_cut__threshold"": 0.0001694044709524...",$\alpha$,2017-08-18 20:09:20.739736,Succeeded,"[2.9896072802469207, 2.7900464723106633, 2.997...",,"[2.999816341015027, 2.994605618457376, 2.99977...",ra15808,GMRQ(2)
3,2017-08-18 21:00:39.369673,8fd40cd83d5f530a53c05fada83967b19a248199,1970-01-01 00:51:18.606086,compute308.bc4.acrc.priv,4,2.967834,2.999385,"[200000, 200000, 200000, 200000, 200000]","[200000, 200000, 200000, 200000, 200000]","{""variance_cut__threshold"": 0.0002900604571717...",$\alpha$,2017-08-18 20:09:20.763587,Succeeded,"[2.9746196809816787, 2.9806604040291518, 2.892...",,"[2.999655835689648, 2.99996471637393, 2.999711...",ra15808,GMRQ(2)
4,2017-08-18 23:19:59.969078,8fd40cd83d5f530a53c05fada83967b19a248199,1970-01-01 03:10:39.106725,compute308.bc4.acrc.priv,5,2.901105,2.999565,"[200000, 200000, 200000, 200000, 200000]","[200000, 200000, 200000, 200000, 200000]","{""variance_cut__threshold"": 0.0003473834041118...",$\alpha$,2017-08-18 20:09:20.862353,Succeeded,"[2.9787622984525073, 2.950281301722762, 2.9654...",,"[2.998880342228884, 2.9995641203003856, 2.9995...",ra15808,GMRQ(2)


In [20]:
success_rate = df.loc[df['Strategy']=='GMRQ(2)', :].groupby(['Strategy', 'Feature', 'Status'])['completed'].aggregate(np.size)

In [21]:
success_rate = pd.DataFrame(success_rate).reset_index()

In [22]:
success_rate = pd.pivot_table(success_rate, index=['Strategy', 'Feature'], columns='Status', 
                              values='completed', aggfunc=np.sum, margins=True)

In [23]:
success_rate['Rate'] = success_rate['Succeeded']/success_rate['All']*100

In [24]:
success_rate.style.format({ 'Rate': '{:0.1f}%'})

Unnamed: 0_level_0,Status,Failed,Succeeded,All,Rate
Strategy,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GMRQ(2),$\alpha$,5,395,400,98.8%
GMRQ(2),$\kappa$,4,374,378,98.9%
GMRQ(2),$\omega$,49,331,380,87.1%
GMRQ(2),$\phi$,1,379,380,99.7%
GMRQ(2),$\psi$,9,371,380,97.6%
GMRQ(2),"($\phi,\omega$)",5,325,330,98.5%
GMRQ(2),"($\psi,\omega$)",10,353,363,97.2%
GMRQ(2),"($\psi,\phi$)",602,174,776,22.4%
All,,685,2702,3387,79.8%


In [17]:
success_rate.reset_index().groupby('Strategy').aggregate(np.sum)

Status,Failed,Succeeded,All,Rate
Strategy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All,1426.0,5300.0,6726.0,78.798692
GMRQ(2),685.0,2702.0,3387.0,700.318191
GMRQ(CSP),741.0,2598.0,3339.0,669.269573
