In [None]:
from solvePlot import isNumber
import pandas as pd
import altair as alt
import os
import re

In [None]:
# Result categories shown in the heat map, each paired with its fill colour.
# Unzipping the pairs keeps every category next to the colour it maps to.
colordomain, colorrange = (
    list(column) for column in zip(
        ('Solution', '#4DAF4A'),
        ('None', '#E41A1C'),
        ('UND', '#377EB8'),
        ('MEM', '#170EB8'),
    )
)

# Lift Altair's default cap on the number of rows a chart may embed.
alt.data_transformers.disable_max_rows()

def getResults(shape, getInProgress=False, getCancelled=False):
    """Parse the solver output files found directly inside directory `shape`.

    Only files whose path matches ``<nCubeTypes>t_<nColors>c.out`` are
    read; other files are skipped and sub-directories are not descended
    into. The outcome of a run is taken from the last line of its file.

    Args:
        shape: Path of the directory holding the ``*.out`` files.
        getInProgress: If true, report empty files as rows with result
            'In progress'; otherwise they are skipped.
        getCancelled: If true, report files whose last line starts with
            'Job submitted date = ' as rows with result 'Cancelled';
            otherwise they are skipped.

    Returns:
        A pandas DataFrame with one row per reported file and the columns
        'nCubeTypes', 'nColors', 'variables', 'clauses', 'result',
        'category' and 'ratio'. The columns are present even when no file
        was reported (e.g. missing or empty directory).
    """
    columns = [
        'nCubeTypes', 'nColors', 'variables', 'clauses',
        'result', 'category', 'ratio',
    ]
    # Results that are not an actual solution rule.
    nonSolutions = ('Cancelled', 'UND', 'None', 'Error', 'TIMEOUT', 'In progress')
    namePattern = re.compile(r'([0-9]+)t_([0-9]+)c\.out')
    sizePattern = re.compile(r'Using (\d+) variables and (\d+) clauses')

    # os.walk yields nothing for a missing directory; fall back to "no files"
    # instead of leaving the file list undefined.
    root, _, names = next(os.walk(shape), (shape, [], []))

    results = []
    for name in names:
        path = os.path.join(root, name)
        nameMatch = namePattern.search(path)
        if nameMatch is None:
            # Not a solver output file; nothing to extract from its name.
            continue

        with open(path) as f:
            lines = f.readlines()

        # Problem size is only looked for in sufficiently long outputs,
        # where it is expected on the second line.
        nVars = nClauses = None
        if len(lines) > 8:
            sizeMatch = sizePattern.search(lines[1])
            if sizeMatch:
                nVars = int(sizeMatch.group(1))
                nClauses = int(sizeMatch.group(2))

        # Empty files have no last line; use '' so later checks stay safe.
        lastLine = lines[-1] if lines else ''
        if not lines:
            if not getInProgress:
                continue
            result = 'In progress'
        else:
            result = lastLine.strip()

        if result.startswith('Job submitted date = '):
            if not getCancelled:
                continue
            result = 'Cancelled'
        if result in ('---------------', 'Job output ends'):
            result = 'Error'

        if result in nonSolutions:
            category = result
            ratio = 0.0
        else:
            category = 'Solution'
            ratio = 1.0

        # A bare number as the final line is stored as the ratio of an
        # undetermined ('UND') run.
        if isNumber(result):
            ratio = float(result)
            category = 'UND'

        if 'out-of-memory' in lastLine:
            category = 'MEM'

        results.append({
            'nCubeTypes': int(nameMatch.group(1)),
            'nColors': int(nameMatch.group(2)),
            'variables': nVars,
            'clauses': nClauses,
            'result': result,
            'category': category,
            'ratio': ratio
        })
    # Explicit columns keep the frame usable (e.g. df['category']) when empty.
    return pd.DataFrame(results, columns=columns)

def plotData(df):
    """Build a layered species-vs-colors chart from a results DataFrame.

    The bottom layer draws one rectangle per (nCubeTypes, nColors) cell,
    filled according to 'category'. The top layer draws a translucent black
    circle per cell, sized by 'ratio', with a tooltip. Both layers link to a
    URL computed from the row's 'result' value.

    Args:
        df: DataFrame with the columns 'nCubeTypes', 'nColors', 'result',
            'category' and 'ratio'.

    Returns:
        The Altair chart made of the rectangle layer plus the circle layer.
    """
    link = 'https://akodiat.github.io/polycubes?rule=' + alt.datum.result
    linked = alt.Chart(df).transform_calculate(url=link)

    def axes():
        # Fresh channel objects for each layer; both layers share the grid.
        return (
            alt.X('nCubeTypes:O', title='# of species'),
            alt.Y('nColors:O', title='# of colors', sort='-y'),
        )

    fill = alt.Color(
        'category',
        scale=alt.Scale(domain=colordomain, range=colorrange),
    )
    tiles = linked.mark_rect().encode(
        *axes(),
        color=fill,
        href='url:N',
    )
    dots = linked.mark_circle(color='black', opacity=0.5).encode(
        *axes(),
        size=alt.Size('ratio:Q'),
        tooltip=['result', 'nCubeTypes', 'nColors', 'ratio'],
        href='url:N',
    )
    return tiles + dots

In [None]:
def getSmallest(df, measure):
    """Return the minimum of column `measure` over the 'Solution' rows of `df`.

    Args:
        df: DataFrame with a 'category' column and a `measure` column.
        measure: Name of the column to minimise.

    Returns:
        The smallest `measure` value among rows whose 'category' equals
        'Solution'.
    """
    isSolution = df['category'] == 'Solution'
    return df.loc[isSolution, measure].min()

In [None]:
# IPython shell capture (not plain Python): store the output lines of `ls -d`,
# i.e. the directories matching all_8-mers/8-mer_*/, in `paths`.
paths = !ls -d all_8-mers/8-mer_*/

In [None]:
# One row per shape directory: the smallest number of species ('minNs') and
# of colors ('minNc') among its 'Solution' results. Each directory is parsed
# only once and the parsed frame is reused for both measures.
df = pd.DataFrame([{
    'minNs': getSmallest(results, 'nCubeTypes'),
    'minNc': getSmallest(results, 'nColors'),
    'path': path
} for path, results in ((path, getResults(path)) for path in paths)])

In [None]:
# Side-by-side bar charts counting shapes per minimal number of species
# (left) and per minimal number of colours (right).
alt.hconcat(*(
    alt.Chart(df).mark_bar().encode(
        alt.X(field, title=label),
        y='count()',
    )
    for field, label in (
        ("minNs:O", 'Number of species'),
        ("minNc:O", 'Number of colours'),
    )
))

In [None]:
# Show the per-shape summary ordered by minimal number of species.
df.sort_values(by=['minNs'])

In [None]:
# Chart the parsed results found in directory '8-mer_89'.
plotData(getResults('8-mer_89'))

In [None]:
# Chart the parsed results found in directory '8-mer_148'.
plotData(getResults('8-mer_148'))

In [None]:
# Smallest nCubeTypes over all reported rows of '8-mer_134', whatever their
# category (compare with getSmallest, which only considers 'Solution' rows).
getResults('8-mer_134')['nCubeTypes'].min()

In [None]:
# Display the full parsed results table for directory '8-mer_134'.
getResults('8-mer_134')

In [None]:
# Smallest nCubeTypes among the 'Solution' rows of directory '8-mer_134'.
getSmallest(getResults('8-mer_134'), 'nCubeTypes')

In [None]:
# Display the captured list of shape directories.
paths