# Table of Contents
 <p>

In [21]:
import matplotlib
import matplotlib.pyplot as mplplot
import matplotlib.sankey
import re
import pprint
%matplotlib inline

In [35]:
def parseQuestion(line, parsedQuestions):
    parsedQuestion = re.search(r"@question '([^']+)', '([^']+)', leadsTo: '([^']+)'", line)
    if parsedQuestion is None :
        parsedQuestion = re.search(r"@question '([^']+)', '([^']+)'", line)
    if parsedQuestion is not None :
        parsedQuestions.append({'CONTEXT': parsedQuestion.group(1),
                                'TEXT' : parsedQuestion.group(2),
                                'LEADSTO' : parsedQuestion.group(3) if len(parsedQuestion.groups()) > 2 else None,
                                'ANSWERS' : [] , 
                                'CHECKBOXES' : []
                               })

def parseAnswer(line, parsedQuestions) :
    parsedAnswer = re.search(r"@answer '([^']+)', leadsTo: '([^']+)'", line)
    if parsedAnswer is None :
        parsedAnswer = re.search(r"@answer '([^']+)'", line)
    if parsedAnswer is not None :
        parsedQuestions[-1]['ANSWERS'].append({'TEXT' : parsedAnswer.group(1) + '_' + str(len(parsedQuestions)), 
                                               'LEADSTO' : parsedAnswer.group(2) if len(parsedAnswer.groups()) > 1 else None})

def parseCheckbox(line, parsedQuestions) :
    parsedCheckbox = re.search(r"@checkbox '([^']+)'", line)
    parsedQuestions[-1]['CHECKBOXES'].append({'TEXT' : parsedCheckbox.group(1) + '_' + str(len(parsedQuestions)), 
                                              'LEADSTO' : None})

def findShortText(fullText, parsedQuestions):
    options = [question['CONTEXT'] + ': ' + question['TEXT'] for question in parsedQuestions if (question['TEXT'] is not None and fullText in question['TEXT'])]
    return 'Terminate: ' + question['TEXT'] if isinstance(options, list) and len(options) == 0 else options[0]    
    
def parseDecisionTree(treeFile) :
    parsedQuestions = []
    with open(treeFile, 'r') as decisionTreeLines :
        for line in decisionTreeLines :
            if '@question' in line :
                parseQuestion(line, parsedQuestions)
            elif '@answer' in line :
                parseAnswer(line, parsedQuestions)
            elif '@checkbox' in line :
                parseCheckbox(line, parsedQuestions)
    # if question has a global "LEADSTO" then propagate the corresponding shorttext version to all 
    # answers/checkboxes that have no independent "LEADSTO" and for those that do have an independent
    # "LEADSTO" replace this with the shorttext version
    for question in parsedQuestions :
        if question['LEADSTO'] is not None :
            for answer in question['ANSWERS']:
                if answer['LEADSTO'] is None :
                    answer['LEADSTO'] = findShortText(question['LEADSTO'], parsedQuestions)
                else:
                    answer['LEADSTO'] = findShortText(answer['LEADSTO'], parsedQuestions)

            for checkbox in question['CHECKBOXES']:
                if checkbox['LEADSTO'] is None :
                    checkbox['LEADSTO'] = findShortText(question['LEADSTO'], parsedQuestions)
                else :
                    checkbox['LEADSTO'] = findShortText(checkbox['LEADSTO'], parsedQuestions)
    
    return parsedQuestions

In [36]:
parsedQuestions = parseDecisionTree('/Users/hughdickinson/Documents/Development/VERITAS/Zooniverse/Galaxy-Zoo/app/lib/illustris_tree.coffee')
printer = pprint.PrettyPrinter(indent=4) 
printer.pprint(parsedQuestions)

[   {   'ANSWERS': [   {'LEADSTO': 'How rounded is it?', 'TEXT': 'Smooth_1'},
                       {   'LEADSTO': 'Could this be a disk viewed edge-on?',
                           'TEXT': 'Features or disk_1'},
                       {   'LEADSTO': 'Would you like to discuss this object?',
                           'TEXT': 'Star or artifact_1'}],
        'CHECKBOXES': [],
        'CONTEXT': 'Shape',
        'LEADSTO': None,
        'TEXT': 'Is the galaxy simply smooth and rounded, with no sign of a '
                'disk?'},
    {   'ANSWERS': [   {   'LEADSTO': 'Does the galaxy have a bulge at its '
                                      'center? If so, what shape?',
                           'TEXT': 'Yes_2'},
                       {   'LEADSTO': 'Is there any sign of a bar feature '
                                      'through the center of the galaxy?',
                           'TEXT': 'No_2'}],
        'CHECKBOXES': [],
        'CONTEXT': 'Disk',
        'LEADSTO': None,


In [37]:
for question in parsedQuestions :
    for answer in question['ANSWERS'] :
        print ('{} [10] {}'.format(findShortText(question['TEXT'], parsedQuestions), 
                                   findShortText(answer['LEADSTO'], parsedQuestions)))

Shape: Is the galaxy simply smooth and rounded, with no sign of a disk? [10] Round: How rounded is it?
Shape: Is the galaxy simply smooth and rounded, with no sign of a disk? [10] Disk: Could this be a disk viewed edge-on?
Shape: Is the galaxy simply smooth and rounded, with no sign of a disk? [10] Discuss: Would you like to discuss this object?
Disk: Could this be a disk viewed edge-on? [10] Bulge: Does the galaxy have a bulge at its center? If so, what shape?
Disk: Could this be a disk viewed edge-on? [10] Bar: Is there any sign of a bar feature through the center of the galaxy?
Bar: Is there any sign of a bar feature through the center of the galaxy? [10] Terminate: Is there any sign of a bar feature through the center of the galaxy?
Bar: Is there any sign of a bar feature through the center of the galaxy? [10] Terminate: Is there any sign of a bar feature through the center of the galaxy?
Spiral: Is there any sign of a spiral arm pattern? [10] Spiral: How tightly wound do the spira

TypeError: 'in <string>' requires string as left operand, not NoneType