In [11]:
import pandas as pd

def probability(df):
    """Build an empirical-probability query function over the rows of ``df``.

    The returned callable accepts a query string and returns the relative
    frequency of matching rows. Supported query forms:

    * ``'field=value'``                 -- marginal probability
    * ``'f1=v1, f2=v2'``                -- joint probability (all must hold)
    * ``'f1=v1 | g1=w1, g2=w2'``        -- conditional probability

    Whitespace around fields, values, ``,`` and ``|`` is ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are referenced by the ``field`` names in queries.

    Returns
    -------
    callable
        ``p(query) -> number`` in ``[0, 1]``. Returns ``0`` when the
        conditioning event matches no rows, or when ``df`` has no rows.

    Raises (from the returned callable)
    -----------------------------------
    ValueError
        If a condition has no ``=`` or the query has more than one ``|``.
    KeyError
        If a referenced field is not a column of ``df``.
    """
    def parse_condition(condition):
        """Split ``'field = value'`` into a stripped ``(field, value)`` pair."""
        # partition splits on the first '=' only, so values may contain '='.
        field, separator, value = condition.strip().partition('=')
        if not separator:
            raise ValueError(
                f"malformed condition {condition!r}: expected 'field=value'"
            )
        return field.strip(), value.strip()

    def build_mask(conditions):
        """Return a boolean Series (indexed like ``df``) for a comma-separated
        conjunction of ``field=value`` conditions."""
        # Share df's index: a default RangeIndex would be label-aligned against
        # df's index by `&=` and corrupt the mask for non-default indexes.
        mask = pd.Series(True, index=df.index, dtype=bool)
        for cond in conditions.split(','):
            if not cond.strip():
                # Tolerate empty pieces such as a trailing comma.
                continue
            field, value = parse_condition(cond)
            column = df[field]
            is_numeric = pd.api.types.is_numeric_dtype(column)
            is_bool = pd.api.types.is_bool_dtype(column)
            if is_numeric and not is_bool:
                # Query values arrive as text; compare numeric columns
                # numerically. Unparseable text becomes NaN and matches nothing.
                value = pd.to_numeric(value, errors='coerce')
            mask &= (column == value)
        return mask

    def compute_probability(query):
        """Evaluate one query string against ``df``."""
        parts = query.split('|')
        if len(parts) > 2:
            raise ValueError(
                f"malformed query {query!r}: at most one '|' is allowed"
            )

        event_mask = build_mask(parts[0])
        given = parts[1].strip() if len(parts) == 2 else ''

        if given:
            given_mask = build_mask(given)
            support = given_mask.sum()
            if support == 0:
                # Conditioning event never occurs; report 0 rather than divide by 0.
                return 0
            return (event_mask & given_mask).sum() / support

        if len(df) == 0:
            # No rows at all: same convention as an empty conditioning event.
            return 0
        return event_mask.sum() / len(df)

    def p(query):
        """Return the empirical probability described by ``query``."""
        return compute_probability(query)

    return p

In [12]:
# Parse tennis.txt with a regex separator matching runs of whitespace. The raw
# string keeps the backslash from being read as an (invalid) string escape.
df = pd.read_csv('tennis.txt', sep=r'\s+')

# Query function bound to the loaded table.
p = probability(df)

In [13]:
# Example queries: two marginals followed by three conditionals,
# printed one result per line in this order.
example_queries = (
    'play=yes',
    'play = no',
    'outlook=sunny | play=yes',
    'outlook=rainy | play=no',
    'play=no | outlook=sunny, wind = weak',
)
for example_query in example_queries:
    print(p(example_query))

0.6428571428571429
0.35714285714285715
0.2222222222222222
0.4
0.6666666666666666
