In [43]:
import re

class Dish:
    '''A single dish: the ingredient words it lists and the allergens it declares.'''

    def __init__(self, ingredients, allergens):
        # Both collections are stored exactly as passed in (no copy is made).
        self.ingredients, self.allergens = ingredients, allergens
        
class DishBuilder:
    '''Parses textual dish descriptions into Dish objects.

    A description is a run of lower-case ingredient words separated by spaces,
    followed by "(contains ...)" holding a comma-separated list of allergens,
    for example "mxmxvkd kfcds (contains dairy, fish)".
    '''

    dishregex = r"(?P<ingredients>[a-z ]+) \(contains (?P<allergens>[a-z, ]+)\)"

    @staticmethod
    def _build(dishrepresentation):
        '''Parses one description line into a Dish.

        Raises:
            ValueError: if the line does not match ``dishregex``.
        '''
        m = re.match(DishBuilder.dishregex, dishrepresentation)
        if m is None:
            # re.match returns None when the pattern does not match; report the
            # offending line instead of failing later on ``None.group``.
            raise ValueError(
                "Malformed dish description: {!r}".format(dishrepresentation))
        # split() without an argument ignores repeated spaces, so no empty
        # ingredient strings are produced.
        ingredients = m.group("ingredients").split()
        allergens = m.group("allergens").split(', ')
        return Dish(ingredients, allergens)

    @staticmethod
    def buildall(dishrepresentations):
        '''Parses every description in the iterable and returns the Dish list.'''
        return [DishBuilder._build(dishrep) for dishrep in dishrepresentations]
    
class AllergenGuesser:
    '''Deduces which ingredient word stands for which allergen.

    Every method takes a list of dish objects exposing ``ingredients`` and
    ``allergens`` attributes (iterables of strings).
    '''

    @staticmethod
    def _flatten(t):
        '''Flattens one level of nesting: an iterable of iterables becomes a list.'''
        return [item for sublist in t for item in sublist]

    @staticmethod
    def guess(dishes):
        '''Returns a mapping of words we understand to their ingredient counterparts.

        The result maps each allergen name to the set of ingredient words that
        may still represent it. When the dishes carry enough information every
        set holds exactly one word. When they do not, the sets are returned as
        far reduced as possible instead of looping forever.
        '''

        # An allergen's ingredient must appear in every dish that lists the
        # allergen, so intersect the ingredient sets of those dishes.
        possiblemeanings = {}
        for dish in dishes:
            ingredients = set(dish.ingredients)
            for allergen in dish.allergens:
                if allergen in possiblemeanings:
                    possiblemeanings[allergen] &= ingredients
                else:
                    # Copy, so allergens of the same dish never share one set.
                    possiblemeanings[allergen] = set(ingredients)

        # Now these sets are reduced by removing every word that already has a
        # definitive meaning elsewhere.
        while any(len(meaning) > 1 for meaning in possiblemeanings.values()):
            progress = False
            for allergen, meaning in possiblemeanings.items():
                if len(meaning) != 1:
                    continue
                for itallergen in possiblemeanings:
                    if itallergen == allergen:
                        continue
                    if not possiblemeanings[itallergen].isdisjoint(meaning):
                        # Rebinding an existing key is safe while iterating.
                        possiblemeanings[itallergen] = possiblemeanings[itallergen] - meaning
                        progress = True
            if not progress:
                # Nothing could be eliminated in a full pass: the input is
                # ambiguous, and another pass would not change anything.
                break
        return possiblemeanings

    @staticmethod
    def nonallergens(dishes):
        '''Returns a set of words that are definitively not allergens.'''

        possiblemeanings = AllergenGuesser.guess(dishes)
        allingredients = set(AllergenGuesser._flatten([dish.ingredients for dish in dishes]))
        return allingredients - set(AllergenGuesser._flatten(possiblemeanings.values()))

    @staticmethod
    def allergens(dishes):
        '''Returns a set of words that may be allergens (all words not in nonallergens).'''

        # Candidate words are always drawn from dish ingredients, so their
        # union equals "all ingredients minus the non-allergens".
        return set(AllergenGuesser._flatten(AllergenGuesser.guess(dishes).values()))

    @staticmethod
    def countnonallergens(dishes):
        '''Returns the count of words in a list of dishes that are not allergens.

        Each occurrence is counted, so a word that appears in several dishes
        contributes once per appearance.
        '''

        from collections import Counter
        counter = Counter(AllergenGuesser._flatten([dish.ingredients for dish in dishes]))
        allergens = AllergenGuesser.allergens(dishes)
        return sum(count for word, count in counter.items() if word not in allergens)
            
# Read the puzzle input, one dish description per line.
filename = "./input.txt"
with open(filename) as f:
    # Iterate the file directly and skip blank lines (such as a trailing empty
    # line), which are not dish descriptions and cannot be parsed.
    stripped = (line.strip() for line in f)
    content = [line for line in stripped if line]

dishes = DishBuilder.buildall(content)
print("Solution to part 1: " + str(AllergenGuesser.countnonallergens(dishes)))

# Part 2: ingredient words ordered alphabetically by their allergen name.
orderedallergens = sorted(AllergenGuesser.guess(dishes).items())
# The one-element unpacking "(ingredient,)" requires every allergen to have
# exactly one candidate word; it raises ValueError otherwise.
cdil = ",".join(ingredient for allergen, (ingredient,) in orderedallergens)
print("Solution to part 2: " + str(cdil))

Solution to part 1: 2614
Solution to part 2: qhvz,kbcpn,fzsl,mjzrj,bmj,mksmf,gptv,kgkrhg
