# Task 1

In [1]:
def read_data(file):
    """Reads the puzzle input and splits it into its three sections.

    Expected layout of the file: the rule lines, the header "your ticket:"
    followed by exactly one ticket line, a blank line, the header
    "nearby tickets:" and then one ticket per line. Blank lines inside the
    rule section and the nearby-ticket section (including trailing ones at
    the end of the file) are ignored.

    Args:
        file: path of the input file.

    Returns:
        A tuple (rules, my_ticket, other_tickets) with
        - the rules in the form:
            rules = {"rule_1": [[lower_1, upper_1], [lower_2, upper_2]], ...}
        - my ticket as a list of ints
        - the other tickets as a list of lists of ints

    Raises:
        ValueError: if the "your ticket:" header is missing, a rule does not
            consist of exactly two "lower-upper" ranges joined by " or ",
            or a field is not an integer.
    """

    with open(file) as f:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would eat a digit of the final ticket whenever the
        # file does not end with a newline.
        lines = [line.rstrip("\n") for line in f]

    your_ticket_index = lines.index("your ticket:")
    # Everything above the header belongs to the rules (blank separator lines
    # are skipped below); the ticket itself is the line right after the
    # header; the nearby tickets start four lines after the header
    # (ticket line, blank line, "nearby tickets:" header come in between).
    rules_lines = lines[: your_ticket_index]
    your_ticket_line = lines[your_ticket_index + 1]
    nearby_tickets_lines = lines[your_ticket_index + 4:]

    # extracting the rules:
    rules = {}
    for line in rules_lines:
        if not line.strip():
            continue
        # Split off the name first so that " or " is only searched for in
        # the part that holds the ranges.
        rule, _, intervals = line.partition(": ")
        first_interval, second_interval = intervals.split(" or ")
        rules[rule] = [
            [int(bound) for bound in first_interval.split('-')],
            [int(bound) for bound in second_interval.split('-')],
        ]

    # extracting my ticket:
    my_ticket = [int(entry) for entry in your_ticket_line.split(',')]

    # extracting the other tickets (skipping blank lines, e.g. at the file end):
    other_tickets = [
        [int(entry) for entry in line.split(',')]
        for line in nearby_tickets_lines
        if line.strip()
    ]

    return rules, my_ticket, other_tickets

In [2]:
def get_error_rate(rules, other_tickets):
    """Returns the ticket scanning error rate.

    The error rate is the sum of all values on the given tickets that do
    not fall into the ranges of any rule (as decided by is_compliant).
    """
    return sum(
        value
        for ticket in other_tickets
        for value in ticket
        if not is_compliant(value, rules)
    )


def is_compliant(num, rules):
    """Tells whether num lies inside one of the two ranges of at least one rule.

    rules maps a rule name to [[lower_1, upper_1], [lower_2, upper_2]];
    both bounds of a range are inclusive.
    """
    return any(
        bounds[0][0] <= num <= bounds[0][1] or bounds[1][0] <= num <= bounds[1][1]
        for bounds in rules.values()
    )

In [3]:
# testing: run Task 1 end to end on the test input file to check the parser
# and the error-rate computation (the recorded output of this cell is 71).
# Note: rules, my_ticket and other_tickets are notebook globals that later
# cells overwrite with their own input.
file = "data/test_data.txt"
rules, my_ticket, other_tickets = read_data(file)
error_rate = get_error_rate(rules, other_tickets)
print("The error rate is: {}".format(error_rate))

The error rate is: 71


In [4]:
# real data: same pipeline as the test cell, applied to the full input file.
# This overwrites the globals rules, my_ticket and other_tickets.
file = "data/data.txt"
rules, my_ticket, other_tickets = read_data(file)
error_rate = get_error_rate(rules, other_tickets)
print("The error rate is: {}".format(error_rate))

The error rate is: 29878


# Task 2

In [10]:
import numpy as np

In [7]:
def sort_valid_tickets(other_tickets, rules):
    """Keeps only the tickets on which every value satisfies at least one rule.

    Returns a new list; the order of the remaining tickets is unchanged and
    the input list is not modified.
    """

    def _every_value_matches(ticket):
        # One is_compliant check per value of the ticket.
        return all([is_compliant(value, rules) for value in ticket])

    return [ticket for ticket in other_tickets if _every_value_matches(ticket)]

In [18]:
def get_fulfilled_rules(other_tickets, rules):
    """Lists, for every ticket column, the rules that all tickets fulfill there.

    Args:
        other_tickets: list of tickets, each a list of numbers; all tickets
            must have the same length so they form a 2-D array.
        rules: {"rule": [[lower_1, upper_1], [lower_2, upper_2]], ...} with
            inclusive bounds.

    Returns:
        A list with one entry per column. Entry i is the list of rule names
        (in the iteration order of rules) for which every value in column i
        lies inside one of the rule's two ranges. An empty list is returned
        when there are no tickets, since no columns are known then.
    """

    tickets = np.array(other_tickets)
    # An empty ticket list yields a 1-D array without a column axis; bail out
    # here instead of failing on tickets.shape[1] with an opaque IndexError.
    if tickets.size == 0:
        return []

    fulfilled_rules = []
    for i in range(tickets.shape[1]):
        column = tickets[:, i]
        column_rules = []
        for rule, bounds in rules.items():
            # Element-wise range checks over the whole column at once.
            in_first = (bounds[0][0] <= column) & (column <= bounds[0][1])
            in_second = (bounds[1][0] <= column) & (column <= bounds[1][1])
            if (in_first | in_second).all():
                column_rules.append(rule)
        fulfilled_rules.append(column_rules)

    return fulfilled_rules

In [77]:
def get_column_rules(fulfilled_rules, rules):
    """Gets the one single rule for every column so that the column
    fulfills that rule and every rule is used at most once.

    Args:
        fulfilled_rules: list with one entry per column; entry i is the list
            of candidate rules for column i. The list is NOT modified, so
            the function can safely be called repeatedly with the same
            argument.
        rules: unused; kept so existing callers keep working.

    Returns:
        A list with the chosen rule for every column, in the original column
        order, or None if no assignment exists. For an empty input the
        (trivial) empty assignment [] is returned.
    """

    n_columns = len(fulfilled_rules)

    # Visit the columns with the fewest candidates first to keep the search
    # small. Sorting the column indices (instead of the candidate lists in
    # place) leaves the caller's list untouched and remembers which original
    # column every search depth corresponds to. sorted() is stable, so
    # columns with equally many candidates keep their relative order.
    order = sorted(range(n_columns), key=lambda column: len(fulfilled_rules[column]))

    # Iterative depth-first search with backtracking:
    #   chosen[d]  - rule currently assigned at search depth d
    #   cursor[d]  - index of the next candidate to try at depth d
    chosen = []
    cursor = [0] * n_columns

    while len(chosen) < n_columns:
        depth = len(chosen)
        candidates = fulfilled_rules[order[depth]]

        # Skip candidates that are already used by a shallower depth.
        pick = cursor[depth]
        while pick < len(candidates) and candidates[pick] in chosen:
            pick += 1

        if pick < len(candidates):
            chosen.append(candidates[pick])
            cursor[depth] = pick + 1
            # The next depth starts over with its first candidate.
            if depth + 1 < n_columns:
                cursor[depth + 1] = 0
        elif depth == 0:
            # Every candidate of the first column has been exhausted.
            print("No feasible solution found!")
            return None
        else:
            # Dead end: undo the previous depth's choice and try its next one.
            chosen.pop()

    print("Feasible solution found!")

    # Map the choices from search order back to the original column order.
    column_rules = [""] * n_columns
    for depth, rule in enumerate(chosen):
        column_rules[order[depth]] = rule
    return column_rules
                                                 
                                                 

def sort_back_to_original(rules_list, rules_with_index):
    """Restores the original column order of a solution.

    rules_list[i] is the rule chosen for the i-th entry of rules_with_index;
    every entry of rules_with_index is a one-item dict whose key is the
    original column index. The result has the rule of original column c at
    position c.
    """
    restored = [""] * len(rules_list)

    for position in range(len(rules_list)):
        original_column = list(rules_with_index[position])[0]
        restored[original_column] = rules_list[position]

    return restored

In [83]:
def get_multiplicative(my_ticket, column_rules):
    """Multiplies the values of my ticket whose column rule contains
    the keyword "departure".

    column_rules[i] is the rule assigned to column i of my_ticket.
    Returns 1 if no rule contains the keyword.
    """
    product = 1
    for position, rule_name in enumerate(column_rules):
        if "departure" not in rule_name:
            continue
        product *= my_ticket[position]

    return product

In [78]:
# testing: run the Task 2 pipeline on the second test input file:
# parse -> drop invalid tickets -> candidate rules per column -> one rule per column.
# Note: other_tickets is rebound to the filtered list, and all names here are
# notebook globals shared with the other cells.
file = "data/test_data_2.txt"
rules, my_ticket, other_tickets = read_data(file)
other_tickets = sort_valid_tickets(other_tickets, rules)
fulfilled_rules = get_fulfilled_rules(other_tickets, rules)
print("Options to fulfill:")
print(fulfilled_rules)
column_rules = get_column_rules(fulfilled_rules, rules)
print("Column rules:")
print(column_rules)

Options to fulfill:
[['row'], ['class', 'row'], ['class', 'row', 'seat']]
Feasible solution found!
Column rules:
['row', 'class', 'seat']


In [84]:
# real data: same Task 2 pipeline as the test cell, applied to the full input
# file, followed by the product of the "departure" fields of my ticket.
# NOTE(review): get_column_rules returns None when it finds no assignment;
# get_multiplicative is called on the result without a check.
file = "data/data.txt"
rules, my_ticket, other_tickets = read_data(file)
other_tickets = sort_valid_tickets(other_tickets, rules)
fulfilled_rules = get_fulfilled_rules(other_tickets, rules)
print("Options to fulfill:")
print(fulfilled_rules)
column_rules = get_column_rules(fulfilled_rules, rules)
print("Column rules:")
print(column_rules)
print("The multiplicative of the fields which contain departure: {}".format(
    get_multiplicative(my_ticket, column_rules)))

Options to fulfill:
[['departure station', 'departure track', 'departure time', 'arrival location', 'arrival platform', 'arrival track', 'price', 'route', 'type', 'wagon', 'zone'], ['departure location', 'departure station', 'departure platform', 'departure track', 'departure date', 'departure time', 'arrival location', 'arrival station', 'arrival platform', 'arrival track', 'class', 'duration', 'price', 'route', 'row', 'seat', 'train', 'type', 'wagon', 'zone'], ['arrival platform', 'price'], ['departure location', 'departure station', 'departure platform', 'departure track', 'departure date', 'departure time', 'arrival location', 'arrival platform', 'arrival track', 'class', 'price', 'route', 'seat', 'train', 'type', 'wagon', 'zone'], ['departure location', 'departure station', 'departure platform', 'departure track', 'departure date', 'departure time', 'arrival location', 'arrival platform', 'arrival track', 'class', 'price', 'route', 'seat', 'type', 'wagon', 'zone'], ['departure sta