# Day 4: Passport processing

In [63]:
import re

## Part 1
Check that every required field is present in each passport. `cid` is optional.

byr (Birth Year)  
iyr (Issue Year)  
eyr (Expiration Year)  
hgt (Height)  
hcl (Hair Color)  
ecl (Eye Color)  
pid (Passport ID)  
cid (Country ID)  

In [62]:
# Read the data and separate each passport into its own string in a list.
# The `with` block closes the file handle as soon as the content has been read.
with open("input/input-day-4.txt", "r") as input_file:
    data = input_file.read().split("\n\n")
# Replace the newlines inside a passport with spaces so each passport is a single line
data = [x.replace("\n", " ") for x in data]

p = re.compile(r"(\w+):") # Pattern for finding all the keys
required_fields = ["byr","iyr","eyr","hgt","hcl","ecl","pid"]

nr_of_valid_passports = 0

for passport in data:

    # List of the key names found in this passport, e.g. ["byr", "iyr", ...]
    passport_fields = p.findall(passport)

    # Check if all fields are present. cid is optional!
    # This can be achieved by seeing if the required fields are a subset of the passport fields
    if set(required_fields).issubset(passport_fields):
        nr_of_valid_passports += 1

print("Nr of valid: " + str(nr_of_valid_passports))
print("Tot nr of passports: " + str(len(data)))

Nr of valid: 247
Tot nr of passports: 299


## Part 2

Each field has specific rules

    byr (Birth Year) - four digits; at least 1920 and at most 2002.
    iyr (Issue Year) - four digits; at least 2010 and at most 2020.
    eyr (Expiration Year) - four digits; at least 2020 and at most 2030.
    hgt (Height) - a number followed by either cm or in:
        If cm, the number must be at least 150 and at most 193.
        If in, the number must be at least 59 and at most 76.
    hcl (Hair Color) - a # followed by exactly six characters 0-9 or a-f.
    ecl (Eye Color) - exactly one of: amb blu brn gry grn hzl oth.
    pid (Passport ID) - a nine-digit number, including leading zeroes.
     * "including leading zeroes" means that zeros at the start of the number count toward the nine digits.
    cid (Country ID) - ignored, missing or not.


In [409]:
# One or more digits immediately followed by the unit "cm" or "in"
height_p = re.compile(r"(?P<height>\d+)(?P<unit>cm|in)")
# "#" followed by exactly 6 characters "a-f" or digits "0-9"
hair_color_p = re.compile(r"\#[a-f0-9]{6}")
# Has to be exactly 9 digits long.
passport_id_p = re.compile(r"[0-9]{8}[0-9]$")
# Exactly four digits (used for byr / iyr / eyr)
_year_p = re.compile(r"[0-9]{4}")

# Inclusive (min, max) range allowed for each year field
_YEAR_RANGES = {
    "byr": (1920, 2002),
    "iyr": (2010, 2020),
    "eyr": (2020, 2030),
}
# Inclusive (min, max) height allowed for each unit
_HEIGHT_RANGES = {
    "cm": (150, 193),
    "in": (59, 76),
}
_EYE_COLORS = ("amb", "blu", "brn", "gry", "grn", "hzl", "oth")


def _is_year_valid(value, low, high):
    """Return True if `value` is exactly four digits and low <= int(value) <= high."""
    # The digit check runs first, so int() is never called on non-numeric text.
    return _year_p.fullmatch(value) is not None and low <= int(value) <= high


def _is_height_valid(value):
    """Return True if `value` is "<number>cm" or "<number>in" within the unit's range."""
    # fullmatch rejects extra characters before or after, e.g. "x150cm" or "150cm2".
    found = height_p.fullmatch(value)
    if found is None:
        return False
    low, high = _HEIGHT_RANGES[found.group("unit")]
    return low <= int(found.group("height")) <= high


def are_key_values_valid(fields):
    """Check every (key, value) pair of one passport against the Part 2 rules.

    Parameters
    ----------
    fields : iterable of sequences
        Each item holds the key at index 0 and the value string at index 1,
        e.g. the list of tuples returned by ``re.findall`` with two groups.

    Returns
    -------
    bool
        True if every recognised key has a valid value, False as soon as one
        value breaks its rule. Keys without a rule (such as "cid") are ignored.
        Presence of the required keys is NOT checked here.
    """
    # Iterate over the argument, not over a notebook-level variable, so the
    # result depends only on what the caller passes in.
    for field in fields:
        key = field[0]
        value = field[1]

        if key in _YEAR_RANGES:
            low, high = _YEAR_RANGES[key]
            if not _is_year_valid(value, low, high):
                return False

        elif key == "hgt":
            if not _is_height_valid(value):
                return False

        elif key == "hcl":
            # fullmatch: exactly "#" + six hex characters, nothing more.
            if hair_color_p.fullmatch(value) is None:
                return False

        elif key == "ecl":
            if value not in _EYE_COLORS:
                return False

        elif key == "pid":
            if passport_id_p.fullmatch(value) is None:
                return False

    return True

In [411]:
# Read the data and separate each passport into its own string in a list.
# The `with` block closes the file handle as soon as the content has been read.
# NOTE(review): the Part 1 cell reads "input/input-day-4.txt" - confirm which path is current.
with open("input-day-4.txt", "r") as input_file:
    data = input_file.read().split("\n\n")
# Replace the newlines inside a passport with spaces so each passport is a single line
data = [x.replace("\n", " ") for x in data]

p = re.compile(r"(\w+):(\#?\w*)") # Pattern for finding all (key, value) pairs
required_fields = ["byr","iyr","eyr","hgt","hcl","ecl","pid"]

nr_of_valid_passports = 0

for passport in data:

    # List of (key, value) tuples found in this passport
    passport_fields = p.findall(passport)

    keys = [field[0] for field in passport_fields]

    # Check if all fields are present. cid is optional!
    # This can be achieved by seeing if the required fields are a subset of the passport keys
    if set(required_fields).issubset(keys):

        # Check if each key value is valid
        if are_key_values_valid(passport_fields):
            nr_of_valid_passports += 1

print("Nr of valid: " + str(nr_of_valid_passports))
print("Tot nr of passports: " + str(len(data)))

Nr of valid: 145
Tot nr of passports: 299
