**Packages**

In [1]:
import re

**Reading in the data**

In [2]:
def read_passport_file(filename):
    """Parse a passport batch file into a list of field dictionaries.

    A passport is a run of consecutive non-blank lines; passports are
    separated by one or more blank (or whitespace-only) lines. Each line
    holds whitespace-separated ``key:value`` tokens.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one dict (field name -> raw string value) per passport,
        in the order they appear in the file. If a key is repeated inside
        one passport, the last value wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token has no ``:`` separator.
    """
    all_info = []
    current = {}

    with open(filename, 'r') as f:
        for line in f:
            # split() with no argument tolerates tabs, repeated spaces and
            # the trailing newline, and yields [] for a whitespace-only line.
            tokens = line.split()
            if not tokens:
                # A blank line closes the passport being built; repeated
                # blank lines must not produce empty passports.
                if current:
                    all_info.append(current)
                    current = {}
                continue
            for token in tokens:
                # partition keeps any further ':' inside the value.
                key, sep, value = token.partition(':')
                if not sep:
                    raise ValueError(
                        "malformed field %r in %s (expected key:value)" % (token, filename)
                    )
                current[key] = value

    # The last passport is not necessarily followed by a blank line.
    if current:
        all_info.append(current)
    return all_info

In [3]:
# Parse the full puzzle input into a list of passport dictionaries.
passport_info = read_passport_file('input_day4.txt')

**Checking validity**

In [3]:
def has_necessary_fields(passport_dict):
    """Return True when every required passport field is present.

    The seven required fields are byr, iyr, eyr, hgt, hcl, ecl and pid.
    ``cid`` is optional, and any other extra key is ignored.

    Args:
        passport_dict: Mapping of field name -> raw string value.

    Returns:
        True if all seven required keys are in ``passport_dict``, else False.
    """
    # Check the actual key names rather than counting keys: a count-based
    # test would accept a passport that swaps a required field for an
    # unknown one.
    required_fields = {'byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid'}
    return required_fields.issubset(passport_dict.keys())

Test example:

In [4]:
# Parse the small example file used to sanity-check the part 1 logic.
test_info = read_passport_file('day4_test.txt')

In [5]:
# Field-presence verdict for each example passport, in file order.
list(map(has_necessary_fields, test_info))

[True, False, True, False]

**Answer to part 1**

In [6]:
# One boolean per puzzle-input passport: does it carry every required field?
valid_list = list(map(has_necessary_fields, passport_info))

In [7]:
# Booleans sum as 0/1, so this counts the passports that have all required fields.
sum(valid_list)

219

**Checking data validity**

In [8]:
def is_valid(passport_dict):
    """Return True when a passport has all required fields and each is valid.

    A passport that fails ``has_necessary_fields`` is rejected immediately.
    Otherwise each required field is passed to its ``*_correct`` validator,
    in the order byr, iyr, eyr, hgt, hcl, ecl, pid, stopping at the first
    failure.
    """
    if not has_necessary_fields(passport_dict):
        return False

    # (field name, validator) pairs, in evaluation order.
    field_checks = (
        ('byr', byr_correct),
        ('iyr', iyr_correct),
        ('eyr', eyr_correct),
        ('hgt', hgt_correct),
        ('hcl', hcl_correct),
        ('ecl', ecl_correct),
        ('pid', pid_correct),
    )
    # all() over a generator short-circuits just like a chain of `and`.
    return all(check(passport_dict[field]) for field, check in field_checks)

In [19]:
def byr_correct(byr):
    """Validate a birth year (``byr``).

    Rule: exactly four digits; at least 1920 and at most 2002.

    Args:
        byr: Raw field value as a string.

    Returns:
        True if the value satisfies the rule, else False. Malformed input
        (non-digits, wrong length, sign or padding) yields False rather
        than raising.
    """
    # Enforce the four-digit shape first so int() below can never raise and
    # values such as '02000' or '+1990' are not silently accepted.
    if not re.fullmatch(r'[0-9]{4}', byr):
        return False
    return 1920 <= int(byr) <= 2002

def iyr_correct(iyr):
    """Validate an issue year (``iyr``).

    Rule: exactly four digits; at least 2010 and at most 2020.

    Args:
        iyr: Raw field value as a string.

    Returns:
        True if the value satisfies the rule, else False. Malformed input
        (non-digits, wrong length, sign or padding) yields False rather
        than raising.
    """
    # Enforce the four-digit shape first so int() below can never raise.
    if not re.fullmatch(r'[0-9]{4}', iyr):
        return False
    return 2010 <= int(iyr) <= 2020

def eyr_correct(eyr):
    """Validate an expiration year (``eyr``).

    Rule: exactly four digits; at least 2020 and at most 2030.

    Args:
        eyr: Raw field value as a string.

    Returns:
        True if the value satisfies the rule, else False. Malformed input
        (non-digits, wrong length, sign or padding) yields False rather
        than raising.
    """
    # Enforce the four-digit shape first so int() below can never raise.
    if not re.fullmatch(r'[0-9]{4}', eyr):
        return False
    return 2020 <= int(eyr) <= 2030

def hgt_correct(hgt):
    """Validate a height (``hgt``).

    Rule: a number followed by either ``cm`` or ``in``.
      * If cm, the number must be at least 150 and at most 193.
      * If in, the number must be at least 59 and at most 76.

    Args:
        hgt: Raw field value as a string.

    Returns:
        True if the value satisfies the rule, else False. A missing unit,
        a missing number or a non-numeric prefix yields False rather than
        raising.
    """
    # Match the whole value in one go: digits, then the unit. This rejects
    # inputs like 'cm' or 'abcin' that would make a bare int() call raise.
    match = re.fullmatch(r'([0-9]+)(cm|in)', hgt)
    if match is None:
        return False

    num = int(match.group(1))
    if match.group(2) == 'cm':
        return 150 <= num <= 193
    return 59 <= num <= 76

def hcl_correct(hcl):
    """Validate a hair colour (``hcl``).

    Rule: a ``#`` followed by exactly six characters 0-9 or a-f.

    Args:
        hcl: Raw field value as a string.

    Returns:
        True if the value satisfies the rule, else False.
    """
    # Raw string avoids the invalid-escape warning; the explicit [0-9a-f]
    # class (instead of \d) keeps non-ASCII Unicode digits out. fullmatch
    # already pins the length to seven characters.
    return re.fullmatch(r'#[0-9a-f]{6}', hcl) is not None

def ecl_correct(ecl):
    """Validate an eye colour (``ecl``).

    Rule: exactly one of amb, blu, brn, gry, grn, hzl, oth.

    Returns:
        True if ``ecl`` is one of the seven accepted codes, else False.
    """
    return ecl in {'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'}

def pid_correct(pid):
    """Validate a passport ID (``pid``).

    Rule: a nine-digit number, including leading zeroes.

    Args:
        pid: Raw field value as a string.

    Returns:
        True if the value is exactly nine digits 0-9, else False.
    """
    # Raw string avoids the invalid-escape warning; [0-9] (instead of \d)
    # rejects non-ASCII Unicode digits. fullmatch already pins the length.
    return re.fullmatch(r'[0-9]{9}', pid) is not None

**Individual test cases**

In [26]:
# Each entry: (message to print, expected verdict, validator, sample value).
field_test_cases = [
    ("Test Case 1: byr. Passed:", True, byr_correct, "2002"),
    ("Test Case 2: byr. Passed:", False, byr_correct, "2003"),

    ("Test Case 3: hgt. Passed:", True, hgt_correct, "60in"),
    ("Test Case 4: hgt. Passed:", True, hgt_correct, "190cm"),
    ("Test Case 5: hgt. Passed:", False, hgt_correct, "190in"),
    ("Test Case 6: hgt. Passed:", False, hgt_correct, "190"),

    ("Test Case 7: hcl. Passed:", True, hcl_correct, "#123abc"),
    ("Test Case 8: hcl. Passed:", False, hcl_correct, "#123abz"),
    ("Test Case 9: hcl. Passed:", False, hcl_correct, "123abc"),

    ("Test Case 10: ecl. Passed:", True, ecl_correct, "brn"),
    ("Test Case 11: ecl. Passed:", False, ecl_correct, "wat"),

    ("Test Case 12: pid. Passed:", True, pid_correct, "000000001"),
    ("Test Case 13: pid. Passed:", False, pid_correct, "0123456789"),
]

# Run the validators one at a time so output appears in case order.
for message, expected, checker, sample in field_test_cases:
    print(message, expected == checker(sample))

Test Case 1: byr. Passed: True
Test Case 2: byr. Passed: True
Test Case 3: hgt. Passed: True
Test Case 4: hgt. Passed: True
Test Case 5: hgt. Passed: True
Test Case 6: hgt. Passed: True
Test Case 7: hcl. Passed: True
Test Case 8: hcl. Passed: True
Test Case 9: hcl. Passed: True
Test Case 10: ecl. Passed: True
Test Case 11: ecl. Passed: True
Test Case 12: pid. Passed: True
Test Case 13: pid. Passed: True


**Passport test cases**

In [31]:
# Parse the second example file, used to exercise the per-field validation.
test_info2 = read_passport_file('day4_test2.txt')

# Expected is_valid() verdicts for that file: four False followed by four True.
# NOTE(review): presumably the file lists four invalid passports, then four valid ones - confirm against day4_test2.txt.
true_labels = [False] * 4 + [True] * 4
true_labels

[False, False, False, False, True, True, True, True]

In [29]:
# Field-presence verdict for each passport in the second example file.
list(map(has_necessary_fields, test_info2))

[True, True, True, True, True, True, True, True]

In [32]:
# Full validation verdict for each passport; compare with true_labels.
list(map(is_valid, test_info2))

[False, False, False, False, True, True, True, True]

**Answer to Part 2**

In [33]:
# One boolean per puzzle-input passport: present fields AND valid values.
valid_list2 = list(map(is_valid, passport_info))

In [34]:
# Booleans sum as 0/1, so this counts the passports that pass full validation.
sum(valid_list2)

127