# Advent of Code 2020 - Puzzle 4B

## Import packages

In [269]:
import pandas as pd
from collections import defaultdict

## Main

In [270]:
# load the complete puzzle input into memory as a single string
with open('data/input_puzzle_4.txt', mode='r') as file:
    data = file.read()

# peek at the first 100 characters to see the raw layout
data[:100]

'iyr:2013 hcl:#ceb3a1\nhgt:151cm eyr:2030\nbyr:1943 ecl:grn\n\neyr:1988\niyr:2015 ecl:gry\nhgt:153in pid:17'

In [271]:
# Parse the raw text into one dict of fields per passport.
# Passports are separated by blank lines; the key:value fields of a single
# passport may be spread over several lines and are separated by whitespace.
dic = defaultdict(dict)

# running passport id; advanced once per blank (separator) line
ID = 0

for entry in data.split('\n'):
    # a blank line ends the current passport
    if not entry.strip():
        ID += 1
        continue

    # split() without arguments collapses repeated or trailing whitespace, so a
    # stray extra space is never mistaken for a passport separator
    for pair in entry.split():
        # split on the first colon only, so a value may itself contain ':'
        key, value = pair.split(':', 1)
        dic[ID][key] = value

# freeze into a plain dict so later lookups cannot create empty passports
dic = dict(dic)

# one row per passport, one column per field; fields a passport lacks are NaN
df = pd.DataFrame.from_dict(dic, orient='index').sort_index()

# show dataframe
df.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,2013,#ceb3a1,151cm,2030,1943.0,grn,,
1,2015,0c6261,153in,1988,1966.0,gry,173cm,
2,2014,#733820,166cm,2025,1952.0,blu,79215921,
3,2013,#733820,165cm,2022,,oth,073015801,101.0
4,2013,#623a2f,160cm,2026,1948.0,brn,122719649,246.0


In [272]:
def year_in_range(years, low, high):
    """Return a boolean Series: True where the value is a 4-digit year in [low, high].

    Parameters
    ----------
    years : pd.Series
        Raw string values of one year field; missing fields are NaN.
    low, high : int
        Inclusive bounds of the accepted range.

    Returns
    -------
    pd.Series of bool
        False for missing values, for anything that is not exactly four
        digits, and for years outside the range.
    """
    # keep only values made of exactly four digits; everything else becomes NaN
    # (str.match anchors at the start, the trailing $ anchors the end)
    four_digits = years.where(years.str.match(r'[0-9]{4}$', na=False))
    # NaN never falls inside the range, so malformed and missing values fail
    return pd.to_numeric(four_digits, errors='coerce').between(low, high)


# copy original for modification; `df` keeps the raw values untouched
passed = df.copy(deep=True)

# replace each year column by its check result.
# Columns are assigned on the frame itself instead of calling .update() on
# `passed.<col>`, which is a chained in-place write that does not reach the
# frame when pandas Copy-on-Write is active.
passed['iyr'] = year_in_range(df['iyr'], 2010, 2020)  # issue year
passed['eyr'] = year_in_range(df['eyr'], 2020, 2030)  # expiration year
passed['byr'] = year_in_range(df['byr'], 1920, 2002)  # birth year

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,#ceb3a1,151cm,True,True,grn,,
1,True,0c6261,153in,False,True,gry,173cm,
2,True,#733820,166cm,True,True,blu,79215921,
3,True,#733820,165cm,True,False,oth,073015801,101.0
4,True,#623a2f,160cm,True,True,brn,122719649,246.0


In [273]:
# split each height into its number and its unit; a value that is not
# "<digits>cm" or "<digits>in" (or is missing) yields NaN in both columns,
# so malformed heights fail instead of crashing an int conversion
hgt_parts = df['hgt'].str.extract(r'^([0-9]+)(cm|in)$', expand=True)
hgt_number = pd.to_numeric(hgt_parts[0], errors='coerce')
hgt_unit = hgt_parts[1]

# centimetres: 150-193 inclusive
cm_ok = (hgt_unit == 'cm') & hgt_number.between(150, 193)

# inches: 59-76 inclusive
in_ok = (hgt_unit == 'in') & hgt_number.between(59, 76)

# a height passes when it satisfies the rule for its own unit; everything
# else (other units, no unit, missing field) is False.
# Assigned on the frame itself rather than via passed.hgt.update(), which is
# a chained in-place write that is lost when pandas Copy-on-Write is active.
passed['hgt'] = cm_ok | in_ok

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,#ceb3a1,True,True,True,grn,,
1,True,0c6261,False,False,True,gry,173cm,
2,True,#733820,True,True,True,blu,79215921,
3,True,#733820,True,True,False,oth,073015801,101.0
4,True,#623a2f,True,True,True,brn,122719649,246.0


In [274]:
# check haircolour: a '#' followed by exactly six characters 0-9 or a-f.
# str.match anchors only at the start of the string, so the pattern carries
# an explicit length and a trailing '$'; this validates every character
# rather than just the first one after the '#'.
# na=False makes passports without an hcl field fail the check.
passed['hcl'] = df['hcl'].str.match(r'#[0-9a-f]{6}$', na=False)

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,True,True,True,True,grn,,
1,True,False,False,False,True,gry,173cm,
2,True,True,True,True,True,blu,79215921,
3,True,True,True,True,False,oth,073015801,101.0
4,True,True,True,True,True,brn,122719649,246.0


In [275]:
# the eye colour codes that count as valid
colors = [
    'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth',
]

# True where the passport's eye colour is one of the accepted codes
# (a missing field is not in the list and therefore fails)
passed['ecl'] = df['ecl'].isin(colors)

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,True,True,True,True,True,,
1,True,False,False,False,True,True,173cm,
2,True,True,True,True,True,True,79215921,
3,True,True,True,True,False,True,073015801,101.0
4,True,True,True,True,True,True,122719649,246.0


In [276]:
# check pid: exactly nine digits, leading zeroes included.
# str.match anchors only at the start of the string, so the pattern carries
# an explicit length and a trailing '$'; this validates all nine characters
# rather than only checking that the first one is a digit.
# na=False makes passports without a pid field fail the check.
passed['pid'] = df['pid'].str.match(r'[0-9]{9}$', na=False)

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,True,True,True,True,True,False,
1,True,False,False,False,True,True,False,
2,True,True,True,True,True,True,False,
3,True,True,True,True,False,True,True,101.0
4,True,True,True,True,True,True,True,246.0


In [277]:
# pass all cid: the country id is optional, so every passport passes this
# check whether the field is present or not.
# Assigned on the frame itself rather than via passed.cid.update(), which is
# a chained in-place write; when pandas Copy-on-Write is active that write is
# lost, the NaN values survive, and the later dropna() would discard every
# passport without a cid.
passed['cid'] = True

# show head
passed.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
0,True,True,True,True,True,True,False,True
1,True,False,False,False,True,True,False,True
2,True,True,True,True,True,True,False,True
3,True,True,True,True,False,True,True,True
4,True,True,True,True,True,True,True,True


In [278]:
# discard rows that still contain a missing check result, then keep only
# the rows in which every remaining check is True
passed = passed.dropna().loc[lambda checks: checks.all(axis=1)]

# look up the original (raw) field values of the passports that passed
valid = df.loc[passed.index]

valid.head()

Unnamed: 0,iyr,hcl,hgt,eyr,byr,ecl,pid,cid
4,2013,#623a2f,160cm,2026,1948,brn,122719649,246.0
5,2017,#ceb3a1,154cm,2028,2000,hzl,229371724,
6,2013,#623a2f,177cm,2029,1980,amb,914628384,
7,2019,#efcc98,76in,2026,1945,amb,475316185,
10,2012,#b6652a,160cm,2029,1920,oth,223041037,212.0


In [281]:
# Count the valid passports.
# `valid` holds the raw field values of the rows that passed every check, so
# its row count is the answer. Counting rows avoids testing the truthiness of
# the raw strings, which would wrongly drop a passport with an empty value.
total_number_of_valid_passports = len(valid)
print("Total valid passports: ", total_number_of_valid_passports)

Total valid passports:  167
