In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("titanic.csv")
print(f"{df.shape}")
print(df.iloc[0, :].to_string())

(891, 15)
survived                 0
pclass                   3
sex                   male
age                   22.0
sibsp                    1
parch                    0
fare                  7.25
embarked                 S
class                Third
who                    man
adult_male            True
deck                   NaN
embark_town    Southampton
alive                   no
alone                False


### funcs

In [139]:
class FlagSchema(object):
    def __init__(self, flag_map:dict[str, int]):
        self.flag_map = flag_map
        self._bitfield_length = max(1, sum(flag_map.values()).bit_length())

    def decode(self, bitfield:int)->list[str]:
        return [name for name, value in self.flag_map.items() if bitfield & value]

    def apply_to_df(self, df:pd.DataFrame)->pd.Series:
        flag_srs = pd.Series(index=df.index, dtype=pd.Int64Dtype()).fillna(0)
        for k, v in self.flag_map.items():
            flag_srs += df[k].astype(bool) * v
        return flag_srs

In [39]:
class FlagSet:
    def __init__(self, flag_map: dict):
        self._flag_map = flag_map
        self._bitfield = 0
        self._bitfield_length = max(1, self._bitfield.bit_length())

    @staticmethod
    def decode_flags(flag_map, bitfield):
        return [name for name, mask in flag_map.items() if bitfield & mask]

    @staticmethod
    def is_flag_set(flag_map, flag_name):
        mask = flag_map.get(flag_name, 0)
        return lambda bitfield: bool(bitfield & mask)

    @classmethod
    def from_value(cls, flag_map, bitfield):
        obj = cls(flag_map)
        obj._bitfield = bitfield
        return obj

    def __repr__(self):
        active = self.decode()
        return f"<FlagSet {self.binary()} → {active}>"

    def apply(self, *flag_names):
        for name in flag_names:
            self._bitfield |= self._flag_map[name]
        return self

    def clear(self, *flag_names):
        for name in flag_names:
            self._bitfield &= ~self._flag_map[name]
        return self

    def toggle(self, *flag_names):
        for name in flag_names:
            self._bitfield ^= self._flag_map[name]
        return self

    def is_set(self, flag_name):
        return bool(self._bitfield & self._flag_map[flag_name])

    def decode(self):
        return [name for name, mask in self._flag_map.items() if self._bitfield & mask]

    def mask(self, flag_name):
        return self._flag_map.get(flag_name, 0)

    def value(self):
        return self._bitfield

    def binary(self):
        return f"{self._bitfield:0{self._bitfield_length}b}"

    def describe(self):
        data = []
        for name, mask in sorted(self._flag_map.items()):
            data.append({
                'Flag Name': name,
                'Mask': f"{mask:0{self._bitfield_length}b}",
                'Value': mask,
                'Active': self.is_set(name)
            })
        return pd.DataFrame(data)

### df

In [140]:
flag_map = {
    'has_survived': 0b0001,
    'is_alone': 0b0010,
    'is_male': 0b0100,
    'age_below_18': 0b1000,
}
fs = FlagSchema(flag_map=flag_map)
fs

<__main__.FlagSchema at 0x270b3542cf0>

In [141]:
df['has_survived'] = df['survived'].astype(bool)
df['is_alone'] = df['alone'].astype(bool)
df['is_male'] = df['sex'].map({'male': 1, 'female': 0})
df['age_below_18'] = df['age'] < 18

df['flags'] = fs.apply_to_df(df=df)
df['flags']

0      4
1      1
2      3
3      1
4      6
      ..
886    6
887    3
888    0
889    7
890    6
Name: flags, Length: 891, dtype: Int64

In [150]:
(df['flags'] & (fs.flag_map['has_survived'] | fs.flag_map['is_alone'])) == (fs.flag_map['has_survived'] | fs.flag_map['is_alone'])

0      False
1      False
2       True
3      False
4      False
       ...  
886    False
887     True
888    False
889     True
890    False
Name: flags, Length: 891, dtype: boolean

In [134]:
df['flags'].apply(lambda x: fs.decode(x))

0                              [is_male]
1                         [has_survived]
2               [has_survived, is_alone]
3                         [has_survived]
4                    [is_alone, is_male]
                     ...                
886                  [is_alone, is_male]
887             [has_survived, is_alone]
888                                   []
889    [has_survived, is_alone, is_male]
890                  [is_alone, is_male]
Name: flags, Length: 891, dtype: object

### bitand flag basics

In [14]:
# define bit masks
has_survived = 0b0001
is_alone = 0b0010
is_male = 0b0100
age_below_18 = 0b1000

In [7]:
flag = 0b0110

In [8]:
print(f"check: {flag & has_survived = }")
print(f"check: {flag & is_alone = }")
print(f"check: {flag & is_male = }")
print(f"check: {flag & age_below_18 = }")

check: flag & has_survived = 0
check: flag & is_alone = 2
check: flag & is_male = 4
check: flag & age_below_18 = 0


In [17]:
# set
bin(flag | has_survived)

'0b111'

In [15]:
bin(flag & ~0b0010) # clear

'0b100'

In [16]:
bin(flag ^ 0b0100) # flip

'0b10'

### flag set

In [51]:
flags = {
    'has_survived': 0b0001,
    'is_alone': 0b0010,
    'is_male': 0b0100,
    'age_below_18': 0b1000,
}
fs = FlagSet(flags)
fs.describe()

Unnamed: 0,Flag Name,Mask,Value,Active
0,age_below_18,1000,8,False
1,has_survived,1,1,False
2,is_alone,10,2,False
3,is_male,100,4,False


In [33]:
fs.apply('has_survived', 'is_male')

<FlagSet 101 → ['has_survived', 'is_male']>

In [32]:
FlagSet.from_value(flags, 0b0101)

<FlagSet 101 → ['has_survived', 'is_male']>

In [35]:
FlagSet.from_value(flags, 0b0101).decode()

['has_survived', 'is_male']

In [38]:
FlagSet.from_value(flags, 0b0101).toggle('age_below_18')

<FlagSet 1101 → ['has_survived', 'is_male', 'age_below_18']>

In [37]:
FlagSet.from_value(flags, 0b0101).clear('has_survived')

<FlagSet 100 → ['is_male']>