In [60]:
import pandas as pd

In [61]:
df = pd.read_csv("titanic.csv")
print(f"{df.shape}")
print(df.iloc[0, :].to_string())

(891, 15)
survived                 0
pclass                   3
sex                   male
age                   22.0
sibsp                    1
parch                    0
fare                  7.25
embarked                 S
class                Third
who                    man
adult_male            True
deck                   NaN
embark_town    Southampton
alive                   no
alone                False


### funcs

In [3]:
class FlagSchema(object):
    def __init__(self, flag_map:dict[str, int]):
        self.flag_map = flag_map
        self._bitfield_length = max(1, sum(flag_map.values()).bit_length())

    def decode(self, bitfield:int)->list[str]:
        return [name for name, value in self.flag_map.items() if bitfield & value]

    def apply_to_df(self, df:pd.DataFrame)->pd.Series:
        flag_srs = pd.Series(index=df.index, dtype=pd.Int64Dtype()).fillna(0)
        for k, v in self.flag_map.items():
            flag_srs += df[k].astype(bool) * v
        return flag_srs

In [4]:
class FlagSet:
    def __init__(self, flag_map: dict):
        self._flag_map = flag_map
        self._bitfield = 0
        self._bitfield_length = max(1, self._bitfield.bit_length())

    @staticmethod
    def decode_flags(flag_map, bitfield):
        return [name for name, mask in flag_map.items() if bitfield & mask]

    @staticmethod
    def is_flag_set(flag_map, flag_name):
        mask = flag_map.get(flag_name, 0)
        return lambda bitfield: bool(bitfield & mask)

    @classmethod
    def from_value(cls, flag_map, bitfield):
        obj = cls(flag_map)
        obj._bitfield = bitfield
        return obj

    def __repr__(self):
        active = self.decode()
        return f"<FlagSet {self.binary()} → {active}>"

    def apply(self, *flag_names):
        for name in flag_names:
            self._bitfield |= self._flag_map[name]
        return self

    def clear(self, *flag_names):
        for name in flag_names:
            self._bitfield &= ~self._flag_map[name]
        return self

    def toggle(self, *flag_names):
        for name in flag_names:
            self._bitfield ^= self._flag_map[name]
        return self

    def is_set(self, flag_name):
        return bool(self._bitfield & self._flag_map[flag_name])

    def decode(self):
        return [name for name, mask in self._flag_map.items() if self._bitfield & mask]

    def mask(self, flag_name):
        return self._flag_map.get(flag_name, 0)

    def value(self):
        return self._bitfield

    def binary(self):
        return f"{self._bitfield:0{self._bitfield_length}b}"

    def describe(self):
        data = []
        for name, mask in sorted(self._flag_map.items()):
            data.append({
                'Flag Name': name,
                'Mask': f"{mask:0{self._bitfield_length}b}",
                'Value': mask,
                'Active': self.is_set(name)
            })
        return pd.DataFrame(data)

### binary flag

In [9]:
# define bit masks
has_survived = 0b0001
is_alone = 0b0010
is_male = 0b0100
age_below_18 = 0b1000

In [10]:
flag = 0b0110

In [11]:
print(f"check: {flag & has_survived = }")
print(f"check: {flag & is_alone = }")
print(f"check: {flag & is_male = }")
print(f"check: {flag & age_below_18 = }")

check: flag & has_survived = 0
check: flag & is_alone = 2
check: flag & is_male = 4
check: flag & age_below_18 = 0


In [12]:
# set
bin(flag | has_survived)

'0b111'

In [13]:
bin(flag & ~0b0010) # clear

'0b100'

In [14]:
bin(flag ^ 0b0100) # flip

'0b10'

### binary flag - flag set

In [15]:
flags = {
    'has_survived': 0b0001,
    'is_alone': 0b0010,
    'is_male': 0b0100,
    'age_below_18': 0b1000,
}
fs = FlagSet(flags)
fs.describe()

Unnamed: 0,Flag Name,Mask,Value,Active
0,age_below_18,1000,8,False
1,has_survived,1,1,False
2,is_alone,10,2,False
3,is_male,100,4,False


In [16]:
fs.apply('has_survived', 'is_male')

<FlagSet 101 → ['has_survived', 'is_male']>

In [17]:
FlagSet.from_value(flags, 0b0101)

<FlagSet 101 → ['has_survived', 'is_male']>

In [18]:
FlagSet.from_value(flags, 0b0101).decode()

['has_survived', 'is_male']

In [19]:
FlagSet.from_value(flags, 0b0101).toggle('age_below_18')

<FlagSet 1101 → ['has_survived', 'is_male', 'age_below_18']>

In [20]:
FlagSet.from_value(flags, 0b0101).clear('has_survived')

<FlagSet 100 → ['is_male']>

### df

In [63]:
flag_map = {
    'has_survived': 0b0001,
    'is_alone': 0b0010,
    'is_male': 0b0100,
    'age_below_18': 0b1000,
}

In [64]:
df['flags'] = 0

df['flags'] = df['flags'] + df['survived'].mul(flag_map['has_survived'])
df['flags'] = df['flags'] + df['alone'].astype(bool).mul(flag_map['is_alone'])
df['flags'] = df['flags'] + df['sex'].map({'male': 1, 'female': 0}).mul(flag_map['is_male'])
df['flags'] = df['flags'] + df['age'].lt(18).mul(flag_map['age_below_18'])

df['flags'].value_counts().sort_index()

flags
0      39
1     105
2      25
3      90
4      97
5      23
6     336
7      63
8      15
9      29
10      2
11      9
12     24
13     22
14     11
15      1
Name: count, dtype: int64

In [94]:
df['flags_bin'] = df['flags'].apply(lambda x: f"{x:08b}")
df['flags_decoded'] = df['flags'].apply(lambda bitfield: [name for name, value in flag_map.items() if bitfield & value])
sdf = (
    df
    .assign(flags_decoded_str = lambda x: x['flags_decoded'].astype(str))
    .loc[:,['flags','flags_bin','flags_decoded_str']]
    .value_counts()
    .sort_index()
    .reset_index()
)
sdf

Unnamed: 0,flags,flags_bin,flags_decoded_str,count
0,0,0,[],39
1,1,1,['has_survived'],105
2,2,10,['is_alone'],25
3,3,11,"['has_survived', 'is_alone']",90
4,4,100,['is_male'],97
5,5,101,"['has_survived', 'is_male']",23
6,6,110,"['is_alone', 'is_male']",336
7,7,111,"['has_survived', 'is_alone', 'is_male']",63
8,8,1000,['age_below_18'],15
9,9,1001,"['has_survived', 'age_below_18']",29


In [100]:
sdf['flags'] & 0b1000

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     8
9     8
10    8
11    8
12    8
13    8
14    8
15    8
Name: flags, dtype: int64

In [101]:
df['is_survived'] = (df['flags'] & flag_map['has_survived']).astype(pd.Int64Dtype())
df['is_survived'].value_counts()

is_survived
0    549
1    342
Name: count, dtype: Int64

In [102]:
df.loc[lambda x:(x['flags'] & flag_map['has_survived'])>0,:].shape

(342, 19)

### df - flag scheme

In [None]:
flag_map = {
    'has_survived': 0b0001,
    'is_alone': 0b0010,
    'is_male': 0b0100,
    'age_below_18': 0b1000,
}
fs = FlagSchema(flag_map=flag_map)
fs

<__main__.FlagSchema at 0x1ad7c12a510>

In [None]:
df['has_survived'] = df['survived'].astype(bool)
df['is_alone'] = df['alone'].astype(bool)
df['is_male'] = df['sex'].map({'male': 1, 'female': 0})
df['age_below_18'] = df['age'] < 18

df['flags'] = fs.apply_to_df(df=df)
df['flags']

0      4
1      1
2      3
3      1
4      6
      ..
886    6
887    3
888    0
889    7
890    6
Name: flags, Length: 891, dtype: Int64

In [None]:
(df['flags'] & (fs.flag_map['has_survived'] | fs.flag_map['is_alone'])) == (fs.flag_map['has_survived'] | fs.flag_map['is_alone'])

0      False
1      False
2       True
3      False
4      False
       ...  
886    False
887     True
888    False
889     True
890    False
Name: flags, Length: 891, dtype: boolean

In [None]:
df['flags'].apply(lambda x: fs.decode(x))

0                              [is_male]
1                         [has_survived]
2               [has_survived, is_alone]
3                         [has_survived]
4                    [is_alone, is_male]
                     ...                
886                  [is_alone, is_male]
887             [has_survived, is_alone]
888                                   []
889    [has_survived, is_alone, is_male]
890                  [is_alone, is_male]
Name: flags, Length: 891, dtype: object

### flagging funcs and actions

In [45]:
def action_1(df, res_col, flag_col):
    mask = lambda x: (x['age'] > 50) & (x['sex'] == 'female')
    df.loc[mask, res_col] = 1
    df[flag_col] = 0
    df.loc[mask, flag_col] = 1

def action_2(df, res_col, flag_col):
    mask = lambda x: (x['age'] < 18) & (x['sex'] == 'male')
    df.loc[mask, res_col] = 0
    df[flag_col] = 0
    df.loc[mask, flag_col] = 1

In [None]:
actions_sequence = [action_1, action_2]
res_col = 'survived_2'
flag_col_suffix = 'flag_'

# Apply actions
df[res_col] = df['survived']
for i, action in enumerate(actions_sequence):
    action(df, res_col=res_col, flag_col=flag_col_suffix + str(i))

df[['survived', 'survived_2', 'flag_0', 'flag_1']].value_counts().reset_index()

Unnamed: 0,survived,survived_2,flag_0,flag_1,count
0,0,0,0,0,513
1,1,1,0,0,303
2,0,0,0,1,35
3,1,0,0,1,23
4,1,1,1,0,16
5,0,1,1,0,1


### archive

In [None]:
# import functools

# def decorate_action(func):
#     @functools.wraps(func)
#     def wrapper(df, *args, **kwargs):
#         if len(args) == 1:
#             res_col = args[0]
#             flag_col = kwargs['flag_col']
#         elif len(args) == 2:
#             res_col = args[0]
#             flag_col = args[1]
#         else:
#             res_col = kwargs['res_col']
#             flag_col = kwargs['flag_col']

#         logger.debug(f"calling {func.__name__}")
#         res, flag = func(df, *args, **kwargs)

#         # apply back
#         df[res_col] = res
#         df[flag_col] = flag

#         # return pd.DataFrame({
#         #     res_col: res,
#         #     flag_col: flag
#         # }, index=df.index)
#     return wrapper


In [None]:
# # action take df, then make new df with 2 cols, 1 res, 1 flag
# @decorate_action
# def action_1(df, res_col, flag_col):
#     mask = lambda x: (x['age'] > 50) & (x['sex'] == 'female')
#     res = df[res_col].copy()
#     res.loc[mask(df)] = 1
#     flag = mask(df).astype(int)
#     return res, flag

# @decorate_action
# def action_2(df, res_col, flag_col):
#     mask = lambda x: (x['age'] < 18) & (x['sex'] == 'male')
#     res = df[res_col].copy()
#     res.loc[mask(df)] = 0
#     flag = mask(df).astype(int)
#     return res, flag