In [1]:
import pandas as pd
import typing as t

In [26]:
# Load the two data sets; index_col=0 makes the first CSV column the row index.
shapes = pd.read_csv("shapes.csv", index_col=0)
banking = pd.read_csv("banking.csv", index_col=0)
# NOTE(review): disabled clean-up that would strip whitespace from the column
# headers. Presumably only needed if banking.csv has padded headers - confirm,
# then either enable it or delete the line.
#banking.columns = [c.strip() for c in banking.columns]

In [197]:
# Display the banking table as loaded, before any scaling is applied.
banking

Unnamed: 0_level_0,Country,BankType,Owner,Code,CorpC,FinLib,RegInt,BList,Wolfs
Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ABNAmro,NLD,Universal,Public,No,0.74,0.98,144,Yes,No
Barclays,GBR,Commercial,Public,Explicit,0.14,1.0,277,Yes,Yes
BNP Paribas,FRA,Universal,Public,Implicit,0.82,1.0,75,No,No
Carnegie,SWE,Investment,Public,No,0.71,0.95,83,No,No
Citigroup,USA,Commercial,Public,Explicit,0.0,1.0,426,Yes,Yes
Coutts \& Co,GBR,Private,Public,Implicit,0.14,1.0,277,Yes,No
CS,CHE,Universal,Public,Explicit,0.44,0.95,83,Yes,Yes
Deutsche Bank,GER,Universal,Public,Explicit,0.95,0.9,45,No,Yes
Goldman Sachs,USA,Investment,Public,Explicit,0.0,1.0,426,Yes,Yes
HSBC,GBR,Commercial,Public,Implicit,0.14,1.0,277,Yes,Yes


# Scaling

In [15]:
def scaling_nominal(df: pd.DataFrame, column: str):
    """Nominal scaling: replace `column` by one boolean column per distinct value.

    Args:
        df: Input table; it is not modified.
        column: Name of the column to scale.

    Returns:
        A copy of `df` without `column`, extended with one column named
        ``"{column}={value}"`` for each distinct value, holding True on the
        rows whose original entry equals that value.
    """
    source = df[column]

    # Everything except the scaled column is carried over unchanged.
    kept_columns = [name for name in df.columns if name != column]
    result = df[kept_columns].copy()

    # New columns are appended in the order unique() reports the values.
    for value in source.unique():
        result[f"{column}={value}"] = source == value

    return result

In [16]:
def scaling_ordinal(df: pd.DataFrame, column: str, order_list: t.Optional[t.List] = None):
    """Ordinal scaling: replace `column` by one ``<=`` threshold column per distinct value.

    Args:
        df: Input table; it is not modified.
        column: Name of the column to scale.
        order_list: Optional explicit ordering of the values, lowest first.
            When given (and non-empty), values are compared by their position
            in this list; otherwise they are compared with their natural
            ``<=`` ordering.

    Returns:
        A copy of `df` without `column`, extended with one boolean column
        named ``"{column}<={value}"`` per distinct value.

    Raises:
        ValueError: If `order_list` is given and a value of `column` is not
            contained in it (raised by ``list.index``).
    """
    # get all the unique values for the column
    uniques = df[column].unique()

    # copy the dataframe without the column we are replacing
    df_scaled = df[[c for c in df.columns if c != column]].copy()

    # The comparison keys do not depend on the threshold, so compute them once
    # instead of re-mapping the whole column for every distinct value.
    if order_list:
        keys = df[column].apply(order_list.index)
    else:
        keys = df[column]

    for unique in uniques:
        threshold = order_list.index(unique) if order_list else unique
        df_scaled[f"{column}<={unique}"] = keys <= threshold

    return df_scaled

In [17]:
def scaling_interordinal(df: pd.DataFrame, column: str, order_list: t.Optional[t.List] = None):
    """Interordinal scaling: replace `column` by ``<=`` and ``>=`` threshold columns.

    Args:
        df: Input table; it is not modified.
        column: Name of the column to scale.
        order_list: Optional explicit ordering of the values, lowest first.
            When given (and non-empty), values are compared by their position
            in this list; otherwise they are compared with their natural
            ordering.

    Returns:
        A copy of `df` without `column`, extended, for each distinct value,
        with the boolean columns ``"{column}<={value}"`` and
        ``"{column}>={value}"`` (in that order).

    Raises:
        ValueError: If `order_list` is given and a value of `column` is not
            contained in it (raised by ``list.index``).
    """
    # get all the unique values for the column
    uniques = df[column].unique()

    # copy the dataframe without the column we are replacing
    df_scaled = df[[c for c in df.columns if c != column]].copy()

    # The comparison keys are the same for every threshold and for both
    # directions, so map the column to its ranks only once.
    if order_list:
        keys = df[column].apply(order_list.index)
    else:
        keys = df[column]

    for unique in uniques:
        threshold = order_list.index(unique) if order_list else unique
        df_scaled[f"{column}<={unique}"] = keys <= threshold
        df_scaled[f"{column}>={unique}"] = keys >= threshold

    return df_scaled

In [18]:
# Try nominal scaling on a single column. The result is only displayed:
# scaling_nominal works on a copy, so `banking` itself is left untouched.
scaling_nominal(banking, "Country")

Unnamed: 0_level_0,BankType,Owner,Code,CorpC,FinLib,RegInt,BList,Wolfs,Country= NLD,Country= GBR,Country= FRA,Country= SWE,Country= USA,Country= CHE,Country= GER,Country= CAN,Country= ESP
Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABNAmro,Universal,Public,No,0.74,0.98,144,Yes,No,True,False,False,False,False,False,False,False,False
Barclays,Commercial,Public,Explicit,0.14,1.0,277,Yes,Yes,False,True,False,False,False,False,False,False,False
BNP Paribas,Universal,Public,Implicit,0.82,1.0,75,No,No,False,False,True,False,False,False,False,False,False
Carnegie,Investment,Public,No,0.71,0.95,83,No,No,False,False,False,True,False,False,False,False,False
Citigroup,Commercial,Public,Explicit,0.0,1.0,426,Yes,Yes,False,False,False,False,True,False,False,False,False
Coutts \& Co,Private,Public,Implicit,0.14,1.0,277,Yes,No,False,True,False,False,False,False,False,False,False
CS,Universal,Public,Explicit,0.44,0.95,83,Yes,Yes,False,False,False,False,False,True,False,False,False
Deutsche Bank,Universal,Public,Explicit,0.95,0.9,45,No,Yes,False,False,False,False,False,False,True,False,False
Goldman Sachs,Investment,Public,Explicit,0.0,1.0,426,Yes,Yes,False,False,False,False,True,False,False,False,False
HSBC,Commercial,Public,Implicit,0.14,1.0,277,Yes,Yes,False,True,False,False,False,False,False,False,False


In [27]:
# List the column names of the banking table (the candidates for scaling).
banking.columns

Index(['Country', 'BankType', 'Owner', 'Code', 'CorpC', 'FinLib', 'RegInt',
       'BList', 'Wolfs'],
      dtype='object')

In [28]:
# Scale type chosen for each column of the banking table.
# For the dict-based scales the value is the explicit value order (lowest
# first); None means the values are compared with their natural ordering.
nominal_attributes = ["Country", "BankType", "Owner", "RegInt", "BList", "Wolfs"]
ordinal_attributes = {}
interordinal_attributes = {
    "Code": ["No", "Implicit", "Explicit"],
    "CorpC": None,
    "FinLib": None,
}

# Start from a copy so the raw table stays available, then scale one column
# at a time; every call returns a new frame with that column replaced.
scaled_banking = banking.copy()

for nominal_attribute in nominal_attributes:
    scaled_banking = scaling_nominal(scaled_banking, nominal_attribute)

for ordinal_attribute, order in ordinal_attributes.items():
    scaled_banking = scaling_ordinal(scaled_banking, ordinal_attribute, order)

for interordinal_attribute, order in interordinal_attributes.items():
    scaled_banking = scaling_interordinal(scaled_banking, interordinal_attribute, order)

scaled_banking

Unnamed: 0_level_0,Country=NLD,Country=GBR,Country=FRA,Country=SWE,Country=USA,Country=CHE,Country=GER,Country=CAN,Country=ESP,BankType=Universal,...,CorpC<=0.77,CorpC>=0.77,FinLib<=0.98,FinLib>=0.98,FinLib<=1.0,FinLib>=1.0,FinLib<=0.95,FinLib>=0.95,FinLib<=0.9,FinLib>=0.9
Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABNAmro,True,False,False,False,False,False,False,False,False,True,...,True,False,True,True,True,False,False,True,False,True
Barclays,False,True,False,False,False,False,False,False,False,False,...,True,False,False,True,True,True,False,True,False,True
BNP Paribas,False,False,True,False,False,False,False,False,False,True,...,False,True,False,True,True,True,False,True,False,True
Carnegie,False,False,False,True,False,False,False,False,False,False,...,True,False,True,False,True,False,True,True,False,True
Citigroup,False,False,False,False,True,False,False,False,False,False,...,True,False,False,True,True,True,False,True,False,True
Coutts \& Co,False,True,False,False,False,False,False,False,False,False,...,True,False,False,True,True,True,False,True,False,True
CS,False,False,False,False,False,True,False,False,False,True,...,True,False,True,False,True,False,True,True,False,True
Deutsche Bank,False,False,False,False,False,False,True,False,False,True,...,False,True,True,False,True,False,True,False,True,True
Goldman Sachs,False,False,False,False,True,False,False,False,False,False,...,True,False,False,True,True,True,False,True,False,True
HSBC,False,True,False,False,False,False,False,False,False,False,...,True,False,False,True,True,True,False,True,False,True


# Derivation and closure

In [199]:
# Display the shapes table: one row per shape, one boolean column per attribute.
shapes

Unnamed: 0_level_0,Has 3 Vertices,Has 4 vertices,Has a direct angle,Equilateral
Shape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Equilateral triangle,True,False,False,True
Rectangle triangle,True,False,True,False
Rectangle,False,True,True,False
Square,False,True,True,True


In [198]:
def up(ctx: pd.DataFrame, objs: t.Iterable) -> t.List:
    """Derive a set of objects: the attributes that all of them share.

    Args:
        ctx: Boolean table with objects as rows and attributes as columns.
        objs: Row labels of the objects to derive.

    Returns:
        The column labels, in table order, whose value is True on every
        selected row.
    """
    selected_rows = ctx.loc[list(objs)]  # materialise the iterable (e.g. a set) as a list for .loc
    shared_mask = selected_rows.all(axis=0)  # per column: True on all selected rows?
    return ctx.columns[shared_mask].to_list()

def down(ctx: pd.DataFrame, attrs: t.Iterable) -> t.List:
    """Derive a set of attributes: the objects that have all of them.

    Args:
        ctx: Boolean table with objects as rows and attributes as columns.
        attrs: Column labels of the attributes to derive.

    Returns:
        The row labels, in table order, whose value is True in every
        selected column.
    """
    selected_columns = ctx[list(attrs)]  # materialise the iterable (e.g. a set) as a list for indexing
    matching_mask = selected_columns.all(axis=1)  # per row: True in all selected columns?
    return ctx.index[matching_mask].to_list()

def updown(ctx: pd.DataFrame, objs: t.Iterable) -> t.Set:
    """Return, as a set, the objects having every attribute shared by `objs` (up, then down)."""
    shared_attributes = up(ctx, objs)
    return set(down(ctx, shared_attributes))
def downup(ctx: pd.DataFrame, attrs: t.Iterable) -> t.Set:
    """Return, as a set, the attributes shared by every object having `attrs` (down, then up)."""
    matching_objects = down(ctx, attrs)
    return set(up(ctx, matching_objects))

# Quick checks of the derivation operators on the shapes table, in order:
#   1. attributes shared by "Rectangle" and "Rectangle triangle"
#   2. objects having every attribute of the empty set (i.e. all objects)
#   3. up-then-down closure of {"Rectangle"}
#   4. applying updown a second time gives the same set
#   5. down-then-up closure of {"Equilateral"}
[up(shapes, ["Rectangle", "Rectangle triangle"]),
down(shapes, []),
 updown(shapes, ["Rectangle"]),
updown(shapes, ["Rectangle"]) == updown(shapes, updown(shapes, ["Rectangle"])),
downup(shapes, {"Equilateral"})]

[['Has a direct angle'],
 ['Equilateral triangle', 'Rectangle triangle', 'Rectangle', 'Square'],
 {'Rectangle', 'Square'},
 True,
 {'Equilateral'}]

# AllClosure

In [104]:
def LexicallyLower(intent: t.Set, other: t.Set, attribute_order: t.List, i: int) -> bool:
    """Test whether `intent` precedes `other` at position `i` of `attribute_order`.

    Two conditions must hold:
      * ``attribute_order[i]`` belongs to `other` but not to `intent`;
      * `intent` and `other` contain exactly the same attributes among those
        ordered before position `i`.

    Args:
        intent: The reference attribute set.
        other: The candidate attribute set.
        attribute_order: Attributes in the order used for the comparison.
        i: Position in `attribute_order` of the attribute to test.

    Returns:
        True when both conditions hold, False otherwise.
    """
    pivot = attribute_order[i]
    earlier = attribute_order[:i]

    pivot_is_new = pivot in other and pivot not in intent
    return pivot_is_new and intent.intersection(earlier) == other.intersection(earlier)
    
def NextClosure(ctx: pd.DataFrame, intent: t.Set, attribute_order: t.List) -> t.Optional[t.Set]:
    """Compute the closed attribute set that follows `intent` in `attribute_order`.

    Attributes are tried from the last position of `attribute_order` to the
    first. For position ``i`` with attribute ``a``, the candidate is the
    down-then-up closure of (`intent` restricted to the attributes before
    position ``i``) plus ``a``. The first candidate accepted by
    `LexicallyLower` is returned.

    Args:
        ctx: Boolean table with objects as rows and attributes as columns.
        intent: The current attribute set (any iterable; it is copied into a set).
        attribute_order: All attributes, in the order used for the enumeration.

    Returns:
        The accepted candidate, or None when no position yields one.
    """
    intent = set(intent)
    for i, a in reversed(list(enumerate(attribute_order))):  # For all i in M (in reversed order) do
        if a in intent:
            # LexicallyLower requires `a` to be absent from `intent`, so such a
            # candidate is always rejected; skip the closure computation.
            continue

        intent_b = intent.intersection(set(attribute_order[:i]))
        intent_b = downup(ctx, intent_b.union({a}))

        if LexicallyLower(intent, intent_b, attribute_order, i):
            return intent_b

    # No candidate was accepted. None is returned (as the original implicit
    # fall-through did) rather than `intent`, so existing callers see no change.
    return None

In [189]:
def AllClosureYield(ctx: pd.DataFrame, attribute_order: t.List) -> t.Generator[t.Set, None, None]:
    """Lazily enumerate closed attribute sets of `ctx`, following `attribute_order`.

    The enumeration starts at the down-then-up closure of the empty attribute
    set and repeatedly asks `NextClosure` for the successor. It stops after
    yielding the set equal to ``up(ctx, set())``, the derivation of the empty
    object set.

    Args:
        ctx: Boolean table with objects as rows and attributes as columns.
        attribute_order: All attributes, in the order used for the enumeration.

    Yields:
        Each attribute set visited, ending with the stop set.
    """
    current = downup(ctx, set())
    last = set(up(ctx, set()))

    while True:
        yield current
        if current == last:
            return
        current = NextClosure(ctx, current, attribute_order)
def AllClosure(ctx: pd.DataFrame, attribute_order: t.List) -> t.List[t.Set]:
    """Return every attribute set produced by `AllClosureYield`, collected into a list."""
    return list(AllClosureYield(ctx, attribute_order))

In [190]:
# Enumerate the closed attribute sets of the shapes table, using its column order.
AllClosure(shapes, shapes.columns.to_list())

[set(),
 {'Equilateral'},
 {'Has a direct angle'},
 {'Has 4 vertices', 'Has a direct angle'},
 {'Equilateral', 'Has 4 vertices', 'Has a direct angle'},
 {'Has 3 Vertices'},
 {'Equilateral', 'Has 3 Vertices'},
 {'Has 3 Vertices', 'Has a direct angle'},
 {'Equilateral', 'Has 3 Vertices', 'Has 4 vertices', 'Has a direct angle'}]

In [192]:
# Enumerate the closed attribute sets of the scaled banking table (column
# order as produced by the scaling cell) and show how many there are.
banking_intents = AllClosure(scaled_banking, scaled_banking.columns.to_list())
len(banking_intents)

550

In [195]:
# NOTE(review): project-local import placed mid-notebook; consider moving it to
# the import cell at the top so a fresh-kernel run fails early if it is missing.
import latviz_convert as l
# Presumably writes the scaled table to banking.json in a format LatViz can
# read - confirm against latviz_convert.df_to_latviz.
l.df_to_latviz(scaled_banking, "banking.json")

In [31]:
# Display the shapes table again (same frame as shown in the derivation section).
shapes


Unnamed: 0_level_0,Has 3 Vertices,Has 4 vertices,Has a direct angle,Equilateral
Shape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Equilateral triangle,True,False,False,True
Rectangle triangle,True,False,True,False
Rectangle,False,True,True,False
Square,False,True,True,True
