In [13]:
from datafusiontools._core.utils import CreateInputsML
from datafusiontools._core.data_input import Data, Variable
from datafusiontools.machine_learning.self_organizing_map import SOM
import pandas as pd

In [14]:
# Helper object that collects the input features for the SOM.
create_features = CreateInputsML()

# Read the CSV; the columns listed in `exclude_var_names` are _excluded_ (!)
# from the SOM inputs.
df = pd.read_csv('../data/NN_data.csv', sep=';')
exclude_var_names = ['Output']

# Wrap every CSV column in a datafusiontools Variable. The list is called
# `variables` (not `vars`) so the Python builtin `vars()` is not shadowed for
# the rest of the kernel session.
variables = [Variable(label=col, value=df[col].values) for col in df.columns]
data = Data(variables=variables, location=None)

# All column labels except the excluded ones are registered as features.
in_var_names = df.columns.difference(exclude_var_names).to_list()
create_features.add_features(data, variable_names=in_var_names, use_independent_variable=False)

# Pull the training matrix and the matching feature names back out of the
# feature object.
training_data = create_features.get_all_features(flatten=False)
component_names = create_features.get_feature_names()

In [15]:
# Build the self-organizing map wrapper: no classification, 50 x 30 map.
som = SOM(mapsize=[50, 30], classification=False)

In [16]:
# Fit the SOM on the prepared feature matrix.
# NOTE(review): the output recorded for this cell is
# "AttributeError: module 'sompy' has no attribute 'SOMFactory'"; presumably the
# installed `sompy` package is not the one datafusiontools expects - verify the
# environment before relying on any cell below.
som.train(names=component_names, data=training_data)

AttributeError: module 'sompy' has no attribute 'SOMFactory'

In [None]:
# som.codebook.to_csv('tests/test_output/test_SOM/codebook.csv', index=False)
# som.codebook = pd.read_csv('tests/test_output/test_SOM/codebook.csv')

In [None]:
#som.plot_umatrix_components()

In [None]:
import altair as alt

In [None]:
# Compare the version numerically. As plain strings '10.0.0' > '4.2' is False
# and '4.2.0' > '4.2' is True, which are both the wrong answer for a
# "newer than the 4.2 series" check.
if tuple(int(part) for part in alt.__version__.split('.')[:2]) > (4, 2):
    print('true')

In [None]:
# Display the version string of the imported altair module.
alt.__version__

In [None]:
# Preview the first rows of the frame loaded from the CSV file.
df.head()

In [None]:
# Keep only the excluded (target) column(s) of the input frame.
df_target_var = df[exclude_var_names]
# Copy the selection before adding a column. The original line copied
# `df_target` itself, which is not defined yet and raises NameError.
df_target = df_target_var.copy()
# Attach the SOM's `bmu` values to every row, cast to int.
df_target.loc[:, 'bmu'] = som.bmu.astype(int)
#df_target.head()

In [None]:
# Pixel size of one SOM cell: larger maps get smaller cells.
if som.mapsize[0] <= 25:
    step_size = 12
elif som.mapsize[0] < 50:
    step_size = 9
else:
    step_size = 6

# Every codebook column except the coordinate/key columns can be selected in
# the dropdown.
data_fields = som.codebook.columns.to_list()
for coordinate_column in ("X", "Y", "XY"):
    data_fields.remove(coordinate_column)
# data_fields.remove("U_matrix")

input_dropdown = alt.binding_select(options=data_fields)

# Compare the Altair version numerically. A plain string comparison treats
# "4.2.0" as newer than "4.2" and "10.0.0" as older than "4.2", which would
# pick the wrong keyword below.
altair_major_minor = tuple(int(part) for part in alt.__version__.split(".")[:2])
if altair_major_minor > (4, 2):
    # valid after altair 5.0 is released
    initial_selection = {"value": data_fields[0]}
else:
    # depr in altair 5.0
    initial_selection = {"init": {"parameter": data_fields[0]}}

# Dropdown-bound selection that starts on the first data field.
selection = alt.selection_single(
    fields=["parameter"],
    bind=input_dropdown,
    name="selected",
    **initial_selection,
)

# Colour ramp shared by the charts; the domain is neither forced to include
# zero nor rounded to "nice" values.
scale_color = alt.Scale(
    range=[
        "#3D3D3D",
        "#F0F8FF",
        "cornflowerblue",
        "mediumseagreen",
        "#FFEE00",
        "darkorange",
        "firebrick",
    ],
    zero=False,
    nice=False,
)

# Interval selection used to filter the linked charts.
brush = alt.selection(type="interval", name="BRUSH")

# prepare base_matrix


In [None]:
# Encodings for the SOM grid: one rectangle per (X, Y) node, no axes drawn.
node_x = alt.X("X:N", scale=alt.Scale(paddingInner=0.02), axis=None, sort=None)
node_y = alt.Y("Y:N", scale=alt.Scale(paddingInner=0.02), axis=None, sort=None)

# Nodes inside the brush are drawn opaque, the rest faded.
brush_opacity = alt.condition(brush, alt.value(1), alt.value(0.2))

# cannot yet use expr for title. see https://github.com/vega/vega-lite/issues/7264
value_legend = alt.Legend(title="selected_parameter", labelLimit=300, orient="top")
value_color = alt.Color("value:Q", scale=scale_color, legend=value_legend)

# Fold the data fields into (parameter, value) pairs, attach the dropdown
# selection and the brush, then keep only the rows of the chosen parameter.
components = (
    alt.Chart(som.codebook)
    .mark_rect()
    .encode(x=node_x, y=node_y, opacity=brush_opacity, color=value_color)
    .transform_fold(data_fields, as_=["parameter", "value"])
    .add_selection(selection)
    .add_selection(brush)
    .transform_filter(selection)
    .properties(width={"step": step_size}, height={"step": step_size})
)

In [None]:
# The lookup below joins on 'bmu', so the chart data must carry that column.
# `df_target_var` on its own only holds the target column(s), so the lookup
# would have nothing to match on; add the SOM's `bmu` values here.
target_with_bmu = df_target_var.assign(bmu=som.bmu.astype(int))

# Bar chart of the target values, restricted to the rows whose SOM node lies
# inside the brush drawn on the component chart.
dynamic = alt.Chart(target_with_bmu).mark_bar().encode(
    x='Output',
    y='count()',
    color='Output'
).transform_lookup(
    # Join each row to its codebook entry through the 'XY' key.
    lookup='bmu',
    from_=alt.LookupData(som.codebook, 'XY'),
    as_='lu'
).transform_calculate(
    # Expose the node coordinates under the field names the brush filters on.
    "X", "datum.lu.X"
).transform_calculate(
    "Y", "datum.lu.Y"
).transform_filter(
    brush
)

# combine charts in layer and horizontal concat
comb = alt.hconcat(components, dynamic)
comb

In [None]:
# NOTE(review): this cell uses `matrix_csv`, `map_base` and `map_rivers`, none of
# which are defined in the visible part of the notebook, and it looks up the
# fields 'som_key', 'lon' and 'lat' on `df_target_var`, which is built from the
# 'Output' column only. Presumably it was carried over from another notebook
# (the chart title talks about European rivers) - confirm before running.
# prepare dots on basemap
map_dynamic = alt.Chart(df_target_var).mark_circle(
    stroke='white',
    strokeWidth=0.2,
    size=12
).encode(
    longitude='lon:Q',
    latitude='lat:Q',    
    color=alt.Color('value:Q', sort= "ascending", scale=scale_color),
    size=alt.Size('value:Q', scale=alt.Scale(range=[30, 160]), legend=None),
).transform_lookup(
    # Join each row to a record of `matrix_csv` through its 'XY' key.
    lookup = 'som_key',
    from_ = alt.LookupData(matrix_csv, 'XY'),
    as_ = 'geo'
).transform_calculate(
    "X", "datum.geo.X"
).transform_calculate(
    "Y", "datum.geo.Y"
).transform_calculate(
    # NOTE(review): `select_parameter` is not a name created in this notebook
    # (the dropdown selection above is named "selected") - verify.
    "value", "datum.geo[select_parameter]"
).transform_filter(
    brush 
)

# combine charts in layer and horizontal concat
comb = alt.hconcat(
    components, 
    alt.layer(
        map_base, 
        map_dynamic, 
        map_rivers
    ).properties(width=500, height=450, view={'fill':'#3D3D3D'})
).properties(title=alt.TitleParams(text=
    '''A self-organizing map of European rivers using variables
representing the geographical characteristics of their catchments''', 
    anchor='middle')
)
comb