# Altair Plots

# Preliminaries

In [87]:
%load_ext lab_black

## Imports

In [1]:
import altair as alt
import pandas as pd
import numpy as np

## Setup

# Data

Generate synthetic (random) feature-importance data to drive the example plots.

## Functions

In [2]:
def _get_feature_imprt(n_features, n_orders_of_magnitude=3):
    feature_imprt = ((10**n_orders_of_magnitude)*np.random.rand(n_features))**np.random.rand(n_features)
    feature_imprt = feature_imprt/np.sum(feature_imprt)
    return feature_imprt


def _get_feature_names(n_features):
    return ["Feature_{:02d}".format(i) for i in range(n_features)]

def _get_iterations(n_features, n_iterations=2):
    return np.random.randint(n_iterations+1, size=(n_features,))

In [3]:
def get_dataframe(n_features, n_iterations=2, n_orders_of_magnitude=3, seed=None):
    """Build a frame of fake feature importances.

    Parameters
    ----------
    n_features : int
        Number of rows (one per generated feature).
    n_iterations : int, optional
        Forwarded to ``_get_iterations``.
    n_orders_of_magnitude : int, optional
        Forwarded to ``_get_feature_imprt``.
    seed : int or None, optional
        When given, NumPy's global random state is re-seeded with this value
        before any draw, so the same arguments give the same frame. Note that
        this affects later ``np.random`` calls in the session. With the
        default ``None`` the global state is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_name``, ``feature_impt`` and ``iteration_id``.
    """
    if seed is not None:
        np.random.seed(seed)

    # Dict values are evaluated top to bottom, so importances are drawn before
    # iteration ids.
    return pd.DataFrame(
        {
            "feature_name": _get_feature_names(n_features),
            "feature_impt": _get_feature_imprt(
                n_features, n_orders_of_magnitude=n_orders_of_magnitude
            ),
            "iteration_id": _get_iterations(n_features, n_iterations=n_iterations),
        }
    )

## Work

Generate the example dataset using the functions defined above.

In [4]:
# Number of fake features (rows) to generate.
n_features = 15

# The importance and iteration columns come from np.random-based helpers and
# no seed is set in the visible cells, so the values change on every run.
df = get_dataframe(n_features, n_iterations=2, n_orders_of_magnitude=3)
df.head()

Unnamed: 0,feature_name,feature_impt,iteration_id
0,Feature_00,0.453986,0
1,Feature_01,0.03002,0
2,Feature_02,0.000985,1
3,Feature_03,0.004282,0
4,Feature_04,0.026872,2


# Histogram

In [5]:
def _alias_title(title, aliases):
    for text, alias in aliases:
        if alias is not None:
            title = title.replace(text, alias)
    return title

In [6]:
def feature_importances_plot(
    df,
    x_quantity="iteration_id",
    y_quantity="feature_impt",
    c_quantity="feature_name",
    labelFontSize=20,
    axis_titleFontSize=30,
    legend_titleFontSize=20,
    fig_titleFontSize=35,
    font="times new roman",
    aliases=None,
    title="Selected Features",
    width=400,
    height=600,
):
    """Bar chart of summed ``y_quantity`` per ``x_quantity``, coloured by ``c_quantity``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to ``alt.Chart``; must contain the three quantity columns.
    x_quantity, y_quantity, c_quantity : str
        Column names for the x axis (encoded as nominal), the y axis
        (aggregated with ``sum``) and the colour channel (encoded as nominal).
    labelFontSize : int
        Font size of axis and legend labels.
    axis_titleFontSize, legend_titleFontSize, fig_titleFontSize : int
        Font sizes of the axis titles, the legend title and the figure title.
    font : str
        Font used for axis, legend and figure title text.
    aliases : iterable of (text, alias) pairs, or None
        Substitutions applied to the column names to build the axis and legend
        titles. ``None`` keeps the raw column names.
    title : str
        Figure title.
    width, height : int
        Chart size passed to ``properties``.

    Returns
    -------
    The configured Altair chart.
    """
    # The default ``aliases=None`` must not reach the iteration inside
    # ``_alias_title``; an empty list leaves every title unchanged.
    if aliases is None:
        aliases = []

    f = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(
                "{}:N".format(x_quantity),
                title=_alias_title(x_quantity, aliases),
            ),
            y=alt.Y(
                "sum({})".format(y_quantity), title=_alias_title(y_quantity, aliases)
            ),
            color=alt.Color(
                "{}:N".format(c_quantity), title=_alias_title(c_quantity, aliases)
            ),
        )
    )

    # Configuration: one font everywhere, with separate sizes per text role.
    f = (
        f.configure_axis(
            titleFont=font,
            labelFont=font,
            labelFontSize=labelFontSize,
            titleFontSize=axis_titleFontSize,
        )
        .configure_title(font=font, fontSize=fig_titleFontSize)
        .configure_legend(
            titleFont=font,
            labelFont=font,
            labelFontSize=labelFontSize,
            titleFontSize=legend_titleFontSize,
        )
    )
    f = f.properties(title=title, width=width, height=height)
    return f


# Display the chart, substituting readable titles for the raw column names.
feature_importances_plot(
    df,
    aliases=[
        ("iteration_id", "Iteration"),
        ("feature_impt", "Feature Importance"),
        ("feature_name", "Feature Name"),
    ],
)

In [11]:
# Inline version of the bar chart: only the figure title font is configured.
f = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("iteration_id:N", title="Iteration"),
        y=alt.Y("sum(feature_impt)", title="Feature Importance"),
        color=alt.Color("feature_name:N", title="Feature Name"),
    )
    .configure_title(font="computer modern", fontSize=20)
)

# The sized, titled copy is only displayed; ``f`` itself stays without properties.
f.properties(title="Selected Features", width=400, height=600)

# Accuracy Plot