In [1]:
# Notebook setup: silence warnings, extend the import path, pick the Plotly renderer.

# Ignore every warning so the rendered notebook output stays uncluttered.
# The filter is installed before the remaining imports, so it is already
# active when they run.
import warnings
warnings.filterwarnings('ignore')

# Resolve path when used in a usecase project: the directory three levels
# above the current working directory is resolved to an absolute path and
# placed at the front of the import search path.
# NOTE(review): presumably this is what makes `reporting` importable here — confirm.
import sys
from pathlib import Path
sys.path.insert(0, str(Path("../../../").resolve()))

# Use "notebook_connected" as the default Plotly renderer for displayed figures.
import plotly.io as pio
pio.renderers.default = "notebook_connected"

# Guide to the charts in `reporting.charts`

This notebook explains (with examples) the functions included in the various submodules of the `charts` module within the `reporting` package.
1. `feature_overview` provides a comprehensive visualisation of the chosen feature
2. `primitives` contains auxiliary functions for text formatting, text wrapping, plotting correlations, etc.
3. `batchplot` contains functions to visualise batch-level data. This is described in detail in [this tutorial](todo)

# `reporting.charts.feature_overview`

## `plot_feature_overview`

A mixed plot used for exploring the properties of a feature. It includes a box plot, a histogram, a scatter plot against the target variable (if specified), a time series plot and a table of descriptive statistics.

* Box plot and histogram chart of the feature give us an overview of its **distribution and key statistics** visually.


* Descriptive Statistics table supplements the charts with key statistics values.  


* Scatter plot of the feature against target variable helps us find **the relationship that can initially reveal predictive power**.


* Time series plot of the feature allows us to understand **how the feature has evolved over time**, the kind of insight that is difficult to deduce from box and histogram charts.

### Plotting the feature overview

Plots the figures stated above

In [2]:
from reporting.charts.feature_overview import plot_feature_overview
from reporting.datasets import get_throughput_data

# Load the throughput data first, then request the overview for a single
# feature, with a timestamp column and a target column supplied.
throughput_data = get_throughput_data()

plot_feature_overview(
    data=throughput_data,
    feature="inp_avg_hardness",
    timestamp="status_time",
    target="total_cu",
)

### Unknown target

If the target is not decided yet, the user can simply omit this argument.

In [3]:
from reporting.charts.feature_overview import plot_feature_overview
from reporting.datasets import get_throughput_data

# Same overview as before, but no `target` is passed.
throughput_data = get_throughput_data()

plot_feature_overview(
    data=throughput_data,
    feature="inp_avg_hardness",
    timestamp="status_time",
)

### Adding tag range to overview

To add feature ranges to the visualisation, provide `tag_range` argument. When a single tag is passed as `feature`, then `tag_range` can be a simple tuple with lower and upper limits: `(min, max)`.

In [4]:
from reporting.charts.feature_overview import plot_feature_overview
from reporting.datasets import get_throughput_data

# Overview of a single tag with its range supplied as a (min, max) tuple.
throughput_data = get_throughput_data()

plot_feature_overview(
    data=throughput_data,
    feature="outp_quantity",
    tag_range=(150, 300),  # (lower limit, upper limit) for the feature
    timestamp="status_time",
    target="total_cu",
)

# `reporting.charts.primitives`

## `plot_correlation`

Use the `mask` argument to show only the desired part of the correlation matrix (can be `"lower"`/`"upper"`/`None`).

In [5]:
from reporting.charts.primitives import plot_correlation
from reporting.datasets import get_throughput_data

# Correlation plot of the throughput data with mask="lower" and a fixed height.
master_table = get_throughput_data()

plot_correlation(
    master_table,
    mask="lower",
    height=500,
)

### Filtering and sorting columns/rows

If you want to filter the resulting correlation matrix by rows, by columns, or by both, use the `rows` argument, the `columns` argument, or both of them together.

That might be useful for plotting target-feature correlations.

Additionally one might want to sort resulting data; this can be achieved by providing `sort_by`.

In [6]:
from reporting.charts.primitives import plot_correlation
from reporting.datasets import get_throughput_data

# Restrict the correlation plot to a single column and sort by that same column.
master_table = get_throughput_data()

plot_correlation(
    master_table,
    # selects one column, target in the example
    columns=["outp_quantity"],
    # orders by one column, target in the example
    sort_by="outp_quantity",
    width=200,
)

## `plot_focused_pairplot`

This function produces several scatter plots of the `target_column` versus a set of `feature_columns`. 

If no `feature_columns` are provided, all variables of `data` are plotted against the `target_column`.

It is possible to choose the number of plots per row by changing `n_columns`.

In [7]:
from reporting.charts.primitives import plot_focused_pairplot
from reporting.datasets import get_throughput_data

# Scatter plots of one target column against a list of feature columns,
# laid out three plots per row.
throughput_data = get_throughput_data()

plot_focused_pairplot(
    data=throughput_data,
    # y axis of all scatter plots
    target_column="cu_content",
    # features to plot against target_column
    # NOTE(review): "cu_content" is also the target, so that panel plots the
    # target against itself — confirm this is intended.
    feature_columns=[
        "inp_quantity",
        "cu_content",
        "outp_quantity",
        "inp_avg_hardness",
        "rec_ph",
        "rec_percent",
    ],
    # number of plots per row
    n_columns=3,
)

## `plot_string`

This function helps to render text blocks.

Use the standard Python end-of-line character (`\n`) to start a new line.

In [8]:
from reporting.charts.primitives import plot_string

plot_string(
    text=(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n"
        "Donec et consectetur lacus. Ut vel ante.\n\n"
        "Cras nisi felis, ultrices nec facilisis eget."
    ),
    title="Lorem ipsum dolor sit amet",
)

### Changing Font Size

To update font size use `text_size` and `title_size` arguments

In [9]:
# Long body text made of two paragraphs separated by a blank line ("\n\n").
lorem_text = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Etiam nec efficitur ex. Donec laoreet fringilla sagittis. "
    "Sed hendrerit bibendum aliquet. Etiam libero lectus, "
    "egestas non consequat ut, accumsan bibendum nunc. "
    "Vivamus ut quam bibendum, iaculis lorem ornare, "
    "laoreet ligula. Aenean eleifend dui quis leo pretium, "
    "et tempor sem convallis. "
    "Maecenas sed lorem in arcu facilisis placerat. "
    "Nunc in mauris faucibus, iaculis augue vitae, vulputate ipsum.\n\n"
    "Pellentesque euismod sodales arcu a vestibulum. "
    "Phasellus nec augue commodo, eleifend odio sit amet, "
    "sollicitudin augue. "
    "Vestibulum ante ipsum primis in faucibus orci luctus"
    " et ultrices posuere cubilia curae; Maecenas posuere elementum pulvinar. "
    "Donec mi metus, molestie eget nisl sed, faucibus dictum arcu. "
    "Mauris faucibus, odio at blandit sodales, tortor eros laoreet lorem, "
    "eu laoreet metus velit ut quam. Suspendisse potenti. "
    "Mauris tristique, velit quis ultricies tempus, mauris libero viverra ligula, "
    "quis tempus arcu elit eget diam."
)

# Long, multi-sentence title.
lorem_title = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Donec imperdiet tempus augue, tincidunt feugiat felis iaculis quis. "
    "Donec quis semper."
)

# Explicit font sizes for the title and the body text.
plot_string(
    text=lorem_text,
    title=lorem_title,
    title_size=30,
    text_size=10,
)

### Text Wrapping

Text wrapping is useful when
* text lacks EOL characters
* text has very long sentences

Use `max_characters_per_text_line` to specify max line length. All sentences will be wrapped to fit in this length. If you don't want to apply any wrapping, pass `None`.

In [10]:
# Long body text with a single explicit line break ("\n") between its two parts.
lorem_text = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Etiam nec efficitur ex. Donec laoreet fringilla sagittis. "
    "Sed hendrerit bibendum aliquet. Etiam libero lectus, "
    "egestas non consequat ut, accumsan bibendum nunc. "
    "Vivamus ut quam bibendum, iaculis lorem ornare, "
    "laoreet ligula. Aenean eleifend dui quis leo pretium, "
    "et tempor sem convallis. "
    "Maecenas sed lorem in arcu facilisis placerat. "
    "Nunc in mauris faucibus, iaculis augue vitae, vulputate ipsum.\n"
    "Pellentesque euismod sodales arcu a vestibulum. "
    "Phasellus nec augue commodo, eleifend odio sit amet, "
    "sollicitudin augue. "
    "Vestibulum ante ipsum primis in faucibus orci luctus"
    " et ultrices posuere cubilia curae; Maecenas posuere elementum pulvinar. "
    "Donec mi metus, molestie eget nisl sed, faucibus dictum arcu. "
    "Mauris faucibus, odio at blandit sodales, tortor eros laoreet lorem, "
    "eu laoreet metus velit ut quam. Suspendisse potenti. "
    "Mauris tristique, velit quis ultricies tempus, mauris libero viverra ligula, "
    "quis tempus arcu elit eget diam."
)

# Long, multi-sentence title.
lorem_title = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Donec imperdiet tempus augue, tincidunt feugiat felis iaculis quis. "
    "Donec quis semper."
)

# Cap the length of each text line at 75 characters.
plot_string(
    text=lorem_text,
    title=lorem_title,
    max_characters_per_text_line=75,
)