# 2-Component Data

In [None]:
import numpy as np
import time

from ml4pd import components
from ml4pd_utils.utils import prep_dist, evaluators, io, prep_df, plot_utils
from ml4pd.aspen_units import Distillation
from ml4pd.streams import MaterialStream

# Load the molecule label table and register every molecule name with the
# `components` module before any stream is built.
input_molecules = prep_dist.get_mol_labels()
components.set_components(input_molecules['name'].to_list())

# Fetch the distillation test/validation data restricted to
# compositions=[2], the two listed dates and the ketone / vfa chemistries.
raw_data = io.get_test_val_data(
    compositions=[2],
    dates=['220803', '220821'],
    unit="distillation",
    chemistries=['ketone', 'vfa'],
    ftype="aspen",
)

# Post-process the raw table in a single expression: attach the name columns
# (from the 'name' / 'mol' label columns), add flow percentages, then rename
# the flowrate columns.
data = prep_df.rename_flowrate_columns(
    prep_dist.add_flow_perc(
        prep_df.add_name_columns(raw_data, input_molecules[['name', 'mol']])
    )
)


## Benchmark Info

In [None]:
# Display the molecule label table transposed (one column per molecule).
input_molecules.transpose()

In [None]:
# Display summary statistics of the raw benchmark data, transposed so each
# described column becomes a row.
raw_data.describe().transpose()

## Time Benchmark

In [None]:

# Time the full prediction pipeline over several repeated runs. Each run
# rebuilds the feed stream and the distillation column from `data` and calls
# the column on the feed, so the measured interval covers construction plus
# prediction.
times = []
for _ in range(10):
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-run.
    start_time = time.perf_counter()
    feed_stream = MaterialStream(stream_type="feed")(
        vapor_fraction=data['vapor_fraction'].to_list(),
        pressure=data['feed_pressure'].to_list(),
        molecules=prep_df.get_name_columns(data).to_dict('list'),
        flowrates=prep_df.get_flowrate_columns(data).to_dict('list'),
    )

    dist_col = Distillation(
        no_stages=data['no_stages'].to_list(),
        feed_stage=data['feed_stage'].to_list(),
        pressure=data['pressure_atm'].to_list(),
        reflux_ratio=data['ratio_reflux'].to_list(),
        boilup_ratio=data['ratio_boilup'].to_list(),
        verbose=False,
        fillna=False,
    )

    bott_stream, dist_stream = dist_col(feed_stream)

    times.append(time.perf_counter() - start_time)

# Reorder the ground-truth targets using the feed stream's `_mw_idx`; the
# benchmark cells below compare predictions against `ordered_data`.
# NOTE(review): presumably this aligns true values with the model's
# molecular-weight ordering -- confirm against prep_dist.
ordered_data = prep_dist.sort_targets_by_weight(data, feed_stream._mw_idx)

# Mean and standard deviation of the per-run durations, in seconds.
average = np.mean(times).round(2)
std = np.std(times).round(2)

print(f"{len(data)} data pts take {average} +/- {std} seconds to predict.")
        

## Classifier Benchmark

In [None]:
# Index labels of the rows whose true 'Status' is 'OK'.
ok_idx = np.array(
    ordered_data.loc[ordered_data['Status'] == 'OK'].index
)

# Confusion matrix of the true data against the column's predicted status.
plot_utils.plot_confusion_matrix(
    ordered_data,
    dist_col.status,
)
            

## Flowrates Benchmark

In [None]:
# Plot the predicted bottom-stream flows against the true values, restricted
# to rows whose 'Status' is 'OK'.
prep_dist.plot_flow(
    all_true=ordered_data,
    y_pred=bott_stream.flow,
    data_slice={'Status': 'OK'},
)

## Duty Benchmark

In [None]:
# Plot the predicted condenser and reboiler duties against the true values,
# restricted to rows whose 'Status' is 'OK'.
plot_utils.plot(
    all_true=ordered_data,
    duty_condensor=dist_col.condensor_duty,
    duty_reboiler=dist_col.reboiler_duty,
    data_slice={
        "Status": "OK",
    },
)

## Temperature Benchmark

In [None]:
# Plot the predicted bottom and distillate stream temperatures against the
# true values, restricted to rows whose 'Status' is 'OK'.
plot_utils.plot(
    all_true=ordered_data,
    temp_bott=bott_stream.temperature,
    temp_dist=dist_stream.temperature,
    data_slice={
        'Status': 'OK',
    },
)
            

## Mean Absolute Error

In [None]:
# Evaluate the bottom-stream flow predictions with the 'mae' metric on rows
# whose 'Status' is 'OK'.
prep_dist.evaluate_flow(
    all_true=ordered_data,
    y_pred=bott_stream.flow,
    metric='mae',
    data_slice={
        "Status": "OK",
    },
)

In [None]:
# Evaluate the condenser and reboiler duty predictions with the "mae" metric
# on rows whose 'Status' is 'OK'.
evaluators.evaluate(
    all_true=ordered_data,
    duty_condensor=dist_col.condensor_duty,
    duty_reboiler=dist_col.reboiler_duty,
    data_slice={
        'Status': 'OK',
    },
    metric="mae",
)

In [None]:
# Evaluate the bottom and distillate temperature predictions with the "mae"
# metric on rows whose 'Status' is 'OK'.
evaluators.evaluate(
    all_true=ordered_data,
    temp_bott=bott_stream.temperature,
    temp_dist=dist_stream.temperature,
    data_slice={
        'Status': 'OK',
    },
    metric="mae",
)