In [None]:
from utils import utils_models, utils_gn, utils_dgrd, utils_sig
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import importlib
import numpy as np
import pandas as pd
import seaborn as sns
# Re-execute the project helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(utils_models)
importlib.reload(utils_gn)
importlib.reload(utils_dgrd)
importlib.reload(utils_sig)

In [None]:
def feature_importance_analysis(model, model_type, feature_names, target_list):
    """
    Function that calculates min-max scaled feature importance for fitted model.

    Args:
    ----
         model:         fitted model object; must expose ``regressor_.estimators_``
                        where estimator ``i`` has a ``feature_importances_`` array
         model_type:    model 'cycle' or 'capacity-ir' (not used by the
                        computation; kept so existing callers keep working)
         feature_names: name of the features (becomes the data frame index)
         target_list:   list of targets (becomes the data frame columns); target
                        ``i`` is paired with ``model.regressor_.estimators_[i]``

    Returns:
    -------
            data frame of feature importance with one row per feature and one
            column per target, each column scaled to the interval [0, 1].
            A target whose importances are all equal gets a column of zeros.
    """

    def _min_max_scale(values):
        # Scale importance values to the interval [0, 1]
        values = np.asarray(values)
        lowest = values.min()
        spread = values.max() - lowest
        if spread == 0:
            # All importances identical: min-max scaling is undefined (0/0),
            # so return zeros instead of propagating NaN into the data frame.
            return np.zeros_like(values, dtype=float)
        return (values - lowest) / spread

    estimators = model.regressor_.estimators_

    # One scaled importance vector per target, stacked as (n_targets, n_features)
    feature_importance = np.array(
        [_min_max_scale(estimators[i].feature_importances_) for i in range(len(target_list))]
    )

    # Transpose so that features are rows and targets are columns
    return pd.DataFrame(data=feature_importance.T, columns=target_list, index=feature_names)

In [None]:
# Build the signature-feature frame from the training set; its columns serve as the feature names
df = utils_sig.ccv_signature_features(
    data_dict=utils_gn.read_data('train_1238.pkl'),
    n=100,
    multi_cycle=False,
)

In [None]:
# Model that predicts knees, elbows and EOL: load it and tabulate its scaled feature importance
cycle_at_model = utils_gn.read_data('sig_cycles.pkl', folder='models')
dframe = feature_importance_analysis(
    model=cycle_at_model,
    model_type='cycle',
    feature_names=df.columns,
    target_list=['k-o', 'k-p', 'e-o', 'e-p', 'EOL'],
)

In [None]:
# Bar panels of the ten most important features, one panel per target in `dframe`
fig = plt.figure(figsize=(18, 3))
df1 = dframe.copy()
df_index = np.array(df1.index)
n_panels = len(df1.columns)  # one panel per target rather than a hard-coded 5
top_n = 10                   # number of bars shown in each panel

for i, item in enumerate(df1.columns):

    ax = fig.add_subplot(1, n_panels, i+1)
    # Target name as an in-panel label, positioned in axes coordinates
    ax.text(0.7, 0.95, item, transform=ax.transAxes, fontsize=16, fontweight='bold', va='top')

    # Positions of the top_n largest importances, largest first
    this_importance = df1[item].values
    top_index = np.argsort(this_importance)[::-1][:top_n]

    ax.bar(df_index[top_index], this_importance[top_index], color='brown', ec='black', alpha=0.78)
    ax.tick_params(axis='x', rotation=90, labelsize=14)
    ax.tick_params(axis='y', labelsize=14)

    # Only the left-most panel carries the y-axis label and tick labels
    if i == 0:
        ax.set_ylabel('Feature importance', fontsize=16)
    else:
        ax.set_yticklabels([])

plt.savefig(fname="plots/sig-feature-importance-cycle-at-bar.pdf", bbox_inches='tight')
    

In [None]:
# Model that predicts 'value_at': load it and tabulate its scaled feature importance
value_at_model = utils_gn.read_data('sig_capacity_ir.pkl', folder='models')
dframe2 = feature_importance_analysis(
    model=value_at_model,
    model_type='capacity-ir',
    feature_names=df.columns,
    target_list=['Qatk-o', 'Qatk-p', 'IRate-o', 'IRate-p', 'IRatEOL'],
)

In [None]:
# Bar panels of the ten most important features, one panel per target in `dframe2`
fig = plt.figure(figsize=(18, 3))
df1 = dframe2.copy()
df_index = np.array(df1.index)
n_panels = len(df1.columns)  # one panel per target rather than a hard-coded 5
top_n = 10                   # number of bars shown in each panel

for i, item in enumerate(df1.columns):

    ax = fig.add_subplot(1, n_panels, i+1)
    # Target name as an in-panel label, positioned in axes coordinates
    ax.text(0.6, 0.95, item, transform=ax.transAxes, fontsize=16, fontweight='bold', va='top')

    # Positions of the top_n largest importances, largest first
    this_importance = df1[item].values
    top_index = np.argsort(this_importance)[::-1][:top_n]

    ax.bar(df_index[top_index], this_importance[top_index], color='brown', ec='black', alpha=0.78)
    ax.tick_params(axis='x', rotation=90, labelsize=14)
    ax.tick_params(axis='y', labelsize=14)

    # Only the left-most panel carries the y-axis label and tick labels
    if i == 0:
        ax.set_ylabel('Feature importance', fontsize=16)
    else:
        ax.set_yticklabels([])

plt.savefig(fname="plots/sig-feature-importance-value-at-bar.pdf", bbox_inches='tight')
   