In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

## Functions for Feature Importance

In [4]:
def get_linear_regression_feature_importance(model_file, feature_names):
    """
    Ranks the features of a persisted linear regression model by coefficient magnitude.

    The model is loaded from disk, the absolute values of its ``coef_`` are
    standardized with ``StandardScaler`` and the features are returned sorted
    from the largest to the smallest standardized magnitude.

    Parameters:
    - model_file (str): File path to the .sav file containing the linear regression model.
    - feature_names (list): List of feature names, one per model coefficient.
      NOTE(review): presumably in the column order used when the model was
      fitted - confirm against the training code.

    Returns:
    - DataFrame: Columns 'Feature' and 'Importance', sorted by 'Importance' in
      descending order. 'Importance' is a z-score computed across the features,
      so features with a below-average coefficient magnitude get negative values.

    Raises:
    - ValueError: If the number of coefficients differs from len(feature_names).
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    model = joblib.load(model_file)

    # coef_ may be 1-D (n_features,) or 2-D (n_targets, n_features). Averaging
    # the magnitudes over the first axis always leaves one value per feature;
    # for a 1-D or single-row coef_ the values are unchanged.
    absolute_coefficients = np.abs(np.atleast_2d(model.coef_)).mean(axis=0)

    # Fail early with a readable message instead of pandas' generic
    # "arrays must be of the same length" error.
    feature_names = list(feature_names)
    if len(feature_names) != absolute_coefficients.shape[0]:
        raise ValueError(
            f"Model has {absolute_coefficients.shape[0]} coefficients but "
            f"{len(feature_names)} feature names were given."
        )

    # StandardScaler expects a 2-D column, hence the reshape / flatten round trip.
    scaler = StandardScaler()
    normalized_coefficients = scaler.fit_transform(absolute_coefficients.reshape(-1, 1)).flatten()

    feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': normalized_coefficients})

    return feature_importance_df.sort_values(by='Importance', ascending=False)


def get_svm_feature_importance(model_file, feature_names, X_train=None):
    """
    Ranks the features of a persisted linear SVM model by coefficient magnitude.

    The model is loaded from disk, the absolute values of its ``coef_`` are
    standardized with ``StandardScaler`` and the features are returned sorted
    from the largest to the smallest standardized magnitude.

    Parameters:
    - model_file (str): File path to the .sav file containing the SVM model.
    - feature_names (list): List of feature names, one per model coefficient.
    - X_train (DataFrame or array-like, optional): Not read by this function;
      kept only so that existing three-argument calls keep working.

    Returns:
    - DataFrame: Columns 'Feature' and 'Importance', sorted by 'Importance' in
      descending order. 'Importance' is a z-score computed across the features,
      so it can be negative.

    Raises:
    - NotImplementedError: If the model's kernel is not 'linear'.
    - ValueError: If the number of coefficients differs from len(feature_names).
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    model = joblib.load(model_file)

    # Estimators such as LinearSVC expose coef_ but have no `kernel`
    # attribute; treat a missing attribute as a linear kernel.
    kernel = getattr(model, 'kernel', 'linear')
    if kernel != 'linear':
        raise NotImplementedError("Feature importance for non-linear SVMs is not implemented yet.")

    # coef_ has one row for regression / binary classification and several
    # rows for multiclass models. Averaging the magnitudes over the rows always
    # leaves one value per feature; a single row is returned unchanged.
    importance = np.abs(np.atleast_2d(model.coef_)).mean(axis=0)

    # Fail early with a readable message instead of pandas' generic
    # "arrays must be of the same length" error.
    feature_names = list(feature_names)
    if len(feature_names) != importance.shape[0]:
        raise ValueError(
            f"Model has {importance.shape[0]} coefficients but "
            f"{len(feature_names)} feature names were given."
        )

    # StandardScaler expects a 2-D column, hence the reshape / flatten round trip.
    scaler = StandardScaler()
    normalized_importance = scaler.fit_transform(importance.reshape(-1, 1)).flatten()

    feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': normalized_importance})

    return feature_importance_df.sort_values(by='Importance', ascending=False)

## Linear Regression

In [5]:
# Rank the five input columns for the model stored in models/LinearRegression.sav.
feature_names = ["Open", "High", "Low", "Close", "Volume"]
path = "models/LinearRegression.sav"

feature_importance_df = get_linear_regression_feature_importance(
    path,
    feature_names,
)
print(feature_importance_df)

Ranked Feature Importance:
  Feature  Importance
3   Close    1.997547
2     Low   -0.435193
1    High   -0.457862
0    Open   -0.531928
4  Volume   -0.572564


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [6]:
# Same ranking as the previous cell, for the model stored in
# models/LinearRegression_log.sav.
feature_names = ["Open", "High", "Low", "Close", "Volume"]
path = "models/LinearRegression_log.sav"

feature_importance_df = get_linear_regression_feature_importance(
    path,
    feature_names,
)
print(feature_importance_df)

  Feature  Importance
3   Close    1.182225
1    High    0.615189
0    Open    0.202764
2     Low   -0.231189
4  Volume   -1.768990


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## SVM

In [None]:
# Rank the five input columns for the model stored in models/SVM.sav.
feature_names = ["Open", "High", "Low", "Close", "Volume"]
model_file_path = 'models/SVM.sav'

# TODO: supply the training data. Open question: how to obtain it here
# without re-running the file that trains the model. `...` (Ellipsis) is a
# placeholder until then.
# NOTE(review): get_svm_feature_importance, as defined in this notebook, never
# reads X_train, so the placeholder does not influence the result.
X_train = ...

feature_importance = get_svm_feature_importance(
    model_file_path,
    feature_names,
    X_train,
)
print(feature_importance)