In [1]:
# Load required modules
import numpy as np
import pandas as pd

# The specific kind of model I used
from sklearn.neural_network import MLPRegressor
# Required for loading the previously saved model
import pickle

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Define Mean Squared Error for PFI
def MeSqEr(y, y_hat):
    n = y.shape[0]
    SE = np.zeros_like(y)
    for j in range(0, n):
        SE[j] = (y[j] - y_hat[j]) ** 2

    return np.sum(SE) / n

In [3]:
db = pd.read_csv('./data/PFI_db.csv')

y = db['y'] # Isolate Target Output
X = db.drop('y', axis = 1) # Remove Target Output from db

# Load the previously trained model
f_hat = pickle.load(open('./data/PFI_model.sav', 'rb'))

# Predict The reference MSE for the trained model
y_hat_0 = f_hat.predict(X)
MSE_0 = MeSqEr(y, y_hat_0)
MSE_0

1.6796875

In [4]:
def PFI(X, y, MSE_0, f_hat):

    col = list(X.columns) # store feature names
    L = int(y.shape[0] / 2) # extract the 'half-length' index
    MSE, FI = np.zeros(len(col)), np.zeros(len(col)) # pre-allocate the MSE and FI arrays
    idx = 0

    for c in col:

        X_p = X.copy() # Clone the original db

        p1 = X.loc[0:L, c] # First Partition
        p2 = X.loc[L+1:, c] # Second Partition

        X_p[c] = pd.concat([p2, p1], ignore_index=True) # Permute the partitions
        y_hat = f_hat.predict(X_p) # Predict the value with the permuted db
        MSE[idx] = MeSqEr(y, y_hat) # Compute new MSE
        FI[idx] = MSE[idx] / MSE_0 # Compute FI

        idx += 1

    # This is for having an already ordered PFI outcome
    PFI_dict = {'Feature': col, 'MSE': MSE, 'FI': FI}
    PFI_df = pd.DataFrame(PFI_dict).sort_values(by=['FI'], ascending=False, ignore_index=True)

    return PFI_df

In [5]:
PFI_df = PFI(X, y, MSE_0, f_hat)
PFI_df.head()

Unnamed: 0,Feature,MSE,FI
0,e,21.101562,12.562791
1,J,20.398438,12.144186
2,F,16.390625,9.75814
3,h,12.914062,7.688372
4,q,12.226562,7.27907
