In [1]:
import numpy as np
import pandas as pd


from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline


from omnixai.explainers.tabular import TabularExplainer
from omnixai.data.tabular import Tabular

import dill as pickle


In [2]:
#Model selection function
def regressor_selection(X, y, metric='r2', cv=5, n_jobs=-1):
    """Grid-search three regressor families and return the winning regressor.

    A single-step pipeline is searched over RandomForestRegressor,
    KNeighborsRegressor and Lasso candidates, each with its own
    hyperparameter grid.

    Parameters
    ----------
    X : array-like or sparse matrix
        Feature matrix passed straight to ``GridSearchCV.fit``.
    y : array-like
        Regression target.
    metric : str, default 'r2'
        Forwarded to ``GridSearchCV`` as ``scoring``.
    cv : int, default 5
        Forwarded to ``GridSearchCV`` as ``cv``.
    n_jobs : int, default -1
        Forwarded to ``GridSearchCV`` as ``n_jobs``.

    Returns
    -------
    estimator
        The ``'regressor'`` step of ``best_estimator_``, i.e. the winning
        regressor configured with the winning hyperparameters. It can be
        fitted again by the caller.
    """
    # The initial regressor is only a placeholder; every grid entry below
    # replaces the 'regressor' step with its own candidate.
    pipe = Pipeline([('regressor', RandomForestRegressor())])

    param_grid = [
        {
            'regressor': [RandomForestRegressor()],
            'regressor__n_estimators': [100, 200, 500],
            'regressor__max_depth': list(range(5, 25, 5)),
            'regressor__min_samples_split': list(range(4, 12, 2)),
        },
        {
            'regressor': [KNeighborsRegressor()],
            'regressor__n_neighbors': [5, 10, 20, 30],
            'regressor__p': [1, 2],
        },
        {
            'regressor': [Lasso(max_iter=500)],
            'regressor__alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        },
    ]

    search = GridSearchCV(pipe, param_grid=param_grid,
                          cv=cv, n_jobs=n_jobs, scoring=metric)
    search.fit(X, y)

    # best_params_['regressor'] holds only the candidate object listed in the
    # grid; the winning hyperparameters are stored under separate
    # 'regressor__*' keys, so returning that object would drop the tuned
    # values. The regressor inside best_estimator_ carries them.
    return search.best_estimator_.named_steps['regressor']

In [4]:
# Load the training table from the CSV file
url = 'Restaurant_Profitability_Training_Data.csv'
df = pd.read_csv(url)

# Column roles: one target column, the rest split by how they are encoded.
# `Target` and `target` both name the same column.
target = 'Profit'
Target = target
numerical_feature = []
categorical_features = [
    'Area',
    'Age',
    'Type',
    'Price Range',
    'Capacity',
    'Number of Menu Items',
]

# Feature frame (categorical columns first, then numerical) and target series
data = df[categorical_features + numerical_feature]
label = df[target]



#Data Preprocessing
# Numerical columns: fill missing values with the median, then standardize.
numeric_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median'))
                                      ,('scaler', StandardScaler())])
# Categorical columns: one-hot encode. handle_unknown='ignore' makes a category
# value that was absent during fit encode as all zeros instead of raising,
# so the saved predictor does not fail on unseen values.
categorical_transformer = OneHotEncoder(categories='auto', handle_unknown='ignore')

# Route each column group to its transformer; the fitted encoder is reused
# by the later cells.
encoder = ColumnTransformer(
    transformers=[
        ('numerical', numeric_transformer, numerical_feature),
        ('categorical', categorical_transformer, categorical_features)])
encoder.fit(data)

In [5]:
#Model Building and Selection
# Encode the features once and reuse the result for both the model search
# and the final fit, instead of transforming the same frame twice.
encoded_data = encoder.transform(data)
clf = regressor_selection(encoded_data, label, metric = 'r2')
# fit() returns the estimator itself, so `model` and `clf` are the same object.
model = clf.fit(encoded_data, label)


In [6]:
# Chain the fitted encoder and the fitted regressor into one pipeline so that
# prediction can be called directly on the raw feature columns.
Prediction = make_pipeline(encoder,model)


In [7]:
# Serialize the prediction pipeline to disk (`pickle` is dill in this notebook).
filename = 'Profitability_Prediction.sav'
# The context manager closes the file, flushing the data, even if dump() raises.
with open(filename, 'wb') as model_file:
    pickle.dump(Prediction, model_file)


In [8]:
def pre_processing(tabular_data):
    """Return whatever ``tabular_data.to_pd()`` produces.

    Passed to the explainer as its ``preprocess`` hook.
    """
    frame = tabular_data.to_pd()
    return frame

    
# Build a TabularExplainer that runs LIME and SHAP against the prediction
# pipeline in regression mode.
explainers = TabularExplainer(
    # Explanation methods to apply
    explainers=["lime", "shap"],
    # Task type
    mode="regression",
    # Feature frame wrapped as Tabular, with the categorical columns declared
    data=Tabular(data=data, categorical_columns=categorical_features),
    # Model being explained
    model=Prediction,
    # Converts the Tabular input into the model's input
    preprocess=pre_processing,
    # Per-explainer settings
    params={
        "lime": {"kernel_width": 3},
        "shap": {"nsamples": 100},
    },
)

In [9]:
# Serialize the explainer bundle to disk (`pickle` is dill in this notebook).
filename = 'explainers.sav'
# The context manager closes the file, flushing the data, even if dump() raises.
with open(filename, 'wb') as explainer_file:
    pickle.dump(explainers, explainer_file)
