# Deploying Mtcars Model into Production

In this example, we'll create a simple random forest model on the `mtcars` dataset and prepare it for deployment as a google cloud function.

In [1]:
import pandas as pd
from sklearn.ensemble        import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics         import mean_squared_error, mean_absolute_error
from sklearn.externals       import joblib

## Data Preparation

In [2]:
# Download and read data
mtcars   = 'https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv'
raw_data = pd.read_csv(mtcars)

In [4]:
# Drop car name column
raw_data.drop('model', inplace=True, axis=1)

# Split into training and testing set
train, test = train_test_split(raw_data,
                               test_size=0.3,
                               random_state=451)
# Separe target column
train_y = train.pop('mpg')
test_y  = test.pop('mpg')

## Preprocessing

In [6]:
def feature_engineering(dataframe):
    dataframe['hp_per_weight'] = dataframe.loc[:,'hp'] / dataframe.loc[:,'wt']
    return(dataframe)

In [7]:
# Apply feature engineering
train = feature_engineering(train)
test  = feature_engineering(test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


## Fit Model

In [8]:
# Train simple model
rf_model = RandomForestRegressor(501)
rf_model.fit(train,train_y)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=501, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

## Evaluate Results

In [9]:
preds = rf_model.predict(test)
print('MAE:  {}'.format(mean_absolute_error(test_y, preds)))
print('RMSE: {}'.format(mean_squared_error(test_y, preds)))

MAE: 2.3604191616766443
RMSE: 7.460681634734433


In [10]:
# Export Model to Disk
joblib.dump(rf_model, 'model.pkl', compress=False)

['rf_model.pkl']

## Prepare Model Deployment

In [11]:
def load_model(filename='model.pkl'):
    '''
    Load Model Function
    
    When called, will return the a model loaded into memory.
    '''
    from sklearn.externals import joblib
    return(joblib.load(filename))

In [12]:
def predict(model, dataframe):
    '''
    Generate Prediction Function
    
    Upon recieving a new observation, will generate a prediction.
    '''
    dataframe = feature_engineering(dataframe)
    result    = model.predict(dataframe)
    return(result)

In [13]:
import open_deployment as od

od.deploy_faas(version='1.0.0',
               load_ml_model_function = load_model,
               prediction_function    = predict,
               model_file = 'model.pkl',
               preprocessing_function = feature_engineering
              )

  assert(inspect.isfunction(load_ml_model_function), "Object 'load_ml_model_function' is not a function")
  assert(inspect.isfunction(prediction_function), "Object 'prediction_function' is not a function")


Dependencies successfully written in file requirements.txt!
Directory . already exists
FaaS files successfully generated!
Zip file successfully generated!
yeet!
