# Class 5 - Machine Learning Operations

In [None]:
#!pip install -r requirements.txt

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shutil

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras import layers

**Load dataset - Boston housing**

Attribute Information:
- **CRIM**     - per capita crime rate by town
- **ZN**       - proportion of residential land zoned for lots over 25,000 sq.ft.
- **INDUS**   -  proportion of non-retail business acres per town
- **CHAS**     - Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- **NOX**     - nitric oxides concentration (parts per 10 million)
- **RM**      - average number of rooms per dwelling
- **AGE**     -  proportion of owner-occupied units built prior to 1940
- **DIS**     -  weighted distances to five Boston employment centres
- **RAD**      - index of accessibility to radial highways
- **TAX**      - full-value property-tax rate per \$10,000
- **PTRATIO** -  pupil-teacher ratio by town
- **B**       -  1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- **LSTAT**   -  % lower status of the population
- **MEDV**    -  Median value of owner-occupied homes in \$1000's

**Task:** Regression of target feature `MEDV`

In [None]:
# Read the raw table first, then discard the 'Unnamed: 0' column
boston = pd.read_csv("Boston.csv")
boston = boston.drop(columns=['Unnamed: 0'])

Splitting data into subsets

In [None]:
# Separate the regression target ('medv') from the predictors, then keep
# 70% of the rows for training (fixed seed keeps the split reproducible).
y = boston['medv']
X = boston.drop(columns=['medv'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42
)

Training neural network with three hidden layers and ReLU activation function

In [None]:
# Feed-forward regressor: three hidden ReLU layers of 64 units each and a
# single linear output unit.
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[X.shape[1]]),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# Optimizing for Mean Square Error (MSE); MAE is tracked as an extra metric
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
# Early stopping may be considered as a regularization: training halts once
# the validation loss has not improved for `patience` consecutive epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# `epochs` is only an upper bound. It must be well above `patience`;
# with epochs == patience the early-stopping rule can never trigger.
training_record = model.fit(X_train,
                            y_train,
                            epochs=200,
                            validation_split=0.3,
                            verbose=1,
                            callbacks=[early_stop])

In [None]:
# Print the layer-by-layer summary of the network defined above
model.summary()

In [None]:
# Collect the per-epoch metrics recorded during fitting into one table
df = pd.DataFrame(training_record.history).assign(epoch=training_record.epoch)

# Training vs. validation MSE over the epochs
plt.figure(figsize=[15, 8])
for metric_name, curve_label in (('mse', 'Train'), ('val_mse', 'Val')):
    plt.plot(df['epoch'], df[metric_name], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Mean Square Error')
plt.legend();

In [None]:
def model_quality(y_true, y_pred):
    """Print RMSE and R^2 of predictions against the true target values.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Model predictions for the same observations.
    """
    # Both metrics are computed from the arguments only, so the function no
    # longer silently depends on the notebook's global `y_test`.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"Model RMSE: {rmse:.3f} (mean test y: {np.mean(y_true):.3f})")
    print(f"Model R^2: {r2_score(y_true, y_pred)*100:.2f}%")

In [None]:
# Predict on the held-out test split and print RMSE / R^2 for the network
y_pred = model.predict(X_test, verbose=0)
model_quality(y_test, y_pred)

In [None]:
# Save model for deployment, then place a copy inside the app folder
model.save('Boston_NN.keras')
shutil.copyfile(src='Boston_NN.keras', dst='keras-app/Boston_NN.keras')

## Model deployment

In [None]:
import flask
import tensorflow
import pandas as pd
import numpy as np

# Flask application object that the route decorators below register on
app = flask.Flask(__name__)
# Placeholder for the Keras model; populated by load_model() before serving.
# NOTE: when this cell runs inside the notebook it rebinds the `model` name
# that the training cells above use for the fitted network.
model = None


def load_model():
    """Read the saved Keras network from disk into the module-level `model`."""
    global model
    saved_model_path = 'Boston_NN.keras'
    model = tensorflow.keras.models.load_model(saved_model_path)


@app.route("/")
def hello():
    """Landing page: return a short HTML usage hint for the /predict endpoint."""
    return """This is Boston prediction app. Use <b>/predict</b> endpoint with POST request e.g. <br><br> 
    curl -X POST -F data=@house.json 'http://localhost:5000/predict'"""


@app.route("/predict", methods=["POST"])
def predict():
    """Score one uploaded observation with the loaded Keras model.

    Expects a POST request whose `data` file field holds a JSON document.
    Responds with JSON containing `success` and, when a file was supplied,
    `prediction`.
    """
    data = {"success": False}

    # Guard clause: nothing to score unless a file arrived in a POST request
    if flask.request.method != "POST" or not flask.request.files.get("data"):
        return flask.jsonify(data)

    uploaded_file = flask.request.files["data"]
    observation = pd.read_json(uploaded_file, orient='index').transpose()
    raw_prediction = model.predict(observation, verbose=0)
    data["prediction"] = np.float64(raw_prediction[0][0])
    data["success"] = True
    return flask.jsonify(data)


# Entry point when this code is executed as the main module.
# NOTE(review): inside a notebook kernel `__name__` is also "__main__", so
# running this cell presumably blocks on app.run() until interrupted - confirm.
if __name__ == "__main__":
    print("* Loading Keras model and Flask server...")
    # The model is loaded once, before the server starts accepting requests
    load_model()
    # host='0.0.0.0' listens on all network interfaces; threaded=False makes
    # the development server handle requests one at a time
    app.run(host='0.0.0.0', threaded=False)

In [None]:
# Upload house.json as the `data` form file to the locally running service
!curl -X POST -F data=@house.json http://localhost:5000/predict

The app can also be launched from a terminal by switching the working directory to the `keras-app` folder and running
```shell
python app.py
```

We have an app ready to be published - right now we can only access our ML service locally, so it's still not very useful. The `app.py` can be deployed on a remote machine with a public IP and, after binding a DNS domain to that IP, the service could be available under a nice URL like http://boston-predict.com/. 

The server would require the setup of all dependencies and correct configuration, so there is additional effort to operationalize the app. With that approach, scaling the service and applying changes (maybe the next step is to add a graphical interface) would also be very tedious. Some of the problems can be alleviated by packaging the app into a container such as a [Docker container](https://www.docker.com/). Containerization is a modern technique for application development - the application source code, configuration and all required dependencies are packed within an image which can be easily shared and run on multiple machines.

```bash
cd keras-app
docker build -t boston-app .
docker run -d -p 5000:5000 boston-app
docker ps
```

In particular, containers can be run on public cloud services such as [Cloud Run](https://cloud.google.com/run) - the managed services make it easier to monitor and maintain the ML applications. First, the image created in the previous step is uploaded to [Container Registry](https://cloud.google.com/artifact-registry/docs) and then launched through Cloud Run.

In [None]:
# Upload house.json as the `data` form file to the service deployed on Cloud Run
!curl -X POST -F data=@house.json https://boston-app-t6lyhjuhjq-lm.a.run.app/predict

## Model maintenance

After deploying the model, the maintenance and monitoring phase starts. From the technical perspective, the application needs to handle all the incoming requests within reasonable time, provide appropriate error handling, stay stable under normal usage, etc. 

Additionally, the model needs to be monitored with regard to predictive performance. The drift in the incoming data (changes in the distribution of the underlying features compared to the training dataset) may degrade the model's quality. The business needs may change over time as well, which in some cases may require model retraining or redefinition of the task.

In more complex deployments, multiple models are involved in the monitoring and maintenance process. Usually the setup includes the 'leading' model and 'auxiliary' models. Commonly used techniques include:
* **champion-challenger approach** - the 'champion' model is serving the predictions as the best performing model and the model's quality metrics are gathered over time; periodically the 'challengers' are evaluated against the new data points; if a challenger scores better than the champion, it may replace it as the new champion and the process continues
* **multi-armed bandits** - there are multiple models capable of serving the prediction in the deployed solution; the leading model in terms of predictive quality handles more requests than the remaining models; often each model receives the probability of serving the prediction, where the leading model has the highest probability

**Multi-armed bandit**

In [None]:
from sklearn.linear_model import LinearRegression as LR
from sklearn.tree import DecisionTreeRegressor as DTR
import pickle

In [None]:
# Linear regression fitted and evaluated on the same train/test split
boston_lr = LR()
boston_lr.fit(X_train, y_train)
model_quality(y_test, boston_lr.predict(X_test))

# Pickle the fitted estimator and place a copy inside the app folder
with open('Boston_LR.pkl', mode='wb') as f:
    pickle.dump(boston_lr, f)
shutil.copyfile(src='Boston_LR.pkl', dst='keras-app/Boston_LR.pkl')

In [None]:
# Decision tree fitted and evaluated on the same train/test split.
# random_state is fixed (same seed as the split) so that repeated runs of the
# notebook produce the same tree and the same reported quality.
boston_dtr = DTR(random_state=42).fit(X_train, y_train)
model_quality(y_test, boston_dtr.predict(X_test))
# Pickle the fitted estimator and place a copy inside the app folder
with open('Boston_DTR.pkl', 'wb') as f:
    pickle.dump(boston_dtr, f)
shutil.copyfile('Boston_DTR.pkl', 'keras-app/Boston_DTR.pkl')

In [None]:
import flask
import tensorflow
import pandas as pd
import numpy as np
import sklearn
import pickle

# Flask application object that the route decorators below register on
app = flask.Flask(__name__)
# NOTE(review): this module-level `model` is not read by the code in this cell
# (load_models() fills model_nn / model_lr / model_dtr and predict() binds its
# own local `model`) - looks like a leftover from the single-model app.
model = None


def load_models():
    """Load the three saved regressors into module-level variables.

    Populates `model_nn` (Keras network) as well as `model_lr` and
    `model_dtr` (pickled estimators) from files in the working directory.
    """
    global model_nn, model_lr, model_dtr

    model_nn = tensorflow.keras.models.load_model('Boston_NN.keras')

    # pickle.load can execute code embedded in the file, so only trusted
    # artifacts should ever be placed at these paths.
    with open('Boston_LR.pkl', 'rb') as lr_file:
        model_lr = pickle.load(lr_file)
    with open('Boston_DTR.pkl', 'rb') as dtr_file:
        model_dtr = pickle.load(dtr_file)


@app.route("/")
def hello():
    """Landing page: return a short HTML usage hint for the /predict endpoint."""
    return """This is Boston prediction app. Use <b>/predict</b> endpoint with POST request e.g. <br><br> 
    curl -X POST -F data=@house.json 'http://localhost:5000/predict'"""


@app.route("/predict", methods=["POST"])
def predict():
    """Score one uploaded observation with a randomly chosen model.

    The first bandit (the leading model) serves a request with probability
    `epsilon`; the remaining probability is shared equally among the other
    models. Expects a POST request whose `data` file field holds a JSON
    document. Responds with JSON containing `success` and, when a file was
    supplied, `prediction` and `model` (name of the model that answered).
    """
    # NOTE: `epsilon` here is the share of traffic given to the leading model,
    # not the exploration rate of the usual epsilon-greedy convention.
    epsilon = 0.8
    bandits = [("Neural Network", model_nn), ("Linear Regression", model_lr),
               ("Decision Tree", model_dtr)]

    # Explicit pick probabilities instead of float-step np.arange thresholds:
    # same distribution, no fudge factor, and no division by zero when only a
    # single bandit is configured.
    n_others = len(bandits) - 1
    if n_others:
        pick_probs = [epsilon] + [(1 - epsilon) / n_others] * n_others
    else:
        pick_probs = [1.0]
    index = int(np.random.choice(len(bandits), p=pick_probs))
    model_name, chosen_model = bandits[index]

    data = {"success": False}
    # The route only accepts POST, so only the uploaded file needs checking
    uploaded_file = flask.request.files.get("data")
    if uploaded_file:
        observation = pd.read_json(uploaded_file, orient='index').transpose()
        if model_name == "Neural Network":
            # The Keras output is indexed as [row][output unit]
            prediction = chosen_model.predict(observation, verbose=0)[0][0]
        else:
            prediction = chosen_model.predict(observation)[0]
        # Plain Python float keeps the value JSON-serializable whatever the
        # NumPy dtype returned by the model
        data["prediction"] = float(prediction)
        data["model"] = model_name
        data["success"] = True

    return flask.jsonify(data)


# Entry point when this code is executed as the main module.
# NOTE(review): inside a notebook kernel `__name__` is also "__main__", so
# running this cell presumably blocks on app.run() until interrupted - confirm.
if __name__ == "__main__":
    print("* Loading Keras model and Flask server...")
    # All three models are loaded once, before the server starts
    load_models()
    # host='0.0.0.0' listens on all network interfaces; threaded=False makes
    # the development server handle requests one at a time
    app.run(host='0.0.0.0', threaded=False)

Validating proportions of models in response

In [None]:
import requests
from tqdm import tqdm

In [None]:
# Send the same observation n times and record which model served each request
models_response = []
with open('house.json', 'r') as f:
    files = {'data': f.read()}
n = 100
for _ in tqdm(range(n)):
    res = requests.post('http://localhost:5000/predict', files=files)
    # Fail loudly on HTTP errors instead of trying to parse an error page
    res.raise_for_status()
    # Response.json() decodes the body directly, so the `json` module (which
    # is not imported in the visible cells) is not needed
    models_response.append(res.json()['model'])
# Pair of arrays: (unique model names, number of responses per model)
models_count = np.unique(np.array(models_response), return_counts=True)

In [None]:
# Bar chart: number of responses served by each model
model_names, response_counts = models_count
plt.figure(dpi=120)
plt.bar(model_names, response_counts, color=['r', 'g', 'b'])
plt.ylabel("Frequency")