### Demo notebook: using FLAML's AutoML feature to build task-oriented ML models

### Model Use Case

This model leverages historical service request data and related details to predict the time required to fulfil new service requests for a telecom customer.

Step 1: Install the FLAML library for AutoML and update pandas

**NOTE:** On an M1 Mac, first install libomp with `brew install libomp`; it is needed to install and run FLAML AutoML.

In [None]:
%pip install flaml
%pip install --upgrade pandas "dask[complete]"
%pip install "flaml[automl]" 

Step 2: Load the dataset and create training and test datasets


In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
try:
    # Load the historical service-request data. The request closing time is
    # the prediction target; every other column is used as a feature.
    data = pd.read_csv('../datasets/pro_MTNNSR_v3.csv')

    y = data['Request_Closing_Time_in_Seconds_CLM']
    x = data.drop('Request_Closing_Time_in_Seconds_CLM', axis=1)

    # Hold out 30% of the rows for testing.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
except Exception as load_error:
    # Deliberately broad: whatever goes wrong while loading or splitting the
    # real dataset, fall back to synthetic data so the notebook still runs.
    # Only names that actually exist may appear in the except clause,
    # otherwise the handler itself raises NameError.
    print('Could not load the dataset ({}); using synthetic data instead.'.format(load_error))

    from sklearn.datasets import make_regression
    from pandas import DataFrame

    # Generate a regression problem, because the AutoML task configured in
    # this notebook is 'regression' (a classification generator would only
    # produce 0/1 targets).
    X, y = make_regression(n_samples=539383, n_features=10)
    X = DataFrame(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
# Preview the first rows of the training features.
X_train.head()

Step 3: Initialize and configure FLAML

In [None]:
''' import AutoML class from flaml package '''
from flaml import AutoML
# Create the AutoML instance used by the cells that follow.
automl = AutoML()

In [None]:
# Options passed to the AutoML run as keyword arguments.
# Available metrics are listed in the documentation:
# https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric
settings = dict(
    time_budget=600,                     # total running time in seconds
    metric='r2',                         # optimization metric
    task='regression',                   # task type
    log_file_name='isr-experiment.log',  # flaml log file
    seed=7654321,                        # random seed
)

Step 4: Run Training

In [None]:
# Run the AutoML search on the training split, using the options in `settings`.
automl.fit(X_train=X_train, y_train=y_train, **settings)

In [None]:
'''retrieve best config and best learner'''
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
# The optimization metric is r2 and the reported loss is 1 - r2, so
# 1 - best_loss is the validation r2, not an accuracy.
print('Best r2 on validation data: {0:.4g}'.format(1-automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

In [None]:
# Display the estimator object held by automl.model.
automl.model.estimator

In [None]:
'''pickle and save the automl object'''
import pickle
# Serialize the trained AutoML object to disk with the newest pickle protocol.
with open('isr-automl.pkl', 'wb') as f:
    pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
'''load pickled automl object'''
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source (here: the file written just above).
with open('isr-automl.pkl', 'rb') as f:
    automl = pickle.load(f)

In [None]:
'''compute predictions of testing dataset'''
# The test features are stored in `X_test` (capital X); `x_test` is not defined.
y_pred = automl.predict(X_test)
print('Predicted labels', y_pred)
print('True labels', y_test)
# Reuse the predictions already computed instead of running predict() again.
y_pred_1 = y_pred[1]

In [None]:
''' compute different metric values on testing dataset'''
from flaml.ml import sklearn_metric_loss_score
from sklearn.metrics import mean_absolute_error,r2_score
# sklearn_metric_loss_score is called as (metric_name, predictions, true values)
# and returns a loss; for r2 that loss is 1 - r2, hence the conversion back.
print('R2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))
# Same argument order as the other calls (predictions first, then true values).
print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))
print('rmse', '=', sklearn_metric_loss_score('rmse', y_pred, y_test))

In [None]:
from flaml.ml import sklearn_metric_loss_score
# Compare the error on the training and test splits. The feature frames are
# named `X_train` / `X_test` (capital X); the lowercase names are not defined.
print('train rmse', '=', sklearn_metric_loss_score('rmse', automl.predict(X_train), y_train))
print(' test rmse', '=', sklearn_metric_loss_score('rmse', automl.predict(X_test), y_test))

In [None]:
from flaml.automl.data import get_output_from_log
# Read the search history back from the FLAML log. Use the same time budget
# the search was configured with, so the two values cannot drift apart
# (a separate hard-coded 240 s no longer matched the 600 s budget).
time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \
    get_output_from_log(filename=settings['log_file_name'], time_budget=settings['time_budget'])
# Print every configuration recorded in the log.
for config in config_history:
    print(config)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

plt.title('Learning Curve')
plt.xlabel('Wall Clock Time (s)')
# The search optimizes r2 and logs the loss 1 - r2, so the plotted value
# 1 - loss is the validation r2, not an accuracy.
plt.ylabel('Validation r2')
# Every logged trial as a point ...
plt.scatter(time_history, 1 - np.array(valid_loss_history))
# ... and the best value so far as a step line.
plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')
plt.show()

In [None]:
import numpy as np

def plot_prediction(train_data=X_train,
                    train_labels=y_train,
                    test_data=X_test,
                    test_labels=y_test,
                    predictions=None):
    """Scatter-plot training labels, testing labels and, optionally, predictions.

    Every series is drawn against its positional index (0, 1, 2, ...). The
    contents of ``train_data`` and ``test_data`` are not plotted; only their
    lengths are used to build the x-axis positions.

    Args:
        train_data: Training features; only ``len(train_data)`` is used.
        train_labels: Training targets, drawn in blue.
        test_data: Testing features; only ``len(test_data)`` is used.
        test_labels: Testing targets, drawn in green.
        predictions: Optional predictions for the test rows, drawn in red.
            Nothing is drawn for them when this is ``None``.
    """
    plt.figure(figsize=(10, 7))

    # x positions are simply the row numbers of each split.
    train_positions = np.arange(len(train_data))
    test_positions = np.arange(len(test_data))

    plt.scatter(train_positions, train_labels, c="b", s=4, label="Training Data!")
    plt.scatter(test_positions, test_labels, c="g", s=4, label="Testing Data")

    if predictions is not None:
        plt.scatter(test_positions, predictions, c="r", s=4, label="Predictions!!!")

    plt.legend(prop={"size": 14})

In [None]:
# Plot the training and testing targets together with the test-set predictions.
plot_prediction(predictions=y_pred)