In [None]:
import numpy as np
import pandas as pd
import math

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError

# Seed NumPy's global random number generator for reproducibility.
# The scikit-learn calls in this file also pass random_state=29 explicitly.
np.random.seed(29)

from flask import Flask, request, jsonify

### Models

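This section creates the two module-level estimators used for prediction and training: a `MinMaxScaler` (named `model`) for feature scaling and a `RandomForestRegressor` (named `forest_reg`). The following sections fit these objects and check whether they have been fitted.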

In [None]:
# Feature scaler. NOTE: despite its name, `model` is the MinMaxScaler, not the
# regressor; it is fitted in model_train() and applied in model_predict().
model = MinMaxScaler()
# Random forest regressor trained on the scaled features to predict the
# 'fuel_consumption' column.
forest_reg = RandomForestRegressor(n_estimators=31, min_samples_leaf=1, max_features=10 , random_state=29)

### Data preparation

This section defines the data-preparation functions. Data points arrive as JSON in the request body and are converted into a *pandas* DataFrame for use in model prediction and training.

In [None]:
def construct_df(request_data):
  """Turn the request's data points into a labelled feature DataFrame.

  Args:
    request_data: Iterable of data points in the form accepted by to_array().

  Returns:
    A DataFrame with one row per data point and the 20 named columns below.
    Row labels start at 1, because the placeholder row labelled 0 is dropped.
  """
  # The order here must match the order in which to_array() emits values.
  column_names = [
    'heading', 'speed', 'fuel_consumption',
    'wind', 'rel_wind_dir', 'gust',
    'waves_height', 'rel_waves_dir', 'waves_period',
    'wwaves_height', 'rel_wwaves_dir', 'wwaves_period',
    'swell1_height', 'rel_swell1_dir', 'swell1_period',
    'swell2_height', 'rel_swell2_dir', 'swell2_period',
    'ocean_current_vel', 'rel_ocean_current_dir',
  ]

  values = to_array(request_data)
  frame = pd.DataFrame(values, columns=column_names)

  # to_array() starts its output with an all-zero row; remove it by label.
  return frame.drop(index=0)
  

def to_array(request_data):
  """Flatten request data points into a 2-D feature array.

  Args:
    request_data: Iterable of nested mappings, each with 'vessel', 'weather'
      and 'marineWeather' entries (see _data_point_to_row for the keys read).

  Returns:
    A NumPy array with 20 columns and len(request_data) + 1 rows. Row 0 is an
    all-zero placeholder, kept because construct_df() drops the row labelled
    0. For empty input the result is the placeholder alone, with shape
    (1, 20), so the array is always two-dimensional.
  """
  n_features = 20
  rows = [_data_point_to_row(dp) for dp in request_data]

  # Stack everything in a single call; growing the array with np.vstack inside
  # the loop would re-copy all previous rows for every data point.
  return np.vstack([np.zeros((1, n_features)), *rows])


def _data_point_to_row(dp):
  """Return the 20 feature values of one data point, in column order."""
  vessel = dp['vessel']
  weather = dp['weather']
  marine = dp['marineWeather']
  waves = marine['waves']
  wwaves = marine['wwaves']
  swell = marine['swellWaves']

  heading = vessel['heading']
  wind_u = weather['windU']
  wind_v = weather['windV']

  return [
    heading,
    vessel['speed'],
    vessel['fuelConsumption']['drift'],
    # Wind speed is the magnitude of the (windU, windV) vector.
    math.sqrt(wind_u ** 2 + wind_v ** 2),
    # Wind direction is derived from the vector, then made relative to heading.
    calculate_rel_dir(heading, calculate_dir(wind_u, wind_v)),
    weather['gust'],
    waves['wavesHeight'],
    calculate_rel_dir(heading, waves['wavesDirection']),
    waves['wavesPeriod'],
    wwaves['wwavesHeight'],
    calculate_rel_dir(heading, wwaves['wwavesDirection']),
    wwaves['wwavesPeriod'],
    swell['swell1Height'],
    calculate_rel_dir(heading, swell['swell1Direction']),
    swell['swell1Period'],
    swell['swell2Height'],
    calculate_rel_dir(heading, swell['swell2Direction']),
    swell['swell2Period'],
    marine['oceanCurrentVelocity'],
    calculate_rel_dir(heading, marine['oceanCurrentDirection']),
  ]

def calculate_dir(u, v):
  """Return the direction of the vector (u, v) in degrees.

  The angle is measured from the reference vector (0, 1) towards (1, 0), so
  (0, 1) gives 0, (1, 0) gives 90, (0, -1) gives 180 and (-1, 0) gives 270.

  Args:
    u: First vector component.
    v: Second vector component.

  Returns:
    The angle in degrees, normalised with ``% 360``. The zero vector yields
    0.0.
  """
  # atan2 keeps the sign of u, so the result covers the full circle; taking
  # acos of the normalised dot product only distinguishes 0-180 degrees and
  # divides by zero for the zero vector.
  return math.degrees(math.atan2(u, v)) % 360


def calculate_rel_dir(heading, param):
  """Return a direction relative to the heading, in degrees modulo 360.

  Args:
    heading: Heading in degrees.
    param: Direction in degrees; it is turned by 180 degrees before the
      heading is subtracted.

  Returns:
    The turned direction minus the heading, wrapped with ``% 360``.
  """
  half_turn = 180
  full_turn = 360

  # Flip the direction to the opposite side of the circle.
  flipped = (param + half_turn) % full_turn
  offset = flipped - heading

  return (offset + full_turn) % full_turn

### Prediction and training

This section defines the functions for model prediction and training. It also defines a search algorithm that looks for the optimal speed.

In [None]:
# Lower bound for the search: a candidate speed is only accepted when it is
# strictly greater than this value.
min_speed = 5

def search_optimal_speed(df: pd.DataFrame, predict=None):
  """Search for the speed with the lowest predicted fuel consumption.

  Starting from the speed stored in the row labelled 1, the search repeatedly
  tries speed + 1 and then speed - 1, and moves to the first neighbour whose
  predicted fuel consumption is strictly lower and whose speed is above
  ``min_speed``. It stops when neither neighbour qualifies.

  Args:
    df: Feature frame containing a row labelled 1 and a 'speed' column, as
      produced by construct_df(). The frame itself is not modified.
    predict: Optional callable taking the frame and returning a sequence of
      predictions; only its first element is used. Defaults to
      model_predict().

  Returns:
    A ``(fuel, speed)`` tuple of plain floats: the predicted fuel consumption
    and the speed at the point where the search stopped.
  """
  if predict is None:
    predict = model_predict

  # Work on a copy so the caller's frame keeps its original speed.
  candidate = df.copy()
  speed = candidate.at[1, 'speed']
  fuel = predict(candidate)[0]

  improved = True
  while improved:
    improved = False
    # Try the faster neighbour first, then the slower one.
    for step in (1, -1):
      next_speed = speed + step
      if next_speed <= min_speed:
        continue

      candidate.at[1, 'speed'] = next_speed
      next_fuel = predict(candidate)[0]

      if next_fuel < fuel:
        fuel, speed = next_fuel, next_speed
        improved = True
        break

  # Plain floats keep the result independent of NumPy scalar types.
  return float(fuel), float(speed)

def model_predict(df: pd.DataFrame):
  """Predict fuel consumption for every row of the frame.

  Args:
    df: Feature frame that includes a 'fuel_consumption' column; that column
      is removed before the remaining columns are scaled.

  Returns:
    The output of ``forest_reg.predict`` on the scaled features.
  """
  features = df.drop(columns='fuel_consumption')

  # Scale with the already-fitted scaler, then run the regressor.
  return forest_reg.predict(model.transform(features))

def model_train(df: pd.DataFrame):
  """Fit the scaler and the regressor, then print the RMSE on held-out data.

  The frame is split 80/20 into a training and a test part. The scaler is
  fitted on the training features only and then applied to both parts.

  Args:
    df: Feature frame that includes the 'fuel_consumption' target column.
  """
  target = 'fuel_consumption'
  train_part, test_part = train_test_split(df, test_size=0.2, random_state=29)

  # Separate the features from the target for both parts.
  features_train = train_part.drop(columns=target)
  labels_train = train_part[target].copy()
  features_test = test_part.drop(columns=target)
  labels_test = test_part[target].copy()

  # Fit the scaler so that every training attribute ranges from 0 to 1.
  model.fit(features_train)

  # Train the regressor on the scaled training features.
  forest_reg.fit(model.transform(features_train), labels_train)

  # Evaluate on the scaled test features.
  predicted = forest_reg.predict(model.transform(features_test))
  rmse = np.sqrt(mean_squared_error(labels_test, predicted))

  print('RMSE:', rmse)

### API controller

This section defines the API controller that handles HTTP traffic into and out of the service. It runs continuously and communicates with the rest of the application through function calls.

In [None]:
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
  """Handle POST /predict: return the optimal speed and its predicted fuel.

  Responses:
    200 with JSON ``{'fuel': ..., 'speed': ...}`` on success.
    400 with a JSON error message when the payload cannot be converted into
      data points or contains none.
    503 with an empty body when the regressor has not been trained yet.
  """
  data = request.json

  try:
    check_is_fitted(forest_reg)
  except NotFittedError:
    return '', 503

  # Missing keys, wrong value types and wrongly shaped payloads are client
  # errors, so report them as 400 instead of letting them escape.
  try:
    df = construct_df(data)
  except (KeyError, TypeError, ValueError):
    return jsonify({'error': 'Malformed request payload'}), 400
  if df.empty:
    return jsonify({'error': 'Request contains no data points'}), 400

  try:
    fuel, speed = search_optimal_speed(df)
  except NotFittedError:
    # Raised during prediction if one of the estimators is still unfitted.
    return '', 503

  return jsonify({'fuel': fuel, 'speed': speed}), 200

@app.route('/train', methods=['POST'])
def train():
  """Handle POST /train: fit the scaler and regressor on the posted data.

  Responses:
    200 with an empty body once training has finished.
    400 with a JSON error message when the payload cannot be converted into
      data points or holds fewer than two of them.
  """
  data = request.json

  # Missing keys, wrong value types and wrongly shaped payloads are client
  # errors, so report them as 400 instead of letting them escape.
  try:
    df = construct_df(data)
  except (KeyError, TypeError, ValueError):
    return jsonify({'error': 'Malformed request payload'}), 400

  # The 80/20 split in model_train() needs at least one row on each side.
  if len(df) < 2:
    return jsonify({'error': 'At least two data points are required'}), 400

  model_train(df)
  return '', 200

# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
  app.run()