In [None]:
import numpy as np
import pandas as pd
import math

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

np.random.seed(29)

from flask import Flask, request

In [None]:
def construct_df(requestData):
  """Build the labelled feature DataFrame for a batch of data points.

  `requestData` is passed unchanged to `to_array`; the array it returns is
  wrapped in a DataFrame with the 20 column names listed below.

  Returns the DataFrame without the row labelled 0, which `to_array` emits
  as an all-zero placeholder, so the remaining index starts at 1.
  """
  column_names = [
    # Vessel state
    'heading', 'speed', 'fuel_consumption',
    # Wind
    'wind', 'rel_wind_dir', 'gust',
    # Combined waves
    'waves_height', 'rel_waves_dir', 'waves_period',
    # Wind waves
    'wwaves_height', 'rel_wwaves_dir', 'wwaves_period',
    # Swell, class 1
    'swell1_height', 'rel_swell1_dir', 'swell1_period',
    # Swell, class 2
    'swell2_height', 'rel_swell2_dir', 'swell2_period',
    # Ocean current
    'ocean_current_vel', 'rel_ocean_current_dir',
  ]

  frame = pd.DataFrame(to_array(requestData), columns=column_names)

  # Row 0 is the all-zero placeholder row, not a real data point.
  return frame.drop(0)
  

def to_array(requestData):
  """Flatten request data points into a 2-D feature array.

  Each element of `requestData` must be a mapping providing the nested
  'vessel', 'weather' and 'marineWeather' keys read below. One 20-value row
  is produced per data point, in the column order used by `construct_df`.

  The first row of the result is an all-zero placeholder (dropped again by
  `construct_df`), followed by one row per data point. The result is
  always 2-D, including for empty input, where it has shape (1, 20).

  Raises KeyError when a data point lacks one of the keys read below.
  """
  # Rows are collected in a list and stacked once at the end. Growing an
  # array with np.append / np.vstack inside the loop re-copies everything
  # gathered so far on every iteration.
  rows = [np.zeros(20)]

  for dp in requestData:
    vessel = dp['vessel']
    weather = dp['weather']
    marine = dp['marineWeather']
    waves = marine['waves']
    wwaves = marine['wwaves']
    swell = marine['swellWaves']

    # Vessel heading; every direction below is made relative to it
    heading = vessel['heading']
    wind_u = weather['windU']
    wind_v = weather['windV']

    rows.append([
      heading,
      vessel['speed'],
      vessel['fuelConsumption']['drift'],
      # Wind speed from its U and V components
      math.sqrt(wind_u ** 2 + wind_v ** 2),
      # Wind direction from U/V, then relative to vessel heading
      calculate_rel_dir(heading, calculate_dir(wind_u, wind_v)),
      weather['gust'],
      # Waves: height, direction relative to heading, period
      waves['wavesHeight'],
      calculate_rel_dir(heading, waves['wavesDirection']),
      waves['wavesPeriod'],
      # Wind waves: height, direction relative to heading, period
      wwaves['wwavesHeight'],
      calculate_rel_dir(heading, wwaves['wwavesDirection']),
      wwaves['wwavesPeriod'],
      # Class 1 swell waves: height, direction relative to heading, period
      swell['swell1Height'],
      calculate_rel_dir(heading, swell['swell1Direction']),
      swell['swell1Period'],
      # Class 2 swell waves: height, direction relative to heading, period
      swell['swell2Height'],
      calculate_rel_dir(heading, swell['swell2Direction']),
      swell['swell2Period'],
      # Ocean current: velocity, direction relative to heading
      marine['oceanCurrentVelocity'],
      calculate_rel_dir(heading, marine['oceanCurrentDirection']),
    ])

  return np.vstack(rows)

def calculate_dir(u, v):
  """Return the direction of the vector (u, v) in degrees, in [0, 360).

  The angle is measured from the reference vector (0, 1) and grows towards
  positive u, so (0, 1) -> 0, (1, 0) -> 90, (0, -1) -> 180 and
  (-1, 0) -> 270. (Presumably u is the eastward and v the northward
  component, making this a compass bearing -- TODO confirm against the
  weather data source.)

  A zero-length vector has no direction; 0.0 is returned for it rather
  than raising ZeroDivisionError.
  """
  if u == 0 and v == 0:
    return 0.0

  # atan2 keeps the sign of u. Taking acos of the normalised dot product
  # with (0, 1) only yields the unsigned angle in [0, 180], which reports
  # (u, v) and (-u, v) as the same direction.
  deg = math.degrees(math.atan2(u, v)) % 360

  # The modulo can round a tiny negative angle up to exactly 360.0.
  return 0.0 if deg >= 360 else deg


def calculate_rel_dir(heading, param):
  """Return the direction `param` relative to `heading`, in degrees.

  `param` is first turned by 180 degrees and wrapped modulo 360; `heading`
  is then subtracted and the difference wrapped modulo 360 again.
  """
  half_turn = 180
  full_turn = 360

  opposite = (param + half_turn) % full_turn

  # A full turn is added before the final modulo so the intermediate value
  # stays non-negative for headings up to 360.
  return (opposite - heading + full_turn) % full_turn

In [None]:
def train_model(df: pd.DataFrame):
  """Fit a random forest on `df` and print its RMSE on a held-out split.

  `df` must contain a 'fuel_consumption' column, which is the regression
  target; every other column is used as a feature. 20 % of the rows are
  held out for testing with a fixed random state. Features are min-max
  scaled with a scaler fitted on the training rows only, and that same
  scaler is applied to the test rows.

  Prints the root mean squared error on the test rows; returns None.
  """
  target = 'fuel_consumption'

  train_rows, test_rows = train_test_split(df, test_size=0.2, random_state=29)

  train_features = train_rows.drop(target, axis=1)
  train_labels = train_rows[target].copy()

  # Scale so that every training attribute ranges from 0 to 1. The scaler is
  # fitted on the training rows only and reused for the test rows.
  scaler = MinMaxScaler().fit(train_features)
  train_features_scaled = scaler.transform(train_features)

  # Random forest regression.
  # NOTE(review): the hyperparameters are hard-coded, presumably the outcome
  # of earlier tuning -- TODO confirm.
  forest = RandomForestRegressor(max_features=10, n_estimators=31, random_state=29)
  forest.fit(train_features_scaled, train_labels)

  # Evaluate on the held-out rows
  test_features = test_rows.drop(target, axis=1)
  test_labels = test_rows[target].copy()

  test_predictions = forest.predict(scaler.transform(test_features))
  rmse = np.sqrt(mean_squared_error(test_labels, test_predictions))

  print(rmse)

def display_scores(scores):
  """Print `scores` followed by its mean and standard deviation.

  `scores` must provide `mean()` and `std()` methods. Each statistic is
  computed just before its line is printed.
  """
  report = (
    ('Scores:', lambda values: values),
    ('Mean:', lambda values: values.mean()),
    ('Standard deviation:', lambda values: values.std()),
  )

  for label, compute in report:
    print(label, compute(scores))

In [None]:
app = Flask(__name__)

@app.route('/train', methods=['POST'])
def train():
  """Train the model from a JSON array of data points (POST /train).

  The request body must be a non-empty JSON array whose elements provide
  the keys read by `to_array`. Responds with an empty body and status 200
  once `train_model` has run. Responds with status 400 and a short message
  when the body is missing, is not valid JSON, is not a non-empty array,
  or contains a data point that cannot be converted into a feature row.
  """
  # silent=True returns None for a missing or unparsable JSON body instead
  # of aborting, so every bad-payload case is answered uniformly below.
  data = request.get_json(silent=True)

  if not isinstance(data, list) or not data:
    return 'Request body must be a non-empty JSON array of data points', 400

  try:
    df = construct_df(data)
  except (KeyError, TypeError):
    # A data point lacks an expected key, is not a mapping, or carries a
    # value that cannot be used in the direction/speed arithmetic.
    return 'Data point is missing a required field or has an invalid value', 400

  train_model(df)
  return '', 200

# Start the Flask development server when this module is run as the main program.
if __name__ == '__main__':
  app.run()