In [1]:
import datetime
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from influxdb import InfluxDBClient # install via "pip install influxdb"
from sklearn import svm
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures

from LSSVMRegression import LSSVMRegression
from windData import WindDataCollector
from windTransformer import WindVectorTransformer
from windTransformer import WindDegreeTransformer

In [2]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Parameters
    ----------
    actual : array-like
        Observed target values.
    pred : array-like
        Predicted target values, in the same order as ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mae, r2)``: root mean squared error, mean absolute error
        and the R^2 score, in that order.
    """
    # RMSE is derived from sklearn's MSE by taking the square root.
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


## Data prep

In [3]:
# Reference timestamp passed to both collector queries. The format string
# wraps the ISO-8601 value in literal single quotes.
reference_instant = datetime.datetime(year=2021, month=1, day=1)
start_time = reference_instant.strftime("'%Y-%m-%dT%H:%M:%SZ'")

dataCollector = WindDataCollector()

# Both queries receive identical `now` / `delta` arguments.
# NOTE(review): the meaning and unit of delta="90" are defined inside
# WindDataCollector — confirm against that module.
query_window = {"now": start_time, "delta": "90"}
gen_df = dataCollector.getGenerationData(**query_window)
wind_df = dataCollector.getWindData(**query_window)

# As-of join on the two indexes: every wind row is kept and paired with the
# most recent generation row at or before it (merge_asof's default
# direction is "backward").
gen_df_alligned = pd.merge_asof(
    left=wind_df,
    right=gen_df,
    left_index=True,
    right_index=True,
)

In [4]:
# Positional 90/10 split: the first 90% of the rows are used for training and
# the remainder for testing. No shuffling is done, so row order is preserved.
train_length = int(len(gen_df_alligned) * 0.9)

# Only "Speed" is used as a feature; "Direction" is currently left out.
feature_columns = ["Speed"]
target_column = "Total"

train_rows = gen_df_alligned.iloc[:train_length]
test_rows = gen_df_alligned.iloc[train_length:]

train_X = train_rows[feature_columns]
test_X = test_rows[feature_columns]

train_y = train_rows[target_column]
test_y = test_rows[target_column]

In [5]:
# Scatter plot of the target column ("Total") against the single model
# feature ("Speed") over the whole merged frame.
# `px` (plotly.express) is imported with the other dependencies in the
# notebook's first cell rather than here, so the import is not scattered.
fig = px.scatter(
    gen_df_alligned,
    x="Speed",
    y="Total",
    title="Total vs. Speed",  # a figure should be readable on its own
)
fig.show()

## Linear regression

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [6]:
# Feature step: polynomial expansion of the "Speed" column only.
# (A wind-direction transformer is not part of this pipeline.)
linear_feature_step = ColumnTransformer(
    transformers=[
        ("polynomial_features", PolynomialFeatures(), ["Speed"]),
    ]
)

# Expanded features feed an ordinary least-squares model.
pipeline = Pipeline(
    steps=[
        ("column_transformers", linear_feature_step),
        ("linear_model", LinearRegression()),
    ]
)

# Search space: polynomial degree 1 through 8. The key is addressed as
# <pipeline step>__<column transformer entry>__<parameter>.
parameters = {
    "column_transformers__polynomial_features__degree": list(range(1, 9)),
}

In [7]:
# Time-ordered cross-validation folds (5 splits) instead of shuffled K-fold.
tscv = TimeSeriesSplit(n_splits=5)

# This cell rebinds `pipeline` to the grid search. Without the guard below,
# running the cell a second time would wrap the existing GridSearchCV in a new
# one, and the `param_grid` keys would no longer resolve. Unwrapping via
# `.estimator` keeps the cell idempotent while leaving `pipeline` bound to the
# fitted search afterwards, exactly as before.
base_pipeline = pipeline.estimator if isinstance(pipeline, GridSearchCV) else pipeline
pipeline = GridSearchCV(base_pipeline, param_grid=parameters, n_jobs=15, cv=tscv)

# np.ravel flattens the target to 1-D before fitting.
pipeline.fit(train_X, np.ravel(train_y))

bestParams = pipeline.best_params_

# With the default refit=True, predict() on the search object delegates to the
# best estimator refit on the full training data.
predicted_qualities = pipeline.predict(test_X)

(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

print(bestParams)
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

{}
  RMSE: 5.483803100370458
  MAE: 4.161473104822195
  R2: 0.8200147718988211


## Ridge Regression

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html

In [8]:
# Feature step: polynomial expansion of the "Speed" column only.
# (A wind-direction transformer is not part of this pipeline.)
ridge_feature_step = ColumnTransformer(
    transformers=[
        ("polynomial_features", PolynomialFeatures(), ["Speed"]),
    ]
)

# Expanded features feed an L2-regularised linear model.
pipeline = Pipeline(
    steps=[
        ("column_transformers", ridge_feature_step),
        ("linear_model", Ridge()),
    ]
)

# Search space: polynomial degree 1 through 8 crossed with six values of the
# Ridge regularisation strength `alpha`.
parameters = {
    "column_transformers__polynomial_features__degree": list(range(1, 9)),
    "linear_model__alpha": [0.001, 0.01, 0.1, 1, 10, 100],
}

In [9]:
# Time-ordered cross-validation folds (5 splits) instead of shuffled K-fold.
tscv = TimeSeriesSplit(n_splits=5)

# This cell rebinds `pipeline` to the grid search. Without the guard below,
# running the cell a second time would wrap the existing GridSearchCV in a new
# one, and the `param_grid` keys would no longer resolve. Unwrapping via
# `.estimator` keeps the cell idempotent while leaving `pipeline` bound to the
# fitted search afterwards, exactly as before.
base_pipeline = pipeline.estimator if isinstance(pipeline, GridSearchCV) else pipeline
pipeline = GridSearchCV(base_pipeline, param_grid=parameters, n_jobs=15, cv=tscv)

# np.ravel flattens the target to 1-D before fitting.
pipeline.fit(train_X, np.ravel(train_y))

bestParams = pipeline.best_params_

# With the default refit=True, predict() on the search object delegates to
# best_estimator_, so this is equivalent to best_estimator_.predict(test_X).
predicted_qualities = pipeline.predict(test_X)

(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

print(bestParams)
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

{'linear_model__alpha': 100}
  RMSE: 5.51850138060956
  MAE: 4.1970619493139
  R2: 0.8177298846864669
