In [None]:
from numpy import loadtxt
import pandas as pd

# Load the data file, skipping its first 4 lines (skiprows=4).
data = loadtxt("turbine.dat", skiprows=4)

# Convert it to a dataframe using the following fields:
# height over sea level, fall, net fall, flow, power
df = pd.DataFrame(data, columns=['height', 'fall', 'net_fall', 'flow', 'power'])

# Separate the input and output columns
X = df.iloc[:, :-1]  # all columns except the last one (the inputs)
y = df.iloc[:, -1]   # only the last column ('power', the output)

# Number of leading rows used for training; every remaining row is used for
# testing. The split is purely positional (file order, no shuffling).
N_TRAIN = 400

# Split the data into training and testing sets
x_train = X.iloc[:N_TRAIN, :]
z_train = y.iloc[:N_TRAIN]
# NOTE(review): the test set is "everything after row N_TRAIN"; it is 50 rows
# only if the file holds 450 data rows — confirm against turbine.dat.
x_test = X.iloc[N_TRAIN:, :]
z_test = y.iloc[N_TRAIN:]


In [None]:
# ======= Multilinear Regression (MLR) =======

# Analyze the data using a linear regression model
from sklearn import linear_model


def relative_abs_error_pct(actual, predicted):
    """Return the total absolute error as a percentage of the total actual value.

    Computes ``100 * sum(|actual - predicted|) / sum(actual)``.

    Parameters
    ----------
    actual : pandas.Series
        Observed target values.
    predicted : array-like
        Predicted values, same length and order as ``actual``
        (combined with ``actual`` element by element).

    Returns
    -------
    float
        The relative absolute error, in percent.
    """
    return 100 * (actual - predicted).abs().sum() / actual.sum()


mlr = linear_model.LinearRegression()
# Plain unweighted least-squares fit: only the inputs and the target are passed.
mlr.fit(x_train, z_train)

pred_train = mlr.predict(x_train)
pred_test  = mlr.predict(x_test)

# Same error metric on both sets so the two figures are directly comparable.
err_train = relative_abs_error_pct(z_train, pred_train)
err_test = relative_abs_error_pct(z_test, pred_test)


In [None]:
import matplotlib.pyplot as plt

# Scatter of real vs. predicted power for both data sets, annotated with the
# relative prediction error of each set.
fig, ax = plt.subplots(figsize=(8, 8))

# Plot the training set scatter with blue color
ax.scatter(z_train, pred_train, c='blue', label='Training Set')

# Plot the test set scatter with red color
ax.scatter(z_test, pred_test, c='red', label='Test Set')

ax.set_xlabel("real power", fontsize=16)
ax.set_ylabel("predicted power", fontsize=16)

# Each annotation must show the error of the set it names:
# TEST -> err_test, TRAIN -> err_train. Positions are in axes coordinates.
ax.text(0.05, 0.9, "Prediction error TEST (MLR) = %.2f%%\n" % err_test, transform=ax.transAxes, fontsize=16)
ax.text(0.05, 0.85, "Prediction error TRAIN (MLR) = %.2f%%\n" % err_train, transform=ax.transAxes, fontsize=16)

# Add a legend to the plot
ax.legend(loc='lower right', fontsize=14)

#fig.savefig("turbine-mlr-test.png", bbox_inches='tight')
plt.show()