In [1]:
import pandas as pd 
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [2]:
# Load the dataset from Data.csv (path is resolved relative to the working directory).
dataset = pd.read_csv(filepath_or_buffer="Data.csv")

### Dataset from the `UCI ML Repository`
* `PE` - the __target__ value (energy output)
* `AT` - ambient temperature
* `V`  - exhaust vacuum
* `AP` - ambient pressure
* `RH` - relative humidity

In [3]:
# Preview the first five rows to sanity-check the load.
dataset.head(n=5)

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [4]:
# Print each column's dtype and non-null count to check for missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
dataset.describe()

Unnamed: 0,AT,V,AP,RH,PE
count,9568.0,9568.0,9568.0,9568.0,9568.0
mean,19.651231,54.305804,1013.259078,73.308978,454.365009
std,7.452473,12.707893,5.938784,14.600269,17.066995
min,1.81,25.36,992.89,25.56,420.26
25%,13.51,41.74,1009.1,63.3275,439.75
50%,20.345,52.08,1012.94,74.975,451.55
75%,25.72,66.54,1017.26,84.83,468.43
max,37.11,81.56,1033.3,100.16,495.76


In [6]:
# Features are every column except the last; the target is the last column
# (`PE` in the dataset description).
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

## Test / Train Split

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=420,
)

In [8]:
# Confirm the number of rows in the training and test partitions.
print(X_train.shape[0], X_test.shape[0])

7654 1914


# Multiple Linear Regression

In [9]:
from sklearn.linear_model import LinearRegression

# Linear-regression baseline fitted on the untransformed training features.
regressor_lin = LinearRegression()
regressor_lin.fit(X=X_train, y=y_train)

In [10]:
# Side-by-side view of the true test targets and the linear model's predictions.
pd.DataFrame(
    {
        "y_test": y_test,
        "y_pred": regressor_lin.predict(X_test),
    }
)

Unnamed: 0,y_test,y_pred
4593,463.71,463.391272
7982,476.22,478.636332
7051,467.41,472.446417
8200,481.24,474.511147
1958,458.40,458.303653
...,...,...
4807,454.35,447.557344
5748,475.96,470.986902
2223,439.94,436.464924
6908,450.88,451.421072


In [11]:
# R² of the linear model on the held-out test set.
r2_lin = r2_score(y_true=y_test, y_pred=regressor_lin.predict(X_test))
r2_lin

0.9310033919202232

# Polynomial Regression

In [17]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into all polynomial terms up to degree 2.
# The fitted transformer is kept so the test set can be expanded the same way.
poly_features = PolynomialFeatures(degree=2)
X_poly = poly_features.fit(X_train).transform(X_train)

In [19]:
# Show the untransformed training features (the input to the polynomial expansion).
X_train

Unnamed: 0,AT,V,AP,RH
912,23.72,66.48,1003.61,73.75
3808,26.96,73.68,1013.81,84.77
1733,18.00,44.06,1016.80,78.88
3991,22.16,58.82,1009.61,87.16
1388,9.42,41.40,1029.60,87.43
...,...,...,...,...
266,27.87,70.79,1003.96,72.17
1209,28.90,68.12,1011.87,46.56
2675,29.26,69.34,1009.76,58.64
4671,22.50,59.54,1007.99,93.63


In [18]:
# First row of the expanded training matrix: per the output below it holds 15 values,
# starting with the constant 1 followed by the four original feature values.
X_poly[0]

array([1.00000000e+00, 2.37200000e+01, 6.64800000e+01, 1.00361000e+03,
       7.37500000e+01, 5.62638400e+02, 1.57690560e+03, 2.38056292e+04,
       1.74935000e+03, 4.41959040e+03, 6.67199928e+04, 4.90290000e+03,
       1.00723303e+06, 7.40162375e+04, 5.43906250e+03])

In [13]:
# Linear regression on the polynomial-expanded features, i.e. polynomial regression.
regressor_poly = LinearRegression()
regressor_poly.fit(X=X_poly, y=y_train)

In [60]:
# The test features go through the same fitted expansion before predicting.
r2_poly = r2_score(
    y_true=y_test,
    y_pred=regressor_poly.predict(poly_features.transform(X_test)),
)
r2_poly

0.9431256992761239

# Decision Tree Regression

In [61]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so the fitted model (and its R² score) is identical on every
# Restart & Run All. Without a fixed `random_state`, scikit-learn permutes the
# features randomly at each split, so ties between equally good splits can be
# broken differently from run to run.
regressor_tree = DecisionTreeRegressor(random_state=420)
regressor_tree.fit(X_train, y_train)

In [62]:
# R² of the decision tree on the held-out test set.
r2_tree = r2_score(y_true=y_test, y_pred=regressor_tree.predict(X_test))
r2_tree

0.9386160443770943

# Random Forest Regression

In [63]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 10 trees. The forest draws bootstrap samples and candidate features
# at random, so `random_state` is fixed to make the fitted model (and its R² score)
# reproducible on every Restart & Run All.
regressor_forest = RandomForestRegressor(n_estimators=10, random_state=420)
regressor_forest.fit(X_train, y_train)

In [64]:
# R² of the random forest on the held-out test set.
r2_forest = r2_score(y_true=y_test, y_pred=regressor_forest.predict(X_test))
r2_forest

0.9610379545247809

# Evaluating Regression Models

In [66]:
# Test-set R² scores in the order: linear, polynomial, decision tree, random forest.
[
    r2_lin,
    r2_poly,
    r2_tree,
    r2_forest,
]

[0.9310033919202232,
 0.9431256992761239,
 0.9386160443770943,
 0.9610379545247809]

In [69]:
# One-row table putting the four test-set R² scores side by side.
pd.DataFrame(
    {
        "r2_lin": [r2_lin],
        "r2_poly": [r2_poly],
        "r2_tree": [r2_tree],
        "r2_forest": [r2_forest],
    }
)

Unnamed: 0,r2_lin,r2_poly,r2_tree,r2_forest
0,0.931003,0.943126,0.938616,0.961038
