In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import r2_score

In [67]:
# STEP 1: Data Preprocessing
# Load the dataset: every column except the last goes into X, the last column becomes y.
df = pd.read_csv('Data.csv')
X, y = df.iloc[:, :-1].to_numpy(), df.iloc[:, -1].to_numpy()

# Hold out 20% of the rows as a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [68]:
# STEP 2: Regression model fitting
# All five models are fitted on the same training split (X_train, y_train).

# Linear Regression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Polynomial Regression: expand the features into degree-4 polynomial terms,
# then fit an ordinary linear model on the expanded matrix.
poly_reg = PolynomialFeatures(degree=4)
X_train_poly = poly_reg.fit_transform(X_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_train_poly, y_train)

# Support Vector Regression: features and target are standardised with
# separate scalers before fitting the RBF-kernel SVR.
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train_scaled = sc_X.fit_transform(X_train)
# StandardScaler works on 2D input, so the target is reshaped into one column.
y_train_scaled = sc_y.fit_transform(y_train.reshape(-1, 1))
svr_reg = SVR(kernel='rbf')
# SVR.fit expects a 1D target of shape (n_samples,). Passing the (n, 1) column
# triggers a DataConversionWarning, so flatten it here. y_train_scaled itself
# is kept 2D because the training-set evaluation passes it to svr_reg.score.
svr_reg.fit(X_train_scaled, y_train_scaled.ravel())

# Decision Tree Regression
dt_reg = DecisionTreeRegressor(random_state=0)
dt_reg.fit(X_train, y_train)

# Random Forest Regression (10 trees, fixed seed for reproducibility)
rf_reg = RandomForestRegressor(n_estimators=10, random_state=0)
rf_reg.fit(X_train, y_train)



  return f(*args, **kwargs)


RandomForestRegressor(n_estimators=10, random_state=0)

In [69]:
# STEP 3: Performance evaluation on training set
# regressor.score(X, y) computes R^2 between y and y_pred automatically,
# where y_pred = regressor.predict(X).
lin_rsq = lin_reg.score(X_train, y_train)
poly_rsq = lin_reg_2.score(X_train_poly, y_train)
# The SVR was trained on standardised data, so it is scored on the scaled arrays.
svr_rsq = svr_reg.score(X_train_scaled, y_train_scaled)
dt_rsq = dt_reg.score(X_train, y_train)
rf_rsq = rf_reg.score(X_train, y_train)

# Emit one line per model; sep='\n' yields the same text as separate print calls.
print(
    f'Linear Regression Rsq        : {lin_rsq}',
    f'Polynomial Regression Rsq    : {poly_rsq}',
    f'Support Vector Regression Rsq: {svr_rsq}',
    f'Decision Tree Regression Rsq : {dt_rsq}',
    f'Random Forest Regression Rsq : {rf_rsq}',
    sep='\n',
)

Linear Regression Rsq        : 0.9277253998587902
Polynomial Regression Rsq    : 0.9418288594744669
Support Vector Regression Rsq: 0.9451810801207933
Decision Tree Regression Rsq : 1.0
Random Forest Regression Rsq : 0.9920809229378007


In [70]:
# STEP 4: Performance evaluation on test set
# Apply the transformers that were fitted on the training set. Only transform()
# is called here so that nothing is re-fitted on test data, and each transformed
# array is computed once and reused for both prediction and scoring.
X_test_poly = poly_reg.transform(X_test)
X_test_scaled = sc_X.transform(X_test)
# The scaler needs a 2D column; flatten afterwards to get a 1D target for scoring.
y_test_scaled = sc_y.transform(y_test.reshape(-1, 1)).ravel()

# Test-set predictions for each model.
lin_reg_pred = lin_reg.predict(X_test)
poly_reg_pred = lin_reg_2.predict(X_test_poly)
# SVR predicts in the scaled target space and returns a 1D array, while
# StandardScaler.inverse_transform expects a 2D (n_samples, n_features) input:
# reshape to a column before inverting, then flatten back to 1D.
svr_reg_pred = sc_y.inverse_transform(
    svr_reg.predict(X_test_scaled).reshape(-1, 1)
).ravel()
dt_reg_pred = dt_reg.predict(X_test)
rf_reg_pred = rf_reg.predict(X_test)

# R^2 on the test set; the SVR is scored in the scaled space it was trained in.
lin_rsq_test = lin_reg.score(X_test, y_test)
poly_rsq_test = lin_reg_2.score(X_test_poly, y_test)
svr_rsq_test = svr_reg.score(X_test_scaled, y_test_scaled)
dt_rsq_test = dt_reg.score(X_test, y_test)
rf_rsq_test = rf_reg.score(X_test, y_test)

print(f'Linear Regression Rsq        : {lin_rsq_test}')
print(f'Polynomial Regression Rsq    : {poly_rsq_test}')
print(f'Support Vector Regression Rsq: {svr_rsq_test}')
print(f'Decision Tree Regression Rsq : {dt_rsq_test}')
print(f'Random Forest Regression Rsq : {rf_rsq_test}')


[[431.23 460.01 461.14 ... 473.26 438.   463.28]]
Linear Regression Rsq        : 0.9325315554761303
Polynomial Regression Rsq    : 0.9458193624590466
Support Vector Regression Rsq: 0.948078404998626
Decision Tree Regression Rsq : 0.922905874177941
Random Forest Regression Rsq : 0.9615908334363876
