# **Regression Model Selection**

Name: Seanrei Ethan M. Valdeabella

Date: January 17, 2025

# **1. Multiple Linear Regression**

In [None]:
#@title Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Importing the Dataset
dataset = pd.read_csv("Data.csv")

# Every column except the last is a feature; the last column is the target (1-D array).
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [None]:
#@title Splitting the Dataset into Training Set and Test Set
from sklearn.model_selection import train_test_split

# test_size=0.2 holds out 20% of the rows; random_state=0 fixes the shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [None]:
#@title Training the Multiple Linear Regression model on the Training Set
from sklearn.linear_model import LinearRegression

# Only the training split is used for fitting; the test split is kept for evaluation.
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [None]:
#@title Predicting the Test Set Results
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)

# Two-column view: predictions on the left, actual test targets on the right.
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


In [None]:
#@title Evaluating the Model's Performance
from sklearn.metrics import r2_score

# R^2 of the predictions against the test targets; the bare expression is the cell output.
r2_score(y_true=y_test, y_pred=y_pred)

0.9325315554761303

# **2. Polynomial Regression**

In [None]:
#@title Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Importing the Dataset
dataset = pd.read_csv("Data.csv")

# Feature matrix: all columns but the last. Target vector: the last column (1-D).
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [None]:
#@title Splitting the Dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

In [None]:
#@title Training the Polynomial Regression model on the Training Set
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms up to degree 4,
# then fit an ordinary linear model on the expanded matrix.
pol_reg = PolynomialFeatures(degree=4)
X_poly = pol_reg.fit_transform(X_train)

regressor = LinearRegression()
regressor.fit(X=X_poly, y=y_train)

In [None]:
#@title Predicting the Test Set Results

# Apply the already-fitted polynomial expansion to the test features before predicting.
y_pred = regressor.predict(pol_reg.transform(X_test))
# np.printoptions is a context manager and has no effect when called bare;
# np.set_printoptions is the call that actually changes the global print precision.
np.set_printoptions(precision=2)
# Predictions in the left column, actual test targets in the right column.
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[434.16 431.23]
 [458.26 460.01]
 [460.72 461.14]
 ...
 [469.49 473.26]
 [438.53 438.  ]
 [461.62 463.28]]


In [None]:
#@title Evaluating the Model's Performance
from sklearn.metrics import r2_score

# Coefficient of determination on the test split, shown as the cell's output.
r2_score(y_true=y_test, y_pred=y_pred)

0.9455261540927579

# **3. Support Vector Regression**

In [None]:
#@title Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Importing the Dataset

dataset = pd.read_csv("Data.csv")
# The `-1:` slice (rather than `-1`) keeps the target as a 2-D, single-column array,
# which is the shape later handed to the target scaler.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1:].values

In [None]:
#@title Splitting the Dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split

# Same 80/20 split and seed as the other sections.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
#@title Feature Scaling
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target, each fitted on the training split only.
sc_X, sc_y = StandardScaler(), StandardScaler()

# NOTE: X_train / y_train are overwritten with their scaled versions, so this cell
# is not idempotent -- re-run the split cell before running it again.
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

In [None]:
#@title Training the SVR model on the Training Set
from sklearn.svm import SVR

regressor = SVR(kernel="rbf")
# y_train is a 2-D single-column array at this point (the shape used for the
# target scaler). SVR expects a 1-D target, so flatten it explicitly instead of
# relying on scikit-learn's implicit conversion, which emits a DataConversionWarning.
regressor.fit(X_train, y_train.ravel())

  y = column_or_1d(y, warn=True)


In [None]:
#@title Predicting the Test Set Results
# Scale the test features with the fitted feature scaler and predict in scaled target space...
y_pred = regressor.predict(sc_X.transform(X_test))
# ...then map the predictions back to the original target units (inverse_transform needs 2-D input).
y_pred = sc_y.inverse_transform(y_pred.reshape(-1, 1))
np.set_printoptions(precision=2)
# Predictions on the left, actual test targets on the right.
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


In [None]:
#@title Evaluating the Model's Performance
from sklearn.metrics import r2_score

# y_pred was mapped back to original units above, so it is comparable to y_test.
r2_score(y_true=y_test, y_pred=y_pred)

0.9480784049986258

# **4. Decision Tree Regression**

In [None]:
#@title Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Importing the Dataset

dataset = pd.read_csv("Data.csv")
# Features: every column but the last. Target: the last column, kept 2-D by the `-1:` slice.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1:].values

In [None]:
#@title Splitting the Dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

In [None]:
#@title Training the Decision Tree Regression model on the Training Set
from sklearn.tree import DecisionTreeRegressor

# random_state=0 fixes the estimator's seed.
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X=X_train, y=y_train)

In [None]:
#@title Predicting the Test Set Results

y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# Side-by-side columns: predicted value, then actual test target.
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


In [None]:
#@title Evaluating the Model's Performance
from sklearn.metrics import r2_score

# R^2 on the test split; left as the last expression so the notebook displays it.
r2_score(y_true=y_test, y_pred=y_pred)

0.922905874177941

# **5. Random Forest Regression**

In [None]:
#@title Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#@title Importing the Dataset

dataset = pd.read_csv("Data.csv")
# X holds all columns except the last; y holds the last column as a 2-D column array.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1:].values

In [None]:
#@title Splitting the Dataset into the Training Set and Test Set
from sklearn.model_selection import train_test_split

# 20% of the rows go to the test split; the seed is fixed at 0.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
#@title Training the Random Forest Regression model on the Training Set
from sklearn.ensemble import RandomForestRegressor

# 10 trees, with a fixed seed.
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
# y_train is an (n, 1) column because the dataset cell slices the target with `-1:`;
# pass a 1-D view so scikit-learn does not emit a DataConversionWarning.
regressor.fit(X_train, y_train.ravel())

  return fit_method(estimator, *args, **kwargs)


In [None]:
#@title Predicting the Test Set Results
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)

# Column 0: model predictions. Column 1: actual test targets.
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

[[434.1  431.23]
 [458.81 460.01]
 [463.01 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


In [None]:
#@title Evaluating the Model's Performance
from sklearn.metrics import r2_score

# Coefficient of determination of the forest's predictions on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.9616644273498386