# Regression Model Selection

### Importing the libaries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

### Importing the dataset

In [2]:
dataset = pd.read_csv('Data.csv')
dataset.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB


In [4]:
dataset.isna().sum()

AT    0
V     0
AP    0
RH    0
PE    0
dtype: int64

**Note:** please refer to the preprocessing toolkit if necessary.

# Multiple Linear Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

In [5]:
y_pred = regressor.predict(X_test)

In [6]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9325315554761303

# Polynomial Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_train = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

In [5]:
X_test = poly_reg.transform(X_test)
y_pred = regressor.predict(X_test)

In [6]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9458193390165572

# Support Vector Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values.reshape(-1, 1)

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

In [5]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train.ravel())

SVR()

In [6]:
X_test = sc_X.fit_transform(X_test)
y_pred = regressor.predict(X_test)

In [7]:
from sklearn.metrics import r2_score
y_pred = sc_y.inverse_transform(y_pred)
r2_score(y_test, y_pred)

0.9483693304317924

# Decision Tree Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)

DecisionTreeRegressor(random_state=0)

In [5]:
y_pred = regressor.predict(X_test)

In [6]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.922905874177941

# Random Forest Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(max_features='log2', random_state=0)
regressor.fit(X_train, y_train)

RandomForestRegressor(max_features='log2', random_state=0)

In [5]:
y_pred = regressor.predict(X_test)

In [6]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9670697512311753