# **Multiple Linear Regression**

In [4]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Importing the dataset
dataset=pd.read_csv("/kaggle/input/airpressure/Folds5x2_pp.csv")
x=dataset.iloc[:, :-1].values
y=dataset.iloc[:,-1].values 
print(dataset)

         AT      V       AP     RH      PE
0      8.34  40.77  1010.84  90.01  480.48
1     23.64  58.49  1011.40  74.20  445.75
2     29.74  56.90  1007.15  41.91  438.76
3     19.07  49.69  1007.22  76.79  453.09
4     11.80  40.66  1017.13  97.20  464.43
...     ...    ...      ...    ...     ...
9563  15.12  48.92  1011.80  72.93  462.59
9564  33.41  77.95  1010.30  59.72  432.90
9565  15.99  43.34  1014.20  78.66  465.96
9566  17.65  59.87  1018.58  94.65  450.93
9567  23.68  51.30  1011.86  71.24  451.67

[9568 rows x 5 columns]


In [6]:
#Check for missing values
print(dataset.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9568 entries, 0 to 9567
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AT      9568 non-null   float64
 1   V       9568 non-null   float64
 2   AP      9568 non-null   float64
 3   RH      9568 non-null   float64
 4   PE      9568 non-null   float64
dtypes: float64(5)
memory usage: 373.9 KB
None


In [7]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=1)

In [8]:
# Training the Multiple Linear Regression model on the Training set
from sklearn.linear_model import LinearRegression
regressor=LinearRegression()
regressor.fit(x_train,y_train)

In [9]:
# Predicting the Test set results
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[459.36 458.92]
 [433.94 430.55]
 [474.87 473.85]
 ...
 [445.7  442.69]
 [445.4  447.31]
 [434.67 429.42]]


In [10]:
# Predicting a paticular result
regressor.predict([[8.34, 40.77, 1010.84, 90.01]])

array([477.05])

In [11]:
# Evaluating the Model Performance
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.931485529163037

# **Polynomial Regression**

In [12]:
# Training the Polynomial Regression model on the Training set
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree = 4)
x_poly = poly_reg.fit_transform(x_train)
regressor_2 = LinearRegression()
regressor_2.fit(x_poly, y_train)

In [13]:
# Predicting the Test set results
y_pred = regressor_2.predict(poly_reg.transform(x_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[459.05 458.92]
 [435.62 430.55]
 [474.37 473.85]
 ...
 [445.46 442.69]
 [443.85 447.31]
 [435.79 429.42]]


In [14]:
# Predicting a paticular result
regressor_2.predict(poly_reg.fit_transform([[8.34, 40.77, 1010.84, 90.01]]))

array([480.18])

In [15]:
# Evaluating the Model Performance
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9446120068088645

# **Support Vector Regression (SVR)**

In [37]:
# Importing the dataset
dataset = pd.read_csv('/kaggle/input/airpressure/Folds5x2_pp.csv')
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
y = y.reshape(len(y),1)

In [38]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [39]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
sc_y = StandardScaler()
x_train = sc_x.fit_transform(x_train)
y_train = sc_y.fit_transform(y_train)

In [40]:
# Training the SVR model on the Training set
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


In [41]:
# Predicting the Test set results
y_pred = sc_y.inverse_transform(regressor.predict(sc_x.transform(x_test)).reshape(-1,1))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[431.22 426.18]
 [448.88 451.1 ]
 [444.04 442.87]
 ...
 [455.18 454.2 ]
 [447.12 444.13]
 [432.72 436.58]]


In [42]:
# Predicting a paticular result
regressor_2.predict(poly_reg.fit_transform([[8.34, 40.77, 1010.84, 90.01]]))

array([480.18])

In [43]:
# Evaluating the Model Performance
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.945384075850576

# **Decision Tree Regression**

In [56]:
# Importing the dataset
dataset = pd.read_csv('/kaggle/input/airpressure/Folds5x2_pp.csv')
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [57]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [52]:
# Training the Decision Tree Regression model on the Training set
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(x_train, y_train)

In [53]:
# Predicting the Test set results
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[433.83 426.18]
 [455.23 451.1 ]
 [441.43 442.87]
 ...
 [454.25 454.2 ]
 [444.23 444.13]
 [437.14 436.58]]


In [54]:
# Predicting a paticular result
regressor_2.predict(poly_reg.fit_transform([[8.34, 40.77, 1010.84, 90.01]]))

array([480.18])

In [55]:
# Evaluating the Model Performance
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9358153652260122

# **Random Forest Regression**

In [58]:
# Training the Random Forest Regression model on the whole dataset
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(x_train, y_train)

In [59]:
# Predicting the Test set results
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[431.77 426.18]
 [448.62 451.1 ]
 [441.1  442.87]
 ...
 [454.33 454.2 ]
 [446.42 444.13]
 [435.72 436.58]]


In [60]:
# Predicting a paticular result
regressor_2.predict(poly_reg.fit_transform([[8.34, 40.77, 1010.84, 90.01]]))

array([480.18])

In [61]:
# Evaluating the Model Performance
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9600247281286117