## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the raw table from disk. Every column except the last goes into the
# feature matrix X; the last column becomes the target vector y.
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state is pinned so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [4]:
from sklearn.linear_model import LinearRegression

# Fit a linear model on the untransformed training features.
regressor_mlr = LinearRegression()
regressor_mlr.fit(X=X_train, y=y_train)

## Predicting the Test set results for Multiple Linear Regression



In [5]:
# Predict on the held-out rows and print predictions (left column) next to
# the true targets (right column), rounded to two decimals for display.
y_pred_mlr = regressor_mlr.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_mlr, y_test)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


## Training the Polynomial Regression model on the Training set

In [6]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand the training features into polynomial terms up to degree 4, then
# fit a linear model on the expanded matrix. poly_reg is kept so the same
# expansion can be applied to the test features later.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit(X_train).transform(X_train)
regressor_pr = LinearRegression()
regressor_pr.fit(X=X_poly, y=y_train)

## Predicting the Test set results for Polynomial Regression

In [7]:
# Apply the already-fitted polynomial expansion to the test features
# (transform only, no refit), predict, and print predictions beside targets.
X_test_poly = poly_reg.transform(X_test)
y_pred_pr = regressor_pr.predict(X_test_poly)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_pr, y_test)))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.66 463.28]]


## Feature Scaling for SVR

In [9]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target so the target scaling can be
# inverted on its own when converting predictions back to original units.
sc_X = StandardScaler()
sc_y = StandardScaler()

# Both scalers are fitted on training data only.
X_train_scaled = sc_X.fit_transform(X_train)
# StandardScaler needs 2-D input, so the 1-D target becomes a single column.
y_train_scaled = sc_y.fit_transform(y_train[:, np.newaxis])

## Training the SVR model on the Training set

In [10]:
from sklearn.svm import SVR

# Train an RBF-kernel SVR on the standardized features and target.
regressor_svr = SVR(kernel = 'rbf')
# y_train_scaled is an (n_samples, 1) column because StandardScaler requires
# 2-D input, but SVR.fit expects a 1-D target. Passing the column vector
# as-is raises a DataConversionWarning (the `column_or_1d(y, warn=True)`
# message), so flatten it explicitly here.
regressor_svr.fit(X_train_scaled, y_train_scaled.ravel())

  y = column_or_1d(y, warn=True)


## Predicting the Test set results for SVR

In [11]:
# Scale the test features with the scaler fitted on the training set,
# predict in the scaled target space, then map the predictions back to the
# original target units. inverse_transform needs a 2-D column, hence the
# reshape.
X_test_scaled = sc_X.transform(X_test)
y_pred_svr_scaled = regressor_svr.predict(X_test_scaled)
y_pred_svr = sc_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1))
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_svr, y_test)))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


## Training the Decision Tree Regression model on the Training set

In [12]:
from sklearn.tree import DecisionTreeRegressor

# Fit a single regression tree on the unscaled training data; random_state
# is pinned so repeated runs build the same tree.
regressor_dt = DecisionTreeRegressor(random_state=0)
regressor_dt.fit(X=X_train, y=y_train)

## Predicting the Test set results for Decision Tree

In [13]:
# Predict on the held-out rows and print predictions (left column) beside
# the true targets (right column).
y_pred_dt = regressor_dt.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_dt, y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


## Training the Random Forest Regression model on the Training set

In [14]:
from sklearn.ensemble import RandomForestRegressor

# Fit a forest of 10 trees on the unscaled training data; random_state is
# pinned for reproducibility.
regressor_rf = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
)
regressor_rf.fit(X=X_train, y=y_train)

## Predicting the Test set results for Random Forest

In [15]:
# Predict on the held-out rows and print predictions (left column) beside
# the true targets (right column).
y_pred_rf = regressor_rf.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_rf, y_test)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


## Evaluating the Model Performances

In [16]:
from sklearn.metrics import r2_score

# Score every model's test-set predictions against the same ground truth.
# The order of the prediction tuple matches the order of the names on the
# left-hand side.
r2score_mlr, r2score_pr, r2score_svr, r2score_dt, r2score_rf = (
    r2_score(y_test, predictions)
    for predictions in (y_pred_mlr, y_pred_pr, y_pred_svr, y_pred_dt, y_pred_rf)
)

In [17]:
# Map each model's display name to its test-set R2 score. Insertion order is
# the order in which the report lines are printed.
model_scores = dict([
    ("Multiple Linear Regression (MLR)", r2score_mlr),
    ("Polynomial Regression (PR)", r2score_pr),
    ("Support Vector Regression (SVR)", r2score_svr),
    ("Decision Tree (DT)", r2score_dt),
    ("Random Forest (RF)", r2score_rf),
])

# One "<name>: <score>" line per model, scores shown to four decimals.
print("Model Performance (R2 Scores):")
print("\n".join(f"{name}: {r2:.4f}" for name, r2 in model_scores.items()))


Model Performance (R2 Scores):
Multiple Linear Regression (MLR): 0.9325
Polynomial Regression (PR): 0.9458
Support Vector Regression (SVR): 0.9481
Decision Tree (DT): 0.9229
Random Forest (RF): 0.9616


In [18]:
# Pick the (name, score) pair with the highest R2. On a tie, max() keeps the
# first entry in dictionary order.
best_model, best_score = max(model_scores.items(), key=lambda entry: entry[1])

print("\nBest Performing Model:")
print(f"{best_model} with an R2 score of {best_score:.4f}")


Best Performing Model:
Random Forest (RF) with an R2 score of 0.9616
