In [1]:
import numpy as np
import pandas as pd

In [2]:
data=pd.read_csv('data.csv')

In [3]:
data.head()

Unnamed: 0,Ambient Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,electrical energy output
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [4]:
data.shape

(9568, 5)

In [5]:
data.isnull().sum()

Ambient Temperature         0
Exhaust Vacuum              0
Ambient Pressure            0
Relative Humidity           0
electrical energy output    0
dtype: int64

In [6]:
data.describe()

Unnamed: 0,Ambient Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,electrical energy output
count,9568.0,9568.0,9568.0,9568.0,9568.0
mean,19.651231,54.305804,1013.259078,73.308978,454.365009
std,7.452473,12.707893,5.938784,14.600269,17.066995
min,1.81,25.36,992.89,25.56,420.26
25%,13.51,41.74,1009.1,63.3275,439.75
50%,20.345,52.08,1012.94,74.975,451.55
75%,25.72,66.54,1017.26,84.83,468.43
max,37.11,81.56,1033.3,100.16,495.76


In [7]:
data.rename(columns={'Ambient Temperature':'ambient_temperature','Exhaust Vacuum':'exhaust_vacuum','Relative Humidity':'relative_humidity','electrical energy output':'electrical_energy_output'},inplace=True)

In [8]:
data.corr()

Unnamed: 0,ambient_temperature,exhaust_vacuum,Ambient Pressure,relative_humidity,electrical_energy_output
ambient_temperature,1.0,0.844107,-0.507549,-0.542535,-0.948128
exhaust_vacuum,0.844107,1.0,-0.413502,-0.312187,-0.86978
Ambient Pressure,-0.507549,-0.413502,1.0,0.099574,0.518429
relative_humidity,-0.542535,-0.312187,0.099574,1.0,0.389794
electrical_energy_output,-0.948128,-0.86978,0.518429,0.389794,1.0


In [10]:
pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/24/ec/ad387100fa3cc2b9b81af0829b5ecfe75ec5bb19dd7c19d4fea06fb81802/xgboost-2.0.3-py3-none-win_amd64.whl.metadata
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.1/99.8 MB 1.1 MB/s eta 0:01:32
   ---------------------------------------- 0.5/99.8 MB 5.6 MB/s eta 0:00:18
    --------------------------------------- 2.2/99.8 MB 13.8 MB/s eta 0:00:08
   - -------------------------------------- 3.7/99.8 MB 18.1 MB/s eta 0:00:06
   -- ------------------------------------- 5.4/99.8 MB 23.1 MB/s eta 0:00:05
   -- ------------------------------------- 6.8/99.8 MB 24.0 MB/s eta 0:00:04
   --- -----------------------------------

In [11]:
import xgboost as xgb
from xgboost import XGBRegressor

In [12]:
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

In [13]:
models={
    'Linear_Regression':LinearRegression(),
    'Support_Vector_Regression':SVR(),
    'Random_Forest_Regression':RandomForestRegressor(n_estimators=12,random_state=42),
    'Ridge':Ridge(),
    'XGB_Regressor':XGBRegressor()
}

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [15]:
import matplotlib.pyplot as plt

In [16]:
def show_plot(x,y):
    plt.figure(figsize=(8, 6))
    plt.scatter(x,y,color='red')
    plt.gca().set_facecolor('#E5E5E5')
    plt.grid(True,linestyle='--',alpha=0.6)
    plt.show()

In [17]:
def model_selection(x,y,t_size,r_state):
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=t_size,random_state=r_state)
    for model_name,model in models.items():
        regression=model
        regression.fit(x_train,y_train)
        train_pred=regression.predict(x_train)
        print('Analyzing =>',model_name)
        print('R2 Score of training dataset')
        print(r2_score(y_train,train_pred))
        print('Visualizing Predictions of training data')
        #show_plot(x,y,y_train,train_pred)
        test_pred=regression.predict(x_test)
        print('R2 Score of test dataset')
        print(r2_score(y_test,test_pred))
        print('Visualizing Predictions of test data')
        #show_plot(x,y,y_test,test_pred)
        print()
        print('------------------------------------------------------------')
        print()

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
scaler=StandardScaler()

In [20]:
x=data.iloc[:,:-1]
y=data.iloc[:,-1]

In [21]:
x_scaler=scaler.fit_transform(x)

In [22]:
model_selection(x_scaler,y,.2,42)

Analyzing => Linear_Regression
R2 Score of training dataset
0.928331545565795
Visualizing Predictions of training data
R2 Score of test dataset
0.9301046431962188
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Support_Vector_Regression
R2 Score of training dataset
0.9407824561073126
Visualizing Predictions of training data
R2 Score of test dataset
0.942164272573348
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Random_Forest_Regression
R2 Score of training dataset
0.992797708897883
Visualizing Predictions of training data
R2 Score of test dataset
0.9596912789204433
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Ridge
R2 Score of training dataset
0.9283314805158668
Visualizing Predictions of training data
R2 Score of test dataset
0.930108492047272
Visualizing Predictions of test data

-------

In [23]:
model_selection(x,y,.2,42)

Analyzing => Linear_Regression
R2 Score of training dataset
0.928331545565795
Visualizing Predictions of training data
R2 Score of test dataset
0.9301046431962188
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Support_Vector_Regression
R2 Score of training dataset
0.3842212521437096
Visualizing Predictions of training data
R2 Score of test dataset
0.38951004564542047
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Random_Forest_Regression
R2 Score of training dataset
0.9927767311506587
Visualizing Predictions of training data
R2 Score of test dataset
0.9598052433445434
Visualizing Predictions of test data

------------------------------------------------------------

Analyzing => Ridge
R2 Score of training dataset
0.9283315455428218
Visualizing Predictions of training data
R2 Score of test dataset
0.9301047172736033
Visualizing Predictions of test data

---

In [24]:
def make_prediction(model):
    test_cases=[
        [11.04,41.74,1022.6,77.51],
        [14.96,41.76,1024.07,73.17],
        [25.18,62.96,1020.04,59.08],
        [5.11,39.4,1012.16,92.14]
    ]
    for test_case in test_cases: print(model.predict(np.array(test_case).reshape(1,4)))

In [25]:
def model_creation(regression,x,y):
    model=regression
    model.fit(x,y)
    make_prediction(model)

In [28]:
model_creation(RandomForestRegressor(n_estimators=20,random_state=42),x,y)

[476.6695]
[463.6315]
[444.1005]
[488.0845]


In [27]:
import warnings
warnings.filterwarnings('ignore')

In [29]:
model_creation(XGBRegressor(),x,y)

[476.88724]
[463.63425]
[444.67712]
[487.82715]
