In [1]:
import pandas as pd
from pandas import read_csv

# Dataset read and preprocessing.
# Columns listed in UNUSED_COLUMNS are removed before modelling, and any row
# that still contains a missing value afterwards is discarded.
path = "sample_data/train.csv"
TARGET = "windmill_generated_power(kW/h)"
UNUSED_COLUMNS = ["tracking_id", "datetime", "turbine_status", "cloud_level"]

data = pd.read_csv(path).drop(columns=UNUSED_COLUMNS).dropna()

# Split the cleaned frame into the target series (Y) and the predictors (X).
Y = data[TARGET]
X = data.drop(columns=TARGET)

# Feature selection
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor

# Recursive feature elimination driven by a decision tree; 7 columns are kept.
# NOTE(review): the tree has no random_state, so the selected subset may vary
# between runs -- consider seeding it if reproducibility matters.
rfe = RFE(estimator=DecisionTreeRegressor(), n_features_to_select=7)
Feature = rfe.fit(X, Y)

# Drop every column RFE did not select.
# `support_` is a boolean mask aligned with X's columns, so inverting it yields
# the rejected columns for any number of input features. (The previous
# hard-coded `range(17)` raised IndexError when X had fewer than 17 columns --
# e.g. on re-running this cell -- and skipped columns beyond the 17th.)
shortlist = X.columns[~Feature.support_].tolist()
X = X.drop(columns=shortlist)

# Train/test split: 20% of the rows are held out for scoring, with a fixed
# random_state so the same partition is produced on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)


INDIVIDUAL SCORES OF ALL ALGORITHMS

In [2]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
import lightgbm as lgb
from catboost import CatBoostRegressor
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Build every candidate regressor first; later cells reuse these names as
# stacking components, so each one stays bound at notebook level.
ADAboost = AdaBoostRegressor(
    DecisionTreeRegressor(),
    n_estimators=100,
    learning_rate=1,
)
lgbm = lgb.LGBMRegressor(
    boosting_type='gbdt',
    learning_rate=0.1,
    feature_fraction=0.9,
)
catB = CatBoostRegressor(verbose=False)
xg_reg = xgb.XGBRegressor(
    objective='reg:squarederror',
    learning_rate=0.4,
    n_estimators=100,
    seed=13,
)
rf = RandomForestRegressor(n_estimators=100)
dt = DecisionTreeRegressor()
gb = GradientBoostingRegressor(n_estimators=500, learning_rate=1, max_depth=1)

# (printed prefix, estimator, extra keyword arguments for fit), listed in the
# order the models are trained and reported.
_runs = [
    ('ADABoost : ', ADAboost, {}),
    ('LGBMoost : ', lgbm, {}),
    ('CatBoost : ', catB, {'verbose': False, 'plot': True}),
    ('XGBoost : ', xg_reg, {}),
    ('Random Forest : ', rf, {}),
    ('DT : ', dt, {}),
    ('GB : ', gb, {}),
]

# Fit each model on the training split and print its score on the test split.
for _prefix, _estimator, _fit_kwargs in _runs:
    _estimator.fit(X_train, y_train, **_fit_kwargs)
    print(_prefix + str(_estimator.score(X_test, y_test)))

ADABoost : 0.9411407041202939
LGBMoost : 0.9426259416282259


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

CatBoost : 0.947228694408439
XGBoost : 0.9436688663803452
Random Forest : 0.9411319885776612
DT : 0.9145767546488857
GB : 0.9131815392575608


STACKING NEW
(L0 : CB, RF, ADAB, LGBM)
(L1 : XGB)

In [3]:
from sklearn.ensemble import StackingRegressor

# Level-0 learners: CatBoost, random forest, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('lgbm', lgbm),
]
# Level-1 (final) estimator: XGBoost.
level1 = xg_reg

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.9508482063246649


STACKING NEW
(L0 : CB, XGB, ADAB, LGBM)
(L1 : RF)

In [4]:
from sklearn.ensemble import StackingRegressor

# Level-0 learners: CatBoost, XGBoost, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('XGB', xg_reg),
    ('ADAB', ADAboost),
    ('lgbm', lgbm),
]
# Level-1 (final) estimator: random forest.
level1 = rf

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.9457774276841668


STACKING NEW
(L0 : CB, XGB, ADAB, RF)
(L1 : LGBM)

In [5]:
from sklearn.ensemble import StackingRegressor

# Level-0 learners: CatBoost, random forest, AdaBoost and XGBoost.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('XGB', xg_reg),
]
# Level-1 (final) estimator: LightGBM.
level1 = lgbm

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.9474025625891626


STACKING NEW
(L0 : CB, LGBM, ADAB, RF)
(L1 : Ridge)

In [6]:
from sklearn.linear_model import Ridge
from sklearn.ensemble import StackingRegressor

# Level-1 (final) estimator: ridge regression with alpha = 1.
ridgeR = Ridge(alpha=1)
level1 = ridgeR

# Level-0 learners: CatBoost, random forest, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('LGBM', lgbm),
]

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.9465633587606096


STACKING NEW
(L0 : CB, LGBM, ADAB, RF)
(L1 : Lasso)

In [7]:
from sklearn.linear_model import Lasso
 
# Level-1 (final) estimator: lasso regression with alpha = 1.
lasso = Lasso(alpha=1)
level1 = lasso

# Level-0 learners: CatBoost, random forest, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('LGBM', lgbm),
]

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.926503754821175


STACKING NEW
(L0 : CB, LGBM, ADAB, RF)
(L1 : ElasticNet)

In [8]:
from sklearn.linear_model import ElasticNet

# Level-1 (final) estimator: elastic net with alpha = 1.
e_net = ElasticNet(alpha=1)
level1 = e_net

# Level-0 learners: CatBoost, random forest, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('LGBM', lgbm),
]

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.9392181090973569


STACKING NEW
(L0 : CB, LGBM, ADAB, RF)
(L1 : LR)

In [9]:
from sklearn.linear_model import LinearRegression

# Level-1 (final) estimator: ordinary linear regression.
LR = LinearRegression()
level1 = LR

# Level-0 learners: CatBoost, random forest, AdaBoost and LightGBM.
list0 = [
    ('CB', catB),
    ('RF', rf),
    ('ADAB', ADAboost),
    ('LGBM', lgbm),
]

# Stack with 5-fold cross-validation, train, then report the test-split score.
model = StackingRegressor(estimators=list0, final_estimator=level1, cv=5)
model.fit(X_train, y_train)
print('Stacking : ' + str(model.score(X_test, y_test)))

Stacking : 0.946667778159189
