In [None]:
import math
import os
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import LGBMRegressor
# Keep this before the sklearn.ensemble import: older scikit-learn releases only
# expose HistGradientBoostingRegressor after the experimental flag is imported.
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, HistGradientBoostingRegressor, VotingRegressor, StackingRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor


In [None]:
# Location prefix for saved model files; empty by default.
# NOTE(review): presumably meant for the model-saving cell at the end of the notebook — confirm.
model_path = ""


# Read Dataset

In [None]:

# Relative path of the CSV file to load (resolved against the working directory).
csv1 = "solar_dataset.csv"
df = pd.read_csv(csv1)
# Show the first two rows as the cell output.
df.head(2)

# Drop Dimensions as per Analysis

In [None]:
# Keep only the rows whose 'Power' value is at least 500.
df = df.loc[df['Power'] >= 500]

In [None]:
# Columns removed from the feature set before modelling.
column_to_drop = [
    'Dew_Point_Avg',
    'Rain_Accu_mm',
    'Atmospheric_Pressure_QNH_Avg',
    'Atmospheric_Pressure_QFE_Avg',
    'Direct_Theoretical_Radiation_Avg',
    'Horizontal_Solar_Radiation_Avg',
    'Global_Energy_Avg',
    'Diffuse_Energy_Avg',
    'Direct_Energy_Avg',
    'Wind_Direction_Avg',
]
df = df.drop(columns=column_to_drop)

In [None]:
# Display the column labels currently present in df.
df.columns

In [None]:
# Display the (rows, columns) dimensions of df.
df.shape

# Split Data

In [None]:
def split_data(df, train_frac=0.8, target='Power'):
    """Split a frame into order-preserving train/test features and targets.

    The first ``train_frac`` share of the rows (in their existing order, no
    shuffling) becomes the training set and the remainder the test set.
    Test rows whose target is not strictly positive are discarded; training
    rows are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing the ``target`` column plus the feature columns.
    train_frac : float, default 0.8
        Fraction of rows assigned to the training set; must lie in [0, 1].
    target : str, default 'Power'
        Name of the column to predict.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Feature frames (every column except ``target``).
    pac_train, pac_test : numpy.ndarray
        Target values matching ``X_train`` and ``X_test`` row for row.

    Raises
    ------
    ValueError
        If ``train_frac`` is outside [0, 1].
    """
    if not 0 <= train_frac <= 1:
        raise ValueError("train_frac must be between 0 and 1")

    X = df.drop(columns=[target])
    pac = df[target].values

    # Truncate toward zero so the training set never exceeds the requested share.
    train_size = int(train_frac * len(pac))
    X_train, X_test = X.iloc[:train_size, :], X.iloc[train_size:, :]
    pac_train, pac_test = pac[:train_size], pac[train_size:]

    # Keep only the test rows with a strictly positive target.
    positive = pac_test > 0
    return X_train, X_test[positive], pac_train, pac_test[positive]

# Unpack the four pieces returned by split_data; the target arrays are bound to y_train / y_test.
X_train, X_test, y_train, y_test = split_data(df)

In [None]:
# Random forest without bootstrapping. max_features='sqrt' still samples features
# at random for every split, so the seed is fixed (73, matching the other seeded
# models in this notebook) to make the fit reproducible.
RF_f = RandomForestRegressor(
    n_estimators=1700,
    max_depth=60,
    max_features='sqrt',
    min_samples_split=4,
    bootstrap=False,
    random_state=73,
)
RF_f.fit(X_train, y_train)

In [None]:
# Extra-trees ensemble of 1000 trees with a fixed seed.
XT = ExtraTreesRegressor(
    random_state=73,
    n_estimators=1000,
)
XT.fit(X_train, y_train)

In [None]:
# AdaBoost with many estimators, a small learning rate and the exponential loss.
Ada_f = AdaBoostRegressor(
    n_estimators=2300,
    learning_rate=0.03,
    loss='exponential',
    random_state=73,
)
Ada_f.fit(X_train, y_train)


In [None]:
# Gradient boosting with deep trees. max_features='sqrt' samples features at
# random for every split, so the seed is fixed (73, matching the other seeded
# models in this notebook) to make the fit reproducible.
GB_f = GradientBoostingRegressor(
    n_estimators=1500,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=12,
    random_state=73,
)
GB_f.fit(X_train, y_train)


In [None]:
# Histogram-based gradient boosting trained on an absolute-deviation loss.
# NOTE(review): newer scikit-learn releases renamed this loss to 'absolute_error'
# and reject the old spelling — confirm against the installed version.
HGB_f = HistGradientBoostingRegressor(
    loss='least_absolute_deviation',
    learning_rate=0.02,
    max_iter=750,
    max_depth=40,
    min_samples_leaf=2,
    random_state=73,
)
HGB_f.fit(X_train, y_train)

In [None]:
# XGBoost regressor: 1200 boosting rounds, learning rate 0.05, fixed seed.
XGB = XGBRegressor(
    random_state=73,
    learning_rate=0.05,
    n_estimators=1200,
)
XGB.fit(X_train, y_train)


In [None]:
# LightGBM regressor with small trees (5 leaves), row bagging and feature
# subsampling; both subsampling seeds are pinned to 9.
LGBg_f = LGBMRegressor(
    objective='regression',
    num_leaves=5,
    learning_rate=0.05,
    n_estimators=720,
    max_bin=55,
    bagging_fraction=0.8,
    bagging_freq=5,
    feature_fraction=0.2319,
    feature_fraction_seed=9,
    bagging_seed=9,
    min_data_in_leaf=6,
    min_sum_hessian_in_leaf=11,
)
LGBg_f.fit(X_train, y_train)

In [None]:
# Stacked ensemble: four base learners feed a meta-learner that reuses the
# HGB_f configuration as final_estimator.
estimators = [
    ('lgbgf', LGBg_f),
    ('xt', XT),
    ('xgb', XGB),
    ('hgb', HGB_f),
]
Stack = StackingRegressor(final_estimator=HGB_f, estimators=estimators)
Stack.fit(X_train, y_train)


In [None]:
# Voting ensemble over the same four base learners used for stacking.
Vote = VotingRegressor(
    estimators=[
        ('lgbgf', LGBg_f),
        ('xt', XT),
        ('xgb', XGB),
        ('hgb', HGB_f),
    ]
)
Vote.fit(X_train, y_train)

# Save the Models

In [None]:


# Every fitted estimator, keyed by the base name of its output file.
models = {
    "Ada_f": Ada_f,
    "XT": XT,
    "RF_f": RF_f,
    "HGB_f": HGB_f,
    "XGB": XGB,
    "GB_f": GB_f,
    "LGBg_f": LGBg_f,
    "Vote": Vote,
    "Stack": Stack,
}

# An empty model_path means "current working directory", which needs no creation;
# otherwise make sure the target directory exists before writing into it.
if model_path:
    os.makedirs(model_path, exist_ok=True)

for key, model in models.items():
    # os.path.join accepts model_path with or without a trailing separator.
    filename = os.path.join(model_path, str(key) + ".sav")
    joblib.dump(model, filename)
    print(key)