In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

import re

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif

from sklearn.model_selection import train_test_split

import xgboost as xgb

import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential

from sklearn.metrics import mean_squared_error, r2_score, median_absolute_error

import warnings
warnings.filterwarnings('ignore')


ModuleNotFoundError: No module named 'numpy'

In [2]:
# Read the raw measurements from the CSV file that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="SolarPrediction.csv")


In [3]:
# Keep only the first whitespace-separated token of 'Data' (the date part).
# The vectorised .str accessor replaces the per-row lambda and passes missing
# values through as NaN instead of raising AttributeError on them.
df['Data'] = df['Data'].str.split().str[0]

In [4]:
# Parse each text column exactly once and derive every calendar/clock part
# from the parsed result; the previous version re-ran pd.to_datetime on the
# same column for each of the three parts it extracted.
parsed_date = pd.to_datetime(df['Data'])
parsed_time = pd.to_datetime(df['Time'])

df['Month'] = parsed_date.dt.month
df['Day'] = parsed_date.dt.day
df['Year'] = parsed_date.dt.year
df['Hour'] = parsed_time.dt.hour
df['Minute'] = parsed_time.dt.minute
df['Second'] = parsed_time.dt.second

In [5]:
# Split the sunrise and sunset clock strings into integer hour/minute columns.
# The hour is the leading run of digits; the minute is the first run of digits
# that follows a colon.  'SunsetHour' used to be built with the minute pattern
# and therefore duplicated 'SunsetMinute'; it now reads the leading digits,
# exactly like 'SunriseHour'.
hour_pattern = r'^(\d+)'
minute_pattern = r':(\d+)'
for prefix, source_column in (('Sunrise', 'TimeSunRise'), ('Sunset', 'TimeSunSet')):
    df[f'{prefix}Hour'] = df[source_column].str.extract(hour_pattern, expand=False).astype(int)
    df[f'{prefix}Minute'] = df[source_column].str.extract(minute_pattern, expand=False).astype(int)

In [6]:
# Discard the raw timestamp/text columns now that their parts live in their own
# numeric columns.  Rebinding the returned frame (instead of inplace=True)
# follows the pandas idiom and keeps the statement chainable.
df = df.drop(columns=['UNIXTime', 'Data', 'Time', 'TimeSunRise', 'TimeSunSet'])

In [None]:
# Total number of missing cells across the whole frame (per-column counts, then summed).
df.isna().sum().sum()

In [8]:
# 'Radiation' is the prediction target; every other column is a model input.
output_features = df['Radiation']
input_features = df.drop(columns=['Radiation'])

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap.
cor = df.corr()
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(cor, annot=True, ax=ax)
plt.show()

In [10]:
# Release the combined frame; the cells that follow in this notebook work only
# with input_features and output_features.
del df

In [11]:
# Scale the target by 100 and truncate to a whole number so it can serve as a
# discrete label below.  Note: re-running this cell multiplies by 100 again.
output_features = (output_features * 100).astype('int64')

In [12]:
# Remember the column labels (the scaler returns a bare array), then min-max
# scale every input feature.
input_features_columns = input_features.columns
scalar = MinMaxScaler()
scaled_input_features = scalar.fit(input_features).transform(input_features)

In [13]:
# Score every feature against the target with the chi-squared statistic;
# k='all' keeps all of them so the scores can be ranked afterwards.
feature_selector = SelectKBest(score_func=chi2, k='all')
selected_features = feature_selector.fit(X=scaled_input_features, y=output_features)

In [None]:
# Chi-squared score per feature, highest first.
imp = pd.DataFrame(
    {'Importance': selected_features.scores_},
    index=input_features_columns,
).sort_values(by='Importance', ascending=False)
imp

In [None]:
# One bar per feature, in the ranked order of `imp`.  x and y are passed
# explicitly because a lone positional Series is interpreted differently
# across seaborn versions.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(x=imp.index, y=imp['Importance'], ax=ax)
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
# Second importance ranking, this time from a randomised tree ensemble.
# random_state pins the randomisation so the ranking is the same on every run.
model = ExtraTreesClassifier(n_estimators=20, verbose=2, n_jobs=-1, random_state=42)
model.fit(scaled_input_features, output_features)

In [None]:
# Tree-based importance per feature, highest first (replaces the chi-squared table in `imp`).
imp = pd.DataFrame(
    {'Importance': model.feature_importances_},
    index=input_features_columns,
).sort_values(by='Importance', ascending=False)
imp

In [None]:
# Bar chart of the ranked importances, with the feature names turned vertical.
fig, ax = plt.subplots()
ax.bar(imp.index, imp['Importance'])
ax.tick_params(axis='x', rotation=90)
plt.show()

In [19]:
# Feature engineering

In [None]:
input_features.drop(['Year','SunriseHour'],axis=1)
pd.DataFrame(input_features)


In [None]:
transform_features=['Temperature','Pressure','Humidity','WindDirection(Degrees)','Speed']
for i in transform_features:
    fig, (ax1,ax2,ax3,ax4,ax5)= plt.subplots(5,1, figsize=(10,6))
    input_features[i].hist(ax=ax1, bins=50)
    ((input_features[i]+1).transform(np.log)).hist(ax=ax2, bins=50)
    pd.DataFrame(stats.boxcox(input_features[i]+1)[0]).hist(ax=ax3, bins=50)
    pd.DataFrame(MinMaxScaler().fit_transform(pd.DataFrame(input_features[i]))).hist(ax=ax4, bins=50)    
    pd.DataFrame(StandardScaler().fit_transform(pd.DataFrame(input_features[i]))).hist(ax=ax5, bins=50)    
    ax1.set_ylabel('Normal')
    ax2.set_ylabel('Log')
    ax3.set_ylabel('Boxcox')
    ax4.set_ylabel('MinMax')
    ax5.set_ylabel('Standard')
    ax1.set_title(i)

In [44]:
transform={'Temperture':(input_features["Temperature"]+1).transform(np.log),
           'Speed': (input_features['Speed']+1).transform(np.log),
           'WindDirection(Degrees)': MinMaxScaler().fit_transform(pd.DataFrame(input_features['WindDirection(Degrees)'])),
           'Pressure': stats.boxcox(input_features['Pressure']+1)[0],
           'Humidity': stats.boxcox(input_features['Humidity']+1)[0]}
           

In [45]:
for i in transform:
    input_features[i]=transform[i]

In [46]:
xTrain, xTest, yTrain, yTest=train_test_split(input_features,output_features, test_size=0.3)

In [None]:
pd.DataFrame(xTrain)

In [48]:
scaler=StandardScaler()
xTrain=scaler.fit_transform(xTrain)
xTest=scaler.transform(xTest)

#XGBoost

In [None]:
from xgboost import XGBRegressor

model=XGBRegressor(learning_rate=0.1,max_depth=8)
model.fit(xTrain,yTrain)

In [50]:
y_pred=model.predict(xTest)

In [None]:
rmse=np.sqrt(mean_squared_error(yTest,y_pred))
r2=r2_score(yTest,y_pred)

print(f"rmse : {rmse:.2f}")
print(f"r2 : {r2:.2f}")

#Multilayer Perceptron Model

In [None]:
yTrain.apply(lambda x: x/100)

In [None]:

yTest.apply(lambda x: x/100)

In [54]:
model=Sequential([
    Dense(123,activation='relu',input_dim=16),
    Dropout(0.33),
    Dense(64,activation='relu'),
    Dropout(0.33),
    Dense(32,activation='relu'),
    Dropout(0.33),
    Dense(1)
])
    


In [55]:
# Train with Adam on mean squared error; MSE is also reported as the tracked metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mse'],
)

In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
# Fit the network for 20 epochs in mini-batches of 32, holding back 10% of the
# training rows for validation; the returned History object is kept for plotting.
history = model.fit(
    xTrain,
    yTrain,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
)

In [None]:
# Draw each series recorded during training in its own figure.  The title and
# axis labels name the series, so the otherwise identical-looking curves can be
# told apart.
fit = history.history
for series_name, series_values in fit.items():
    fig, ax = plt.subplots()
    ax.plot(series_values)
    ax.set_title(series_name)
    ax.set_xlabel('epoch')
    ax.set_ylabel(series_name)
    plt.show()