# Import Required Libraries and Dataset

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import math
import matplotlib
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from IPython.core.pylabtools import figsize
import seaborn as sns

In [None]:
# Notebook-wide settings; edit these before running the cells below.
data_path = "solar_dataset.csv"   # CSV file that is read into the dataframe
power_threshold = 500             # rows with Power below this value are discarded
tt_ratio = 0.8                    # fraction of rows (from the top) used for training

Read the dataset (set `data_path` above to the location of the CSV file)

In [None]:


# Load the measurements from the CSV file configured in `data_path`.
csv_file = data_path
df = pd.read_csv(filepath_or_buffer=csv_file)


Print the first rows of the dataframe to check its structure

In [None]:
# Preview the first five rows (the default for head()).
df.head()

Keep only the rows whose Power is at or above `power_threshold`, because readings below that threshold adversely affect the prediction

In [None]:
# Boolean mask of the rows whose Power reaches the configured minimum.
meets_threshold = df['Power'] >= power_threshold
df = df[meets_threshold]

In [None]:
# Show the column names of the thresholded frame.
df.columns

In [None]:

# Columns removed up front: the timestamp/day fields, the *_Integr / *_Ins /
# *_Max / *_Min / *_Sdev variants, and the station status/diagnostic columns.
columns_to_drop_1 = [
    'Timestamp',
    'Solar_Elevation_Avg',
    'Day',
    'Direct_Radiation_Integr',
    'Direct_Radiation_Ins',
    'Solar_Azimuth_Ins',
    'Solar_Elevation_Ins',
    'Inclined_Solar_Radiation_Integr',
    'Inclined_Solar_Radiation_Ins',
    'Horizontal_Solar_Radiation_Integr',
    'Horizontal_Solar_Radiation_Ins',
    'Diffuse_Radiation_Integr',
    'Diffuse_Radiation_Ins',
    'Global_Radiation_Integr',
    'Global_Radiation_Ins',
    'Wind_Direction_Max',
    'Wind_Direction_Sdev',
    'Wind_Speed_Max',
    'Wind_Speed_Sdev',
    'Air_Temperature_Max',
    'Air_Temperature_Min',
    'Battery_Level_Avg',
    'Pyrheliometer_Error_Avg',
    'Geotrac3K_Status_Logic_OR_Status',
    'Access_Control_Logic_OR_Status',
]

df = df.drop(columns=columns_to_drop_1)

In [None]:
# Show the column names that remain after the drop.
df.columns

In [None]:

# Long sensor column name -> short alias used in the rest of the notebook.
column_aliases = {
    'Horizontal_Solar_Radiation_Avg': 'HSR_Avg',
    'Relative_Humidity_Avg': 'RH_Avg',
    'Rain_Accu_mm': 'RA_mm',
    'Air_Temperature_Avg': 'AT_Avg',
    'Wind_Speed_Avg': 'WS_Avg',
    'Wind_Direction_Avg': 'WD_Avg',
    'Dew_Point_Avg': 'DP_Avg',
    'Global_Radiation_Avg': 'GR_Avg',
    'Diffuse_Radiation_Avg': 'DiffR_Avg',
    'Atmospheric_Pressure_QNH_Avg': 'AP_QNH_Avg',
    'Atmospheric_Pressure_QFE_Avg': 'AP_QFE_Avg',
    'Direct_Radiation_Avg': 'DirR_Avg',
    'Inclined_Solar_Radiation_Avg': 'ISR_Avg',
    'Solar_Azimuth_Avg': 'SA_Avg',
    'Direct_Theoretical_Radiation_Avg': 'DTR_Avg',
    'Global_Energy_Avg': 'GE_Avg',
    'Diffuse_Energy_Avg': 'DiffE_Avg',
    'Direct_Energy_Avg': 'DirE_Avg',
}

# Rename on the existing frame object, exactly as before.
df.rename(columns=column_aliases, inplace=True)

In [None]:
# Show the column names after the rename.
df.columns

# Data Transformation

In [None]:

# Rescale every column of df (features and the Power target) with MinMaxScaler.
# NOTE(review): the scaler is fitted on the whole frame before the train/test
# split further down, so statistics of the test rows influence the scaling.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df)
# Build a fresh float frame instead of assigning through df.loc[:, :]: writing
# float values in place into integer-typed columns is an incompatible-dtype
# assignment that recent pandas versions warn about.
df = pd.DataFrame(scaled_values, index=df.index, columns=df.columns)

In [None]:
def split_data(df, train_ratio=None):
    """Split a frame into train/test features and 'Power' targets by position.

    The first ``train_ratio`` share of the rows (in their existing order, no
    shuffling) becomes the training set and the remainder the test set. Test
    rows whose 'Power' value is not strictly positive are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Power' column plus the feature columns.
    train_ratio : float, optional
        Fraction of rows assigned to the training set, in (0, 1]. When
        omitted, the notebook-level ``tt_ratio`` setting is used.

    Returns
    -------
    tuple
        ``(X_train, X_test, pac_train, pac_test)`` where the X values are
        DataFrames without 'Power' and the pac values are NumPy arrays.

    Raises
    ------
    ValueError
        If the effective ratio is not in the interval (0, 1].
    """
    if train_ratio is None:
        train_ratio = tt_ratio  # fall back to the notebook-level configuration
    if not 0 < train_ratio <= 1:
        raise ValueError(f"train_ratio must be in (0, 1], got {train_ratio!r}")

    features = df.drop(columns=['Power'])
    pac = df['Power'].values
    train_size = int(train_ratio * len(pac))

    X_train, X_test = features.iloc[:train_size, :], features.iloc[train_size:, :]
    pac_train, pac_test = pac[:train_size], pac[train_size:]

    # Keep only the test samples with strictly positive power.
    positive = pac_test > 0
    pac_test = pac_test[positive]
    X_test = X_test.loc[positive]
    return X_train, X_test, pac_train, pac_test

# Positional (unshuffled) split of the scaled frame; y_* hold the 'Power' values.
X_train, X_test, y_train, y_test = split_data(df)

In [None]:
# Fit a Lasso model and tabulate its coefficients as feature importances.
lasso = Lasso(alpha = 0.00001)
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
# Label each coefficient with the column the model was actually fitted on.
# Using df.columns[:-1] would only be correct if 'Power' were the last column.
lasso_coeff = pd.DataFrame({'Feature Importance': lasso.coef_}, index=X_train.columns)
lasso_coeff = lasso_coeff.sort_values('Feature Importance', ascending=True)

Print and observe the Lasso coefficients

In [None]:
# Display the Lasso coefficient table (sorted ascending by coefficient).
lasso_coeff

Print the features whose Lasso coefficient is zero

In [None]:
# Rows of the Lasso table whose coefficient is exactly zero.
lasso_is_zero = lasso_coeff['Feature Importance'] == 0
lasso_coeff[lasso_is_zero].sort_values('Feature Importance')

In [None]:
# Fit an Elastic Net model and tabulate its coefficients as feature importances.
e_net = ElasticNet(alpha = 0.00001)
e_net.fit(X_train, y_train)
# Label each coefficient with the column the model was actually fitted on.
# Using df.columns[:-1] would only be correct if 'Power' were the last column.
e_net_coeff = pd.DataFrame({'Feature Importance': e_net.coef_}, index=X_train.columns)
e_net_coeff = e_net_coeff.sort_values('Feature Importance', ascending=True)

In [None]:
# Display the Elastic Net coefficient table (sorted ascending by coefficient).
e_net_coeff

In [None]:
# Rows of the Elastic Net table whose coefficient is exactly zero.
e_net_is_zero = e_net_coeff['Feature Importance'] == 0
e_net_coeff[e_net_is_zero].sort_values('Feature Importance')

In [None]:
# Horizontal bar chart of the sorted Lasso coefficients, saved at 300 dpi.
figsize(12, 10)
matplotlib.rcParams['font.size'] = 16

g = lasso_coeff['Feature Importance'].plot.barh(figsize=(20, 12), color='#8f63f4')
g.set_title("Lasso Feature Importance Plot", fontsize=20)
plt.savefig(fname="Lasso_Feature_importance_plot", dpi=300)

In [None]:
# Horizontal bar chart of the sorted Elastic Net coefficients, saved at 300 dpi.
figsize(12, 10)
matplotlib.rcParams['font.size'] = 16

g = e_net_coeff['Feature Importance'].plot.barh(figsize=(20, 12), color='#FFA500')
g.set_title("Elastic Net Feature Importance Plot", fontsize=20)
plt.savefig(fname="E_net_Feature_importance_plot", dpi=300)

In [None]:
# Remove four more features.
# NOTE(review): presumably the ones the regularised models zeroed out - confirm
# against the coefficient tables above.
column_to_drop = ['AP_QFE_Avg', 'RA_mm', 'DP_Avg', 'AP_QNH_Avg']
df = df.drop(columns=column_to_drop)

In [None]:
# Correlation heatmap of the input features. The 'Power' target is excluded by
# name, so the result does not depend on where that column sits in df.
matplotlib.rcParams['font.size'] = 10
corr = df.drop(columns=['Power']).corr()
plt.figure(figsize=(12,10))
sns.heatmap(corr, annot=True, square=True);
plt.title("Correlation Matrix Heatmap",fontsize=20)
plt.savefig(fname="Correlation_Matrix_Heatmap_brfore_feature_removal",dpi=300)

In [None]:
# Remove five further features before re-plotting the correlation matrix.
column_to_drop_2 = ['HSR_Avg', 'GE_Avg', 'DiffE_Avg', 'DirE_Avg', 'DTR_Avg']
df = df.drop(columns=column_to_drop_2)

In [None]:
# Correlation heatmap of the features left after the second drop. The 'Power'
# target is excluded by name, so the result does not depend on its position.
matplotlib.rcParams['font.size'] = 14
corr = df.drop(columns=['Power']).corr()
plt.figure(figsize=(12,10))
sns.heatmap(corr, annot=True, square=True);
plt.title("Correlation Matrix Heatmap After Dimentionality Reduction",fontsize=20)
plt.savefig(fname="Correlation_Matrix_Heatmap_afyter_feature_removal",dpi=300)

In [None]:
# Overlay the kernel-density estimate of each remaining column on one axes.
# Each entry is (column name, curve colour, legend label). The WS_Avg and
# GR_Avg labels used to be swapped, and GR_Avg shared 'red' with Power, which
# made the two curves indistinguishable in the legend.
kde_series = [
    ('Power', 'red', 'Power'),
    ('RH_Avg', 'lightgreen', 'Relative Humidity Avg'),
    ('AT_Avg', 'gold', 'Air Temperature Avg'),
    ('WS_Avg', 'purple', 'Wind Speed Avg'),
    ('GR_Avg', 'orange', 'Global Radiation Avg'),
    ('DiffR_Avg', 'blue', 'Diffuse Radiation Avg'),
    ('DirR_Avg', 'pink', 'Direct Radiation Avg'),
    ('ISR_Avg', 'brown', 'Inclined Solar Radiation Avg'),
    ('SA_Avg', 'darkgreen', 'Solar Azimuth Avg'),
    ('WD_Avg', 'grey', 'Wind Direction Avg'),
]

fig, ax = plt.subplots()
for column, colour, label in kde_series:
    # NOTE(review): newer seaborn releases deprecate `shade` in favour of
    # `fill`; kept as-is for the installed version - confirm and switch.
    sns.kdeplot(data=df[column], ax=ax, color=colour, shade=True, label=label)
ax.set_xlabel('Normalized Feature', fontsize=18)
# Set the title before tight_layout so the layout pass accounts for it.
ax.set_title("Feature Distribution Plot", fontsize=20)
ax.legend(bbox_to_anchor=(1, 1), loc='upper right')
plt.tight_layout()
plt.savefig(fname="F_dis_thres", dpi=300)
plt.show()