# Import Statements



In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import pylab
import scipy.stats as stats
import seaborn as sns

from io import StringIO
import sys

import sklearn
import sklearn.linear_model as sl
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split, GroupShuffleSplit, GridSearchCV
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

# Data Pre-processing

### Data Loading

In [None]:
# Mount Google Drive at /content/drive so the data file read by the next cell
# is reachable. google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the weather CSV from the mounted Drive folder into the working DataFrame.
df = pd.read_csv("/content/drive/MyDrive/nevada_weather.csv")

In [None]:
# Remove the "Year" column from the working frame
df = df.drop(columns=["Year"])

In [None]:
# Mapping is_day (False/True) -> (0/1)
# NOTE(review): replace() is applied to the whole frame, not just is_day, so any
# other True/False values would be converted too — confirm that is intended.
df.replace(to_replace=True, value=1, inplace=True)
df.replace(to_replace=False, value=0, inplace=True)

In [None]:
# Columns removed before modelling
unused_columns = [
    "DHI",
    "Clearsky DHI",
    "Clearsky DNI",
    "Clearsky GHI",
    "DNI",
    "Fill Flag",
    "Dew Point",
]
df = df.drop(columns=unused_columns)

# Model Splitting

In [None]:
# Pair each row's features with the GHI value found 288 rows later.
# NOTE(review): 288 presumably corresponds to one day of samples — confirm
# against the sampling interval of the data set.
total_records = len(df)
lookahead_rows = 288

# Features: everything except the last `lookahead_rows` rows, without GHI.
features = df.iloc[:total_records - lookahead_rows].drop(columns='GHI').reset_index()
# Target: GHI starting `lookahead_rows` rows in, re-indexed from zero so it
# lines up positionally with the features.
output = df['GHI'].iloc[lookahead_rows:].reset_index()

# Both pieces carry an 'index' column from reset_index(); drop them after joining.
df = pd.concat([features, output], join='inner', axis=1).drop(columns=['index'])

In [None]:
# Separate the rows by the is_day flag; the flag itself is constant within
# each subset, so it is removed from both.
day_mask = df["is_day"] == 1
night_mask = df["is_day"] == 0
df_day = df.loc[day_mask].drop(columns="is_day")
df_night = df.loc[night_mask].drop(columns="is_day")

In [None]:
def feature_output_split(x):
  """Split a frame into model inputs and the GHI target.

  Args:
    x: DataFrame that contains a 'GHI' column.

  Returns:
    A (features, output) tuple: `x` without the 'GHI' column, and the
    'GHI' column on its own. `x` itself is not modified.

  Raises:
    KeyError: if `x` has no 'GHI' column.
  """
  features = x.drop('GHI', axis=1)
  output = x['GHI']

  return features, output

In [None]:
# Separate inputs from the GHI target for the full data set and for the
# day-only and night-only subsets.
feature_df, GHI = feature_output_split(df)
day_feature_df, day_GHI = feature_output_split(df_day)
night_feature_df, night_GHI = feature_output_split(df_night)

In [None]:
# Same 80/20 split settings and seed for all three data sets
split_options = {"test_size": .20, "random_state": 42}

x_train, x_test, y_train, y_test = train_test_split(
    feature_df, GHI, **split_options)
day_x_train, day_x_test, day_y_train, day_y_test = train_test_split(
    day_feature_df, day_GHI, **split_options)
night_x_train, night_x_test, night_y_train, night_y_test = train_test_split(
    night_feature_df, night_GHI, **split_options)

# Linear Regression

In [None]:
def apply_LR(x_train, y_train, x_test, y_test):
  """Fit an ordinary linear regression and print its train and test RMSE.

  The model is fitted on (x_train, y_train) only; nothing is returned.
  """
  model = LinearRegression()
  model.fit(x_train, y_train)

  # Train score is reported first, then the held-out score.
  evaluations = (
      ("Train RMSE: ", x_train, y_train),
      ("Test RMSE: ", x_test, y_test),
  )
  for label, inputs, targets in evaluations:
    predictions = model.predict(inputs)
    rmse = metrics.mean_squared_error(targets, predictions) ** 0.5
    print(label, rmse)

In [None]:
# whole data: a single linear model over day and night rows together
apply_LR(x_train, y_train, x_test, y_test)

Train RMSE:  151.55383564616952
Test RMSE:  152.01901309262257


In [None]:
# day_time_data: linear model restricted to rows where is_day == 1
apply_LR(day_x_train, day_y_train, day_x_test, day_y_test)

Train RMSE:  113.40550303779567
Test RMSE:  114.88453421034431


In [None]:
# night_time_data: linear model restricted to rows where is_day == 0
apply_LR(night_x_train, night_y_train, night_x_test, night_y_test)

Train RMSE:  6.392624176593561
Test RMSE:  6.225040022294496


In [None]:
def combine_result(y1, y1_pred, y2, y2_pred):
  """Return one RMSE computed over two (truth, prediction) sets pooled together.

  The first set's values are placed before the second set's in both the
  pooled truth and the pooled predictions, so pairs stay aligned.
  """
  pooled_truth = np.concatenate([y1, y2])
  pooled_pred = np.concatenate([y1_pred, y2_pred])
  return metrics.mean_squared_error(pooled_truth, pooled_pred) ** 0.5

In [None]:
# combining day and night time results
# One linear model per subset; the two prediction sets are then scored together.
day_linear = LinearRegression().fit(day_x_train, day_y_train)
night_linear = LinearRegression().fit(night_x_train, night_y_train)

# Training-split score
day_y_train_pred = day_linear.predict(day_x_train)
night_y_train_pred = night_linear.predict(night_x_train)
train_rmse = combine_result(
    day_y_train, day_y_train_pred,
    night_y_train, night_y_train_pred,
)
print("Train RMSE: ", train_rmse)

# Held-out score
day_y_test_pred = day_linear.predict(day_x_test)
night_y_test_pred = night_linear.predict(night_x_test)
test_rmse = combine_result(
    day_y_test, day_y_test_pred,
    night_y_test, night_y_test_pred,
)
print("Test RMSE: ", test_rmse)

Train RMSE:  81.13455703527238
Test RMSE:  82.18219926974604


# Lasso Regression

In [None]:
from sklearn.model_selection import learning_curve

# Ridge Regression

# Polynomial Regression

In [None]:
# # whole data
# for n in [2, 3, 4]:
#   apply_polynomial_regression(x_train, y_train, x_test, y_test, n)

In [None]:
# # day_time_data
# for n in [2, 3, 4]:
#   apply_polynomial_regression(day_x_train, day_y_train, day_x_test, day_y_test, n)

In [None]:
# # night_time_data
# for n in [2, 3, 4]:
#   apply_polynomial_regression(night_x_train, night_y_train, night_x_test, night_y_test, n)

In [None]:
# combining day and night time results
# day_poly_reg = PolynomialFeatures(degree= 4)
# poly_day_x_train = day_poly_reg.fit_transform(day_x_train) 
# poly_day_x_test = day_poly_reg.fit_transform(day_x_test)

# night_poly_reg = PolynomialFeatures(degree= 4)
# poly_night_x_train = night_poly_reg.fit_transform(night_x_train) 
# poly_night_x_test = night_poly_reg.fit_transform(night_x_test)

# day_linear = LinearRegression()
# day_linear.fit(poly_day_x_train, day_y_train)
# night_linear = LinearRegression()
# night_linear.fit(poly_night_x_train, night_y_train)

# day_y_train_pred = day_linear.predict(poly_day_x_train)
# night_y_train_pred = night_linear.predict(poly_night_x_train)
# train_rmse = combine_result(day_y_train, day_y_train_pred, night_y_train, night_y_train_pred)
# print("Train RMSE: ", train_rmse)

# day_y_test_pred = day_linear.predict(poly_day_x_test)
# night_y_test_pred = night_linear.predict(poly_night_x_test)
# test_rmse = combine_result(day_y_test, day_y_test_pred, night_y_test, night_y_test_pred)
# print("Test RMSE: ", test_rmse)

# **SVM Regression**

In [None]:
from sklearn.svm import SVR

In [None]:
def SVM_Regress(x_train, y_train, x_test, y_test):
  """Fit a polynomial-kernel SVR, print train/test RMSE, and return the model.

  The regressor is fitted on (x_train, y_train) with verbose solver output
  enabled; the fitted estimator is returned so callers can reuse it.
  """
  regressor = SVR(kernel = "poly", verbose = True)
  regressor.fit(x_train, y_train)

  # Train score is reported first, then the held-out score.
  for label, inputs, targets in (
      ("Train RMSE: ", x_train, y_train),
      ("Test RMSE: ", x_test, y_test),
  ):
    predictions = regressor.predict(inputs)
    print(label, metrics.mean_squared_error(targets, predictions) ** 0.5)
  return regressor

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features for the SVR and neural-network sections.
#
# Each scaler is fitted on the TRAINING split only and the same fitted
# transformation is then applied to the matching test split. Calling
# fit_transform() on a test split would standardise it with its own mean and
# variance, so train and test would end up on different scales and the test
# score would no longer reflect how the model behaves on unseen data.
#
# A separate scaler is kept per subset so each fitted transformation remains
# available afterwards instead of being overwritten by the next fit.
day_sc_X = StandardScaler()
day_x_train = day_sc_X.fit_transform(day_x_train)
day_x_test = day_sc_X.transform(day_x_test)

night_sc_X = StandardScaler()
night_x_train = night_sc_X.fit_transform(night_x_train)
night_x_test = night_sc_X.transform(night_x_test)

# sc_X keeps its original name and now holds the whole-data scaler.
sc_X = StandardScaler()
x_train = sc_X.fit_transform(x_train)
x_test = sc_X.transform(x_test)

# Target scaler is created but never fitted in this notebook; the name is
# kept so any later cell that refers to it still finds it.
sc_y = StandardScaler()

In [None]:
# whole data: SVR on the scaled full data set; the fitted model is kept for reuse
whole_data_model=SVM_Regress(x_train, y_train, x_test, y_test)

[LibSVM]Train RMSE:  171.53191472055016
Test RMSE:  169.90716384327686


In [None]:
# day_time_data: SVR on the scaled day-only subset; the fitted model is kept for reuse
day_time_model=SVM_Regress(day_x_train, day_y_train, day_x_test, day_y_test)

[LibSVM]Train RMSE:  156.17002206452625
Test RMSE:  155.22730672917223


In [None]:
# night_time_data: SVR on the scaled night-only subset; the fitted model is kept for reuse
night_time_model=SVM_Regress(night_x_train, night_y_train, night_x_test, night_y_test)

[LibSVM]Train RMSE:  6.733930510704196
Test RMSE:  6.526060133629841


In [None]:
# combining day and night time results
# Reuses the already-fitted day and night SVR models and pools their
# predictions so that a single RMSE covers both subsets.

# Training-split score
day_y_train_pred = day_time_model.predict(day_x_train)
night_y_train_pred = night_time_model.predict(night_x_train)
train_rmse = combine_result(day_y_train, day_y_train_pred, night_y_train, night_y_train_pred)
print("Train RMSE: ", train_rmse)

# Held-out score
day_y_test_pred = day_time_model.predict(day_x_test)
night_y_test_pred = night_time_model.predict(night_x_test)
test_rmse = combine_result(day_y_test, day_y_test_pred, night_y_test, night_y_test_pred)
print("Test RMSE: ", test_rmse)

Train RMSE:  111.65941968463086
Test RMSE:  110.97913855360879


In [None]:
# whole_y_train_pred=whole_data_model.predict(x_train)
# train_rmse = combine_result(day_y_train, day_y_train_pred, night_y_train, night_y_train_pred)
# print("Train RMSE: ", train_rmse)


# whole_y_test_pred=whole_data_model.predict(x_test)

# print("Test RMSE: ", test_rmse)

# **Neural network**

In [None]:
from keras.layers import LeakyReLU
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
def baseline_model(inp_dim):
  """Build and compile a small fully-connected regression network.

  Layout: Dense(inp_dim) -> Dense(64) -> Dense(32) -> Dense(1), with a
  LeakyReLU (alpha=0.01) after every layer except the single-unit output.
  Compiled with mean-squared-error loss and the Adam optimizer.

  Args:
    inp_dim: number of input features; also the width of the first layer.

  Returns:
    The compiled Sequential model (not yet fitted).
  """
  net = Sequential()

  # Input-facing layer: as wide as the feature vector.
  net.add(Dense(inp_dim, input_dim=inp_dim, kernel_initializer='normal'))
  net.add(LeakyReLU(alpha=0.01))

  # Hidden layers, each followed by the same activation.
  for width in (64, 32):
    net.add(Dense(width,kernel_initializer='normal'))
    net.add(LeakyReLU(alpha=0.01))

  # Single linear output unit for the regression target.
  net.add(Dense(1, kernel_initializer='normal'))

  net.compile(loss='mean_squared_error', optimizer='adam')
  return net

In [None]:
# print(len(day_x_train[0]))
# print(len(day_x_test[0]))
# print(len(night_x_train[0]))

In [None]:
# Preview only the first few rows; listing the whole training matrix floods
# the cell output and bloats the saved notebook.
x_train[:5]

[array([-1.01885106,  1.41001185,  0.2181925 ,  0.2955323 ,  0.14062049,
        -0.61273134, -0.35157401, -0.49236289, -1.34867522, -0.71965517,
         1.39954136, -0.65517908,  0.67914822,  1.10658081,  0.97886827]),
 array([ 1.31129666,  0.7247338 , -0.21534146, -0.87481423,  0.01255546,
        -0.61273134, -0.88865699, -0.23886997, -0.76734061,  0.53676685,
         1.64352611,  0.21717334, -1.18669694,  0.03758057,  0.97886827]),
 array([-1.01885106,  1.52422486,  0.79623778, -1.4599875 , -0.29014369,
        -0.61273134,  0.4399167 ,  0.16493016, -0.38793781, -0.71965517,
         1.64352611, -0.65517908,  1.30435289, -0.03877659,  0.97886827]),
 array([-0.7275826 ,  1.06737283, -1.2269207 ,  0.2955323 , -0.23193231,
        -0.61273134,  0.04417135,  0.54362176,  0.54001171, -0.71965517,
        -0.30835186,  0.74058479,  0.13209415, -0.64963387, -1.02158792]),
 array([ 0.43749127,  0.83894681,  1.08526041, -0.28964097,  0.54810011,
        -0.61273134, -0.35157401,  0.047642

In [None]:
# Whole-data network. The input width is read from the training matrix so it
# cannot drift out of sync with the number of feature columns.
model = baseline_model(x_train.shape[1])
model.fit(x_train,y_train,epochs = 1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fa54ab150d0>

In [None]:
# Score the whole-data network: held-out split first, then the training split.
y_pred_test = model.predict(x_test)
test_mse = metrics.mean_squared_error(y_test, y_pred_test)
test_rmse = test_mse ** 0.5

y_pred_train = model.predict(x_train)
train_mse = metrics.mean_squared_error(y_train, y_pred_train)
train_rmse = train_mse ** 0.5

print("Train RMSE :", train_rmse, sep="")
print("Test RMSE : ", test_rmse, sep="")

Train RMSE :39.27563641627194
Test RMSE : 46.208057288399104


In [None]:
# Day-only network. The input width is read from the training matrix so it
# cannot drift out of sync with the number of feature columns.
model = baseline_model(day_x_train.shape[1])
model.fit(day_x_train,day_y_train,epochs = 1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fa553714710>

In [None]:
# Score the day-only network: held-out split first, then the training split.
day_y_pred_test = model.predict(day_x_test)
day_test_mse = metrics.mean_squared_error(day_y_test, day_y_pred_test)
day_test_rmse = day_test_mse ** 0.5

day_y_pred_train = model.predict(day_x_train)
day_train_mse = metrics.mean_squared_error(day_y_train, day_y_pred_train)
day_train_rmse = day_train_mse ** 0.5

print("Train RMSE :", day_train_rmse, sep="")
print("Test RMSE : ", day_test_rmse, sep="")

Train RMSE :67.32261799728947
Test RMSE : 73.22614489536214


In [None]:
# Night-only network. The input width is read from the training matrix so it
# cannot drift out of sync with the number of feature columns.
model = baseline_model(night_x_train.shape[1])
model.fit(night_x_train,night_y_train,epochs = 1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fa550399fd0>

In [None]:
# Score the night-only network: held-out split first, then the training split.
night_y_pred_test = model.predict(night_x_test)
night_test_mse = metrics.mean_squared_error(night_y_test, night_y_pred_test)
night_test_rmse = night_test_mse ** 0.5

night_y_pred_train = model.predict(night_x_train)
night_train_mse = metrics.mean_squared_error(night_y_train, night_y_pred_train)
night_train_rmse = night_train_mse ** 0.5

print("Train RMSE :", night_train_rmse, sep="")
print("Test RMSE : ", night_test_rmse, sep="")

Train RMSE :0.5378806881748405
Test RMSE : 2.2929465676926113


In [None]:
# Pool the day and night network predictions computed in the evaluation cells
# above and report one RMSE per split (test first, then train).
test_rmse = combine_result(day_y_test, day_y_pred_test, night_y_test, night_y_pred_test)
print("Test RMSE: ", test_rmse)
train_rmse = combine_result(day_y_train, day_y_pred_train, night_y_train, night_y_pred_train)
print("Train RMSE: ", train_rmse)

Test RMSE:  52.33299669940412
Train RMSE:  48.093334903675945


# Feature Expansion

## Without PCA

## With PCA