In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score, mean_squared_error, r2_score, mean_absolute_error, max_error
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor


In [None]:
mydata = pd.read_csv('/content/SolarPrediction.csv')

In [None]:
mydata.head()

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 0:00,23:55:26,1.21,48,30.46,59,177.39,5.62,6:13:00,18:13:00
1,1475229023,9/29/2016 0:00,23:50:23,1.21,48,30.46,58,176.78,3.37,6:13:00,18:13:00
2,1475228726,9/29/2016 0:00,23:45:26,1.23,48,30.46,57,158.75,3.37,6:13:00,18:13:00
3,1475228421,9/29/2016 0:00,23:40:21,1.21,48,30.46,60,137.71,3.37,6:13:00,18:13:00
4,1475228124,9/29/2016 0:00,23:35:24,1.17,48,30.46,62,104.95,5.62,6:13:00,18:13:00


In [None]:
mydata['Year'] = pd.DatetimeIndex(mydata['Data']).year
mydata['Month'] = pd.DatetimeIndex(mydata['Data']).month
mydata['Day'] = pd.DatetimeIndex(mydata['Data']).day


In [None]:
mydata['Hour'] = pd.DatetimeIndex(mydata['Time']).hour
mydata['Minute'] = pd.DatetimeIndex(mydata['Time']).minute
mydata['Second'] = pd.DatetimeIndex(mydata['Time']).second


In [None]:
mydata['SunPerDay'] = pd.DatetimeIndex(mydata['TimeSunSet']) - pd.DatetimeIndex(mydata['TimeSunRise'])
mydata['SunPerDayHours'] = pd.DatetimeIndex(mydata['TimeSunSet']).hour - pd.DatetimeIndex(mydata['TimeSunRise']).hour


In [None]:
mydata.drop(['Time', 'Data', 'TimeSunRise', 'TimeSunSet', 'SunPerDay'], axis=1, inplace=True)


In [None]:
mydata.isnull().sum()


Unnamed: 0,0
UNIXTime,0
Radiation,0
Temperature,0
Pressure,0
Humidity,0
WindDirection(Degrees),0
Speed,0
Year,0
Month,0
Day,0


In [None]:
mydata.info()
mydata.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32686 entries, 0 to 32685
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   UNIXTime                32686 non-null  int64  
 1   Radiation               32686 non-null  float64
 2   Temperature             32686 non-null  int64  
 3   Pressure                32686 non-null  float64
 4   Humidity                32686 non-null  int64  
 5   WindDirection(Degrees)  32686 non-null  float64
 6   Speed                   32686 non-null  float64
 7   Year                    32686 non-null  int32  
 8   Month                   32686 non-null  int32  
 9   Day                     32686 non-null  int32  
 10  Hour                    32686 non-null  int32  
 11  Minute                  32686 non-null  int32  
 12  Second                  32686 non-null  int32  
 13  SunPerDayHours          32686 non-null  int32  
dtypes: float64(4), int32(7), int64(3)
memo

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,Year,Month,Day,Hour,Minute,Second,SunPerDayHours
count,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0
mean,1478047000.0,207.124697,51.103255,30.422879,75.016307,143.489821,6.243869,2016.0,10.526066,15.825766,11.557425,27.556691,17.441902,11.347886
std,3005037.0,315.916387,6.201157,0.054673,25.990219,83.1675,3.490474,0.0,1.096691,8.711824,6.912034,17.268507,12.951109,0.476307
min,1472724000.0,1.11,34.0,30.19,8.0,0.09,0.0,2016.0,9.0,1.0,0.0,0.0,0.0,11.0
25%,1475546000.0,1.23,46.0,30.4,56.0,82.2275,3.37,2016.0,10.0,9.0,6.0,15.0,5.0,11.0
50%,1478026000.0,2.66,50.0,30.43,85.0,147.7,5.62,2016.0,11.0,16.0,12.0,30.0,18.0,11.0
75%,1480480000.0,354.235,55.0,30.46,97.0,179.31,7.87,2016.0,11.0,23.0,18.0,45.0,22.0,12.0
max,1483265000.0,1601.26,71.0,30.56,103.0,359.95,40.5,2016.0,12.0,31.0,23.0,57.0,59.0,12.0


In [None]:
X = mydata.drop(columns=['Radiation'])
y = mydata['Radiation']


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train_scaled, y_train)


dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train_scaled, y_train)


xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train_scaled, y_train)




In [None]:
lr_preds = lr_model.predict(X_test_scaled)
rf_preds = rf_model.predict(X_test_scaled)
dt_preds = dt_model.predict(X_test_scaled)
xgb_preds = xgb_model.predict(X_test_scaled)


In [None]:
print("Linear Regression R2:", r2_score(y_test, lr_preds))
print("Random Forest R2:", r2_score(y_test, rf_preds))
print("Decision Tree R2:", r2_score(y_test, dt_preds))
print("XGBoost R2:", r2_score(y_test, xgb_preds))


Linear Regression R2: 0.6239417259189338
Random Forest R2: 0.935821204522368
Decision Tree R2: 0.8821631791579465
XGBoost R2: 0.9285813931762983


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score, max_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Sample data (replace this with your actual dataset)
X, y = mydata.drop(columns=['Radiation']), mydata['Radiation']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train old model (e.g., Linear Regression)
old_model = LinearRegression()
old_model.fit(X_train, y_train)
y_pred_old = old_model.predict(X_test)

# Train new model (e.g., Random Forest)
new_model = RandomForestRegressor()
new_model.fit(X_train, y_train)
y_pred_new = new_model.predict(X_test)

# Calculate metrics for the old model
mae_old = mean_absolute_error(y_test, y_pred_old)
mse_old = mean_squared_error(y_test, y_pred_old)
r2_old = r2_score(y_test, y_pred_old)
variance_old = explained_variance_score(y_test, y_pred_old)
max_err_old = max_error(y_test, y_pred_old)

# Calculate metrics for the new model
mae_new = mean_absolute_error(y_test, y_pred_new)
mse_new = mean_squared_error(y_test, y_pred_new)
r2_new = r2_score(y_test, y_pred_new)
variance_new = explained_variance_score(y_test, y_pred_new)
max_err_new = max_error(y_test, y_pred_new)

# Display the results
print("Old Parameters ------- New Parameters")
print(f"MAE : {mae_old} ------- {mae_new}")
print(f"MSE : {mse_old} ------- {mse_new}")
print(f"R^2 : {r2_old} ------- {r2_new}")
print(f"Var : {variance_old} ------- {variance_new}")
print(f"Max : {max_err_old} ------- {max_err_new}")


Old Parameters ------- New Parameters
MAE : 145.93109764307636 ------- 29.732634551850715
MSE : 37360.18564879768 ------- 6314.876231336868
R^2 : 0.6239417259192305 ------- 0.9364360370445151
Var : 0.6240038428068435 ------- 0.9364475974418769
Max : 919.828366060648 ------- 870.1324000000004
