In [None]:

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import *
from tensorflow.keras.layers import Input, concatenate
from tensorflow.keras.layers import LSTM, Bidirectional, Conv1D, Flatten
from tensorflow.keras.models import Model

In [None]:

# Read the raw CSV and derive both calendar columns in one chained expression.
# "Datetime" is parsed from the "Data" column (month/day/year, 12-hour clock with
# an AM/PM marker); "DatePart" keeps only the date component of that timestamp.
df = pd.read_csv("SolarPrediction.csv").assign(
    Datetime=lambda frame: pd.to_datetime(frame["Data"], format="%m/%d/%Y %I:%M:%S %p"),
    DatePart=lambda frame: frame["Datetime"].dt.date,
)

# Progress report followed by a two-row preview of the raw and parsed time columns.
print("Step 1 Complete - Basic Time Analysis")
print("Current column:", df.columns.tolist())
print("Data Sample:")
display(df.loc[:, ["Data", "Time", "Datetime"]].head(2))

In [None]:

# The preceding cell already reads SolarPrediction.csv and derives "Datetime" and
# "DatePart". This cell used to repeat that work verbatim (a second disk read and
# duplicated output), so it now only checks the frame that is already in memory.

# Columns that the later cells of this notebook read from `df`.
required_columns = [
    "Data", "Time", "TimeSunRise", "TimeSunSet",
    "Radiation", "Temperature", "Pressure", "Humidity",
    "WindDirection(Degrees)", "Speed",
    "Datetime", "DatePart",
]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    # Fail here with a clear message instead of with a KeyError several cells later.
    raise KeyError(f"SolarPrediction.csv is missing expected columns: {missing_columns}")

print("Step 1 Verified - Required Columns Present")
print("Records loaded:", len(df))
# Missing values in "Data" parse to NaT without raising, so report them explicitly.
print("Unparsed dates:", int(df["Datetime"].isna().sum()))

In [None]:

# validate the time format
def validate_time(time_str):
    """Return True only when ``time_str`` parses as a 24-hour ``HH:MM:SS`` time.

    Parameters
    ----------
    time_str : object
        Candidate value, normally a string taken from the "Time" column.

    Returns
    -------
    bool
        True when ``pd.to_datetime`` produces a real timestamp for the value;
        False when parsing raises ``ValueError`` or produces a missing value.
    """
    try:
        parsed = pd.to_datetime(time_str, format="%H:%M:%S")
    except ValueError:
        return False
    # Missing inputs (None, NaN, NaT) do not raise: pandas hands back None/NaT
    # for them, so they must be rejected explicitly instead of counted as valid.
    return not pd.isna(parsed)

# Parse the whole "Time" column in a single vectorized pass. Values that are missing
# or do not match HH:MM:SS become NaT (errors="coerce") instead of raising.
# Going through the string form keeps this cell re-runnable: after a first run the
# column holds datetime.time objects, whose str() is again "HH:MM:SS".
parsed_time = pd.to_datetime(df["Time"].astype(str), format="%H:%M:%S", errors="coerce")

# delete irrelevant time
valid_time_mask = parsed_time.notna()
df = df[valid_time_mask].copy()

# standardize time format (reuse the parse above rather than parsing a second time)
df["Time"] = parsed_time.loc[valid_time_mask].dt.time

# combine accurate timestamp: calendar date from "DatePart" + clock time from "Time"
df["Datetime"] = pd.to_datetime(
    df["DatePart"].astype(str) + " " + df["Time"].astype(str),
    format="%Y-%m-%d %H:%M:%S"
)

print("Step 2 Complete - Time Standardization")
print("Remaining Records:", len(df))
print("Time range:", df["Datetime"].min(), "~", df["Datetime"].max())


In [None]:

# Both sun-event timestamps share the same calendar-date text and the same layout,
# so build those pieces once and append each event's clock time to the date.
date_text = df["DatePart"].astype(str)
stamp_format = "%Y-%m-%d %H:%M:%S"

# Full sunrise timestamp for every row.
df["SunRise"] = pd.to_datetime(date_text + " " + df["TimeSunRise"], format=stamp_format)

# Full sunset timestamp for every row.
df["SunSet"] = pd.to_datetime(date_text + " " + df["TimeSunSet"], format=stamp_format)

# Rows whose sunset sorts before their sunrise are moved one day forward,
# i.e. the sunset is treated as falling on the following calendar day.
mask = df["SunSet"].lt(df["SunRise"])
df.loc[mask, "SunSet"] = df.loc[mask, "SunSet"] + pd.Timedelta(days=1)

print("\nStep 3 Complete - Sunrise and Sunset Time Processing")
print("Example of Sunrise and Sunset Time:")
display(df.loc[:, ["SunRise", "SunSet"]].head(2))
# Sanity check: after the shift above no sunrise should be later than its sunset.
print("Is there an abnormal time", (df["SunRise"] > df["SunSet"]).any())


In [None]:


from sklearn.preprocessing import StandardScaler

# --- Calendar features read off the combined timestamp ---
timestamp = df['Datetime']
df['Hour'] = timestamp.dt.hour
df['DayOfYear'] = timestamp.dt.dayofyear
# Months 12, 1, 2 -> 1; 3-5 -> 2; 6-8 -> 3; 9-11 -> 4.
df['Season'] = timestamp.dt.month % 12 // 3 + 1

# --- Cyclical encoding: place the hour on a 24-hour circle ---
hour_angle = 2 * np.pi * df['Hour'] / 24
df['Hour_sin'] = np.sin(hour_angle)
df['Hour_cos'] = np.cos(hour_angle)

# --- Sun-relative features, all expressed in hours ---
df['DaylightDuration'] = (df['SunSet'] - df['SunRise']).dt.total_seconds() / 3600
# 1 when the sample lies between sunrise and sunset (both ends inclusive), else 0.
df['IsDaylight'] = timestamp.between(df['SunRise'], df['SunSet']).astype(int)
df['SinceSunrise'] = (timestamp - df['SunRise']).dt.total_seconds() / 3600
df['ToSunset'] = (df['SunSet'] - timestamp).dt.total_seconds() / 3600

# --- Wind direction: degrees -> radians -> point on the unit circle ---
wind_angle = np.radians(df['WindDirection(Degrees)'])
df['WindDirection_sin'] = np.sin(wind_angle)
df['WindDirection_cos'] = np.cos(wind_angle)

# --- Standardize the continuous weather readings in place ---
# NOTE(review): the scaler is fitted on every row of df; if a train/test split
# happens later in the notebook this leaks test statistics - confirm.
numeric_cols = ['Temperature', 'Pressure', 'Humidity', 'Speed']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

print("\nStep4 Completion - Feature Engineering")
print("Add feature column:", [name for name in df.columns if name not in ("Data", "Time", "DatePart")])
print("Standardized statistics:")
scaled_summary = df[numeric_cols].describe()
display(scaled_summary.loc[["mean", "std"]])



In [None]:


# Window sizes are counted in rows (time steps), not in clock time.
# The original notes equate 24 steps with 2 hours and 12 steps with 1 hour,
# which presumes 5-minute sampling - TODO confirm against the data.
LOOKBACK, FORECAST = 24, 12  # input steps per sample, predicted steps per sample

# Model input columns, in the order they appear along the last axis of X.
feature_columns = [
    # target history
    'Radiation',
    # cyclical hour encoding
    'Hour_sin',
    'Hour_cos',
    # standardized weather readings
    'Temperature',
    'Pressure',
    'Humidity',
    # wind direction on the unit circle
    'WindDirection_sin',
    'WindDirection_cos',
    'Speed',
    # sun-relative features
    'IsDaylight',
    'SinceSunrise',
    'ToSunset',
]

def create_sequences(data, lookback, forecast, feature_cols=None, target_col='Radiation'):
    """Cut a frame into overlapping (input window, target window) pairs.

    Sample ``i`` uses rows ``i .. i+lookback-1`` of the feature columns as input
    and rows ``i+lookback .. i+lookback+forecast-1`` of the target column as
    output. Rows are taken by position, so ``data`` must already be in the
    intended (chronological) order.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame containing the feature columns and the target column.
    lookback : int
        Number of consecutive rows in each input window.
    forecast : int
        Number of consecutive rows in each target window.
    feature_cols : list of str, optional
        Input columns. Defaults to the notebook-level ``feature_columns`` list.
    target_col : str, optional
        Column to predict. Defaults to ``'Radiation'``.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, lookback, n_features)
    y : numpy.ndarray, shape (n_samples, forecast)
        ``n_samples`` is ``len(data) - lookback - forecast + 1``, or 0 when the
        frame is too short. The arrays keep these ranks even when empty.
    """
    if feature_cols is None:
        feature_cols = feature_columns  # notebook-level default

    # Convert to NumPy once instead of slicing the DataFrame on every iteration.
    feature_values = data[list(feature_cols)].to_numpy()
    target_values = data[target_col].to_numpy()

    n_samples = max(len(data) - lookback - forecast + 1, 0)
    # Column vector of window start positions; broadcasting it against the
    # in-window offsets yields one row of indices per sample.
    window_starts = np.arange(n_samples)[:, None]
    X = feature_values[window_starts + np.arange(lookback)]
    y = target_values[window_starts + lookback + np.arange(forecast)]
    return X, y

# Windows are cut by row position, so the rows must be in chronological order for
# the lookback rows to be the past and the forecast rows to be the future.
# No visible earlier step sorts the frame, so order it by timestamp here. The sort
# is stable: already-ordered data is unchanged and ties keep their file order.
# NOTE(review): windows may still span gaps where samples are missing or were
# dropped as invalid - confirm whether that matters for the model.
df_chronological = df.sort_values("Datetime", kind="stable")

X, y = create_sequences(df_chronological, LOOKBACK, FORECAST)

print("\nStep5 Complete - Dataset Construction")
print("Input shape:", X.shape)
print("Output shape:", y.shape)
print("Verification of input dimension for the first sample:", X[0].shape == (LOOKBACK, len(feature_columns)))
