In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# Function to standardize and parse the datetime string correctly
def standardize_parse_datetime(date_str, time_str):
    parts = date_str.split('/')
    if len(parts[2]) == 2:
        parts[2] = '20' + parts[2]  # Assuming all dates are in the 2000s
    standardized_date_str = '/'.join(parts)
    datetime_str = standardized_date_str + ' ' + time_str
    return pd.to_datetime(datetime_str, format='%m/%d/%Y %H:%M:%S')

# Load the data
plant1_data = pd.read_csv('Plant_1_Generation_Data.csv')
plant2_data = pd.read_csv('Plant_2_Generation_Data.csv')
solar_data = pd.read_csv('SolarRecording.csv')

# Convert DATE_TIME in plant data to datetime for consistency
plant1_data['DATE_TIME'] = pd.to_datetime(plant1_data['DATE_TIME'], format='%d/%m/%y %H:%M')
plant2_data['DATE_TIME'] = pd.to_datetime(plant2_data['DATE_TIME'], format='%d/%m/%y %H:%M')

# Combine plant data
combined_plant_data = pd.concat([plant1_data, plant2_data])

# Standardize and parse the datetime in solar data
solar_data['DateTime'] = solar_data.apply(lambda row: standardize_parse_datetime(row['Date'], row['Time']), axis=1)

# Merge the combined plant data with solar data on the nearest datetime
merged_data = pd.merge_asof(combined_plant_data.sort_values('DATE_TIME'), 
                            solar_data.sort_values('DateTime'), 
                            left_on='DATE_TIME', 
                            right_on='DateTime', 
                            direction='nearest')

In [5]:
# Assuming merged_data is preprocessed and ready for modeling

# Feature selection
features = merged_data[['Radiation', 'Temperature', 'Pressure', 'Humidity', 'WindDirection(Degrees)', 'Speed']]
target = merged_data['DAILY_YIELD']

# Normalize the features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(features)
scaled_target = scaler.fit_transform(target.values.reshape(-1, 1))

# Function to create sequences for LSTM
def create_sequences(features, target, time_steps=1):
    X, y = [], []
    for i in range(len(features) - time_steps):
        v = features[i:(i + time_steps)]
        X.append(v)
        y.append(target[i + time_steps])
    return np.array(X), np.array(y)

# Creating time series sequences
time_steps = 10  # Number of time steps you want to look back
X, y = create_sequences(scaled_features, scaled_target, time_steps)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the LSTM model with a ReLU activation function in the output layer
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(time_steps, X_train.shape[2])))
model.add(Dense(1, activation='relu'))  # ReLU to ensure non-negative predictions
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=150, batch_size=100)



Epoch 1/150


  super().__init__(**kwargs)


[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0722
Epoch 2/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0598
Epoch 3/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0567
Epoch 4/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0559
Epoch 5/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0547
Epoch 6/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0534
Epoch 7/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0526
Epoch 8/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0512
Epoch 9/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0508
Epoch 10/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0350
Epoch 80/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0346
Epoch 81/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0348
Epoch 82/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0347
Epoch 83/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0346
Epoch 84/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0350
Epoch 85/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0340
Epoch 86/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0344
Epoch 87/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0341
Epoch 88/150
[1m1092/1092[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x2898bac10>

In [13]:
# Assume time_steps = 3 and model is already trained
time_steps = 10

# Get the last time_steps data points from the dataset to predict the next day
last_data_points = scaled_features[-time_steps:]

# Reshape the data to fit the model input shape (1, time_steps, number of features)
last_data_points = last_data_points.reshape((1, time_steps, last_data_points.shape[1]))

# Predict the future power output
predicted_output = model.predict(last_data_points)

# Inverse transform the prediction to get the actual value
predicted_daily_yield = scaler.inverse_transform(predicted_output)

print(f'Predicted Daily Yield for one day after the dataset ends: {predicted_daily_yield[0][0]}')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Predicted Daily Yield for one day after the dataset ends: 5680.96240234375


In [14]:
# Initialize the array with the last data points from the scaled features
input_sequence = scaled_features[-time_steps:].reshape((1, time_steps, scaled_features.shape[1]))

# Predict the next 5 days
predicted_daily_yields = []
for i in range(5):  # for the next 5 days
    # Predict the next day's yield using the model
    daily_yield_prediction = model.predict(input_sequence)

    # Store the predicted daily yield
    predicted_daily_yields.append(scaler.inverse_transform(daily_yield_prediction)[0][0])

    # Update the input sequence: remove the oldest day and add the predicted day
    input_sequence = np.roll(input_sequence, -1, axis=1)
    input_sequence[0, -1, :] = daily_yield_prediction

# Display the predicted daily yields for the next 5 days
for i, yield_prediction in enumerate(predicted_daily_yields, 1):
    print(f"Day {i} predicted Daily Yield: {yield_prediction}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Day 1 predicted Daily Yield: 5680.96240234375
Day 2 predicted Daily Yield: 590.1162109375
Day 3 predicted Daily Yield: 1348.227783203125
Day 4 predicted Daily Yield: 0.0
Day 5 predicted Daily Yield: 0.0


In [15]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Make predictions on the test set
predicted_test = model.predict(X_test)

# Inverse transform the predictions and actual values to their original scale
predicted_test = scaler.inverse_transform(predicted_test)
y_test_inverse = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate the MSE, RMSE, MAE, and R2 metrics
mse = mean_squared_error(y_test_inverse, predicted_test)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_inverse, predicted_test)
r2 = r2_score(y_test_inverse, predicted_test)

print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')


[1m853/853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 487us/step
Mean Squared Error: 3343750.539753143
Root Mean Squared Error: 1828.5925023780294
Mean Absolute Error: 1202.0999917061379
R-squared: 0.6369227844283137


In [74]:
time_steps = 3

# Get the last time_steps data points from the dataset
last_sequence = scaled_features[-time_steps:]

for i in range(3):  # Predict for the next 3 days
    # Reshape input for model
    reshaped_sequence = last_sequence.reshape(1, time_steps, last_sequence.shape[-1])

    # Predict next day
    next_day_prediction = model.predict(reshaped_sequence)

    # Replace nan values with 0
    next_day_prediction = np.nan_to_num(next_day_prediction)

    # Shift input sequence by 1 time step
    last_sequence = np.roll(last_sequence, -1, axis=0)

    # Replace last time step with predicted values
    last_sequence[-1] = next_day_prediction[0]

# The last element of last_sequence now contains predicted features for 3rd day
predicted_daily_yield_third_day = scaler.inverse_transform(last_sequence[-1].reshape(1, -1))

print(f'Predicted Daily Yield for 3rd day: {predicted_daily_yield_third_day[0][0]}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Predicted Daily Yield for 3rd day: 0.0


In [103]:
# Assuming time_steps, model, scaled_features, and scaler are already defined

# Define the day to predict (e.g., day 3)
day_to_predict = 3

# Get the last time_steps data points from the dataset to predict the day
last_data_points = scaled_features[-time_steps:]

# Reshape the data to fit the model input shape (1, time_steps, number of features)
last_data_points = last_data_points.reshape((1, time_steps, last_data_points.shape[1]))

# Iterate to predict each day up to the desired day
for day in range(1, day_to_predict + 1):
    # Predict the future power output for the next day
    predicted_output = model.predict(last_data_points)

    # Inverse transform the prediction to get the actual value
    predicted_daily_yield = scaler.inverse_transform(predicted_output)

    if day == day_to_predict:
        print(f'Predicted Daily Yield for day {day_to_predict} after the dataset ends: {predicted_daily_yield[0][0]}')
    else:
        # Update last_data_points with the new prediction for the next iteration
        last_data_points = np.append(last_data_points, predicted_output.reshape((1, 1, predicted_output.shape[1])), axis=1)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 2, the array at index 0 has size 6 and the array at index 1 has size 1