In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# Replace 'path_to_your_model.h5' with the actual path to your .h5 file
DC_model = load_model('DC_model.h5')
AC_model = load_model('AC_model.h5')
consumption_model = load_model('consumption_model.h5')

# Read the datasets into DataFrames
weather_data = pd.read_csv("C:/Users/yujin/Downloads/MONASH/Y3 S3/FIT 3164/fyp/Weather_Sensor_Data.csv")
power_data = pd.read_csv("C:/Users/yujin/Downloads/MONASH/Y3 S3/FIT 3164/fyp/Power_Generation_Data.csv")



In [3]:
# Convert DATE_TIME columns to datetime objects
weather_data['DATE_TIME'] = pd.to_datetime(weather_data['DATE_TIME'], format='%d/%m/%Y %H:%M')
power_data['DATE_TIME'] = pd.to_datetime(power_data['DATE_TIME'], format='%d/%m/%Y %H:%M')

# Group the Power Generation Data by DATE_TIME and calculate mean for DC_POWER and AC_POWER
grouped_power_data = power_data.groupby('DATE_TIME', as_index=False)[['DC_POWER', 'AC_POWER']].mean()

# Merge the grouped Power Generation Data with Weather Sensor Data
merged_data = pd.merge(grouped_power_data, weather_data, on='DATE_TIME', how='inner')

# Extract time-based features
merged_data['HOUR'] = merged_data['DATE_TIME'].dt.hour
merged_data['DAY_OF_WEEK'] = merged_data['DATE_TIME'].dt.dayofweek
merged_data['MONTH'] = merged_data['DATE_TIME'].dt.month

# Calculate rolling means and sums
window_size = 4
merged_data['ROLLING_DC_POWER_MEAN'] = merged_data['DC_POWER'].rolling(window=window_size, min_periods=1).mean()
merged_data['ROLLING_AC_POWER_MEAN'] = merged_data['AC_POWER'].rolling(window=window_size, min_periods=1).mean()
merged_data['ROLLING_MODULE_TEMP_MEAN'] = merged_data['MODULE_TEMPERATURE'].rolling(window=window_size, min_periods=1).mean()
merged_data['ROLLING_IRRADIATION_MEAN'] = merged_data['IRRADIATION'].rolling(window=window_size, min_periods=1).mean()

# Function to prepare data
def prepare_data(features, target_column):
    y = merged_data[target_column]
    X_train, X_test, y_train, y_test = train_test_split(merged_data[features], y, test_size=0.2, random_state=42)
    
    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Reshape the data to a 3D format required for LSTM
    X_train_lstm = np.reshape(X_train_scaled, (X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
    X_test_lstm = np.reshape(X_test_scaled, (X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))
    
    return X_train_lstm, X_test_lstm, y_train, y_test

In [5]:
# Assuming 'merged_data' is the final DataFrame you've prepared for prediction
# Define the features as used in the models
dc_features = ['MODULE_TEMPERATURE', 'IRRADIATION', 'HOUR', 'DAY_OF_WEEK', 'MONTH',
               'ROLLING_DC_POWER_MEAN', 'ROLLING_MODULE_TEMP_MEAN', 'ROLLING_IRRADIATION_MEAN']
target_dc = 'DC_POWER'
ac_features = ['MODULE_TEMPERATURE', 'IRRADIATION', 'HOUR', 'DAY_OF_WEEK', 'MONTH',
               'ROLLING_AC_POWER_MEAN', 'ROLLING_MODULE_TEMP_MEAN', 'ROLLING_IRRADIATION_MEAN']
target_ac = 'AC_POWER'
# Prepare the data for AC Power
X_train_ac, X_test_ac, y_train_ac, y_test_ac = prepare_data(ac_features, target_ac)

# Prepare the data for DC Power
X_train_dc, X_test_dc, y_train_dc, y_test_dc = prepare_data(dc_features, target_dc)

# Make predictions
DC_predictions = DC_model.predict(X_test_dc)
AC_predictions = AC_model.predict(X_test_ac)

prediction_data = merged_data.iloc[int(len(merged_data) * 0.8):]

# Add predictions to the prediction_data DataFrame
prediction_data['Predicted_DC_POWER'] = DC_predictions.flatten()
prediction_data['Predicted_AC_POWER'] = AC_predictions.flatten()

# Export the predictions to CSV files
prediction_data.to_csv('DC_AC_Power_Predictions.csv', index=False)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data['Predicted_DC_POWER'] = DC_predictions.flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_data['Predicted_AC_POWER'] = AC_predictions.flatten()


In [6]:
df1 = pd.read_csv("C:/Users/yujin/Downloads/MONASH/Y3 S3/FIT 3164/fyp/powerconsumption.csv")

In [7]:
df1['power consumption'] = df1[['PowerConsumption_Zone1', 'PowerConsumption_Zone2', 'PowerConsumption_Zone3']].mean(axis=1)
print(df1.head())

        Datetime  Temperature  Humidity  WindSpeed  GeneralDiffuseFlows  \
0  1/1/2017 0:00        6.559      73.8      0.083                0.051   
1  1/1/2017 0:10        6.414      74.5      0.083                0.070   
2  1/1/2017 0:20        6.313      74.5      0.080                0.062   
3  1/1/2017 0:30        6.121      75.0      0.083                0.091   
4  1/1/2017 0:40        5.921      75.7      0.081                0.048   

   DiffuseFlows  PowerConsumption_Zone1  PowerConsumption_Zone2  \
0         0.119             34055.69620             16128.87538   
1         0.085             29814.68354             19375.07599   
2         0.100             29128.10127             19006.68693   
3         0.096             28228.86076             18361.09422   
4         0.085             27335.69620             17872.34043   

   PowerConsumption_Zone3  power consumption  
0             20240.96386       23475.178480  
1             20131.08434       23106.947957  
2    

In [8]:
# Assuming your DataFrame is named df
df1['Datetime'] = pd.to_datetime(df1['Datetime'])
df1.set_index('Datetime', inplace=True)

# Optional: Create additional time features
df1['hour'] = df1.index.hour
df1['day_of_week'] = df1.index.dayofweek
# Add more as needed

# Select features and target
features1 = df1[['Temperature', 'Humidity', 'WindSpeed', 'GeneralDiffuseFlows', 'DiffuseFlows', 'hour', 'day_of_week']] # Add or remove features based on your Step 3 decisions
target1 = df1['power consumption']

# Normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(features1)
scaled_target = scaler.fit_transform(target1.values.reshape(-1,1))

# Sequence creation (example with a lookback of 3 time steps)
def create_sequences(features, target, sequence_length=3):
    X1, y1 = [], []
    for i in range(len(features) - sequence_length):
        X1.append(features[i:i+sequence_length])
        y1.append(target[i+sequence_length])
    return np.array(X1), np.array(y1).reshape(-1, 1)

X1, y1 = create_sequences(scaled_features, scaled_target, sequence_length=3)

# Split the data into training and testing sets
X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.2, random_state=42)


In [12]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Predict on the test data
y_pred1 = consumption_model.predict(X_test1)

# Merge the predictions with the test set
df2 = df1.iloc[int(len(df1) * 0.8)+1:]
df2['Predictions'] = y_pred1.flatten()

# Output to CSV
df2.to_csv('consumption_Predictions.csv', index=False)

[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Predictions'] = y_pred1.flatten()
