In [1]:
import pandas as pd
import numpy as np

## Half-day (12-hour) aggregation of kWhDelivered

In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np


# Load the raw records from use.csv
data = pd.read_csv('use.csv')

# Parse the timestamps and index the frame by them so it can be resampled by time
data['connectionTime'] = pd.to_datetime(data['connectionTime'])
data = data.set_index('connectionTime')

# Sum up the kWhDelivered per half day (12-hour bins). The resampled index is
# already datetime64, so the column produced by reset_index() needs no re-parsing.
# ('12h' is the non-deprecated spelling of the former '12H' alias.)
data_grouped = data['kWhDelivered'].resample('12h').sum().reset_index()

# Normalize the half-day sums to [0, 1]
# NOTE(review): the scaler is fit on every row (weekends included, and before
# any train/test split) — confirm this is intended.
scaler = MinMaxScaler()
data_grouped['scaled_kWhDelivered'] = scaler.fit_transform(
    data_grouped['kWhDelivered'].values.reshape(-1, 1)
)

# Day of week of each half-day bin (Monday=0 ... Sunday=6)
data_grouped['day_of_week'] = data_grouped['connectionTime'].dt.dayofweek

# Drop rows where 'day_of_week' is 5 or 6 (Saturday/Sunday).
# .copy() makes the filtered frame independent of the original so the column
# assignments below do not write into a slice (SettingWithCopyWarning).
data_grouped = data_grouped[~data_grouped['day_of_week'].isin([5, 6])].copy()

# Add day_of_week as a cyclical (sin/cos) feature.
# NOTE(review): the period is 10 while the remaining day_of_week values are
# 0..4, so the encoding only spans part of the circle — confirm the intended
# period (5 would make Friday and Monday adjacent).
data_grouped['sin_day_of_week'] = np.sin(2 * np.pi * data_grouped['day_of_week'] / 10)
data_grouped['cos_day_of_week'] = np.cos(2 * np.pi * data_grouped['day_of_week'] / 10)


In [3]:


# Columns fed to the models: the normalized energy plus the two day-of-week encodings
feature_columns = ['scaled_kWhDelivered', 'sin_day_of_week', 'cos_day_of_week']

# Rescale every model input column to the [0, 1] range.
# NOTE(review): this rebinds `scaler` and is fit on all rows before the
# train/test split below — confirm both are intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(data_grouped[feature_columns].to_numpy())

# Function to create dataset with look_back
def create_dataset(dataset, look_back=1):
    """Slice a 2-D series into sliding input windows and next-step targets.

    Args:
        dataset: 2-D NumPy array indexed as ``[time step, feature]``. Column 0
            is the value being predicted.
        look_back: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, Y)`` where ``X[i]`` is ``dataset[i:i + look_back, :]``
        (shape ``(n_samples, look_back, n_features)``) and ``Y[i]`` is
        ``dataset[i + look_back, 0]`` (shape ``(n_samples,)``), with
        ``n_samples = max(len(dataset) - look_back, 0)``.
    """
    # Every row from index `look_back` onward is a valid target. The previous
    # bound (len - look_back - 1) silently discarded the last window/target pair.
    n_samples = len(dataset) - look_back
    if n_samples <= 0:
        # Not enough rows for a single window: return empty arrays that still
        # have the expected rank so callers can read X.shape[2].
        empty_X = np.empty((0, look_back, dataset.shape[1]), dtype=dataset.dtype)
        empty_Y = np.empty((0,), dtype=dataset.dtype)
        return empty_X, empty_Y

    X = np.stack([dataset[i:i + look_back, :] for i in range(n_samples)])
    Y = dataset[look_back:, 0].copy()
    return X, Y

# Number of past steps each model sees when predicting the next one
look_back = 2

# Model input: the scaled feature matrix as float32
dataset = scaled_features.astype('float32')

# Ordered 90/10 split without shuffling: leading rows train, trailing rows test
train_size = int(len(dataset) * 0.90)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

# Turn each split into (window of look_back rows) -> (next value of column 0)
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test = create_dataset(test, look_back)

# Make the layout explicit: [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], look_back, X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], look_back, X_test.shape[2])




In [None]:
from keras.layers import GRU
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Conv1D, MaxPooling1D, Flatten
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
import keras.backend as K



def custom_mae(y_true, y_pred):
    """Mean absolute error between targets and predictions, averaged over the last axis.

    Built from Keras backend ops so it can be passed as a model loss.
    """
    absolute_error = K.abs(y_true - y_pred)
    return K.mean(absolute_error, axis=-1)
def cnn_bilstm(input_shape):
    """Build and compile a Conv1D -> Bidirectional(LSTM) -> Dense(1) model.

    Args:
        input_shape: Shape of one input sample, ``(time steps, features)``.
            It is passed to the first layer; the function no longer reads the
            notebook globals ``look_back`` / ``X_train``, so it can be built
            for any window size.

    Returns:
        A compiled ``Sequential`` model using ``custom_mae`` as the loss and
        the ``adam`` optimizer.
    """
    model = Sequential()
    # kernel_size=2 means the window must contain at least 2 time steps.
    model.add(Conv1D(filters=64, kernel_size=2, activation='LeakyReLU', input_shape=input_shape))
    model.add(Bidirectional(LSTM(150)))
    model.add(Dense(1))
    model.compile(loss=custom_mae, optimizer='adam')
    return model


# Create a function for each model
def create_lstm_model(input_shape):
    """Build and compile a single-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the LSTM layer.

    Returns:
        A compiled ``Sequential`` model: LSTM(150) followed by Dense(1),
        trained with ``custom_mae`` and the ``adam`` optimizer.
    """
    recurrent = LSTM(150, input_shape=input_shape)
    head = Dense(1)
    network = Sequential()
    for layer in (recurrent, head):
        network.add(layer)
    network.compile(optimizer='adam', loss=custom_mae)
    return network

def create_gru_model(input_shape):
    """Build and compile a single-layer GRU regressor.

    Args:
        input_shape: Shape of one input sample, passed to the GRU layer.

    Returns:
        A compiled ``Sequential`` model: GRU(150) followed by Dense(1),
        trained with ``custom_mae`` and the ``adam`` optimizer.
    """
    recurrent = GRU(150, input_shape=input_shape)
    head = Dense(1)
    network = Sequential()
    for layer in (recurrent, head):
        network.add(layer)
    network.compile(optimizer='adam', loss=custom_mae)
    return network

# XGBoost and SVR take 2-D input, so collapse each window into a single row
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

# Test-set MAE of every model, keyed by its display name
results = {}

# Settings shared by the three Keras models
_sequence_shape = (look_back, X_train.shape[2])
_keras_fit_kwargs = dict(epochs=200, batch_size=1, verbose=0)

# --- CNN + bidirectional LSTM ---
cnnlstm_model = cnn_bilstm(_sequence_shape)
cnnlstm_model.fit(X_train, Y_train, **_keras_fit_kwargs)
cnnlstm_predict = cnnlstm_model.predict(X_test)
cnnlstm_mae = results['CNNLSTM'] = mean_absolute_error(Y_test, cnnlstm_predict)

# --- LSTM ---
lstm_model = create_lstm_model(_sequence_shape)
lstm_model.fit(X_train, Y_train, **_keras_fit_kwargs)
lstm_predict = lstm_model.predict(X_test)
lstm_mae = results['LSTM'] = mean_absolute_error(Y_test, lstm_predict)

# --- GRU ---
gru_model = create_gru_model(_sequence_shape)
gru_model.fit(X_train, Y_train, **_keras_fit_kwargs)
gru_predict = gru_model.predict(X_test)
gru_mae = results['GRU'] = mean_absolute_error(Y_test, gru_predict)

# --- XGBoost (default hyper-parameters, flattened windows) ---
xgb_model = XGBRegressor()
xgb_model.fit(X_train_flat, Y_train)
xgb_predict = xgb_model.predict(X_test_flat)
xgb_mae = results['XGBoost'] = mean_absolute_error(Y_test, xgb_predict)

# --- Support vector regression (default hyper-parameters, flattened windows) ---
svm_model = SVR()
svm_model.fit(X_train_flat, Y_train)
svm_predict = svm_model.predict(X_test_flat)
svm_mae = results['SVM'] = mean_absolute_error(Y_test, svm_predict)

# One row per model for display as the cell output
results_df = pd.DataFrame({
    'Model': list(results.keys()),
    'MAE': list(results.values()),
})
results_df




In [None]:
# Mean of the normalized half-day kWhDelivered over the rows kept in data_grouped
data_grouped.loc[:, 'scaled_kWhDelivered'].mean()

0.23018418342207428