In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded', 'electrically_heated_encoded','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    
    # Normalize the temperature columns only
    temp_columns = ['Temperature']
    # data[temp_columns] = (data[temp_columns] - data[temp_columns].mean()) / data[temp_columns].std()

    return data

# Split the data into train and test sets
def split_data(data, test_size=0.5):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Preprocess the dataset
data = preprocess_data(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Initialize lists to store results
results = []
k =1
# Train and evaluate model for each ID
for id_ in train_data.keys():  # Corrected slicing
    # Prepare training and testing data
    print(f"{k} Processing ID: {id_}")
    k =k+1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
    
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Save predictions to a CSV file
results_df.to_csv('phase1_with_survey_method1_cnn_lstm_5050.csv', index=False)

# Calculate overall metrics
overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
overall_rmse = math.sqrt(overall_mse)

print(f"Overall Test Loss (MSE): {overall_mse}")
print(f"Overall Test Loss (MAE): {overall_mae}")
print(f"Overall Test Loss (RMSE): {overall_rmse}")


1 Processing ID: Exp_737
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
2 Processing ID: Exp_93
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
3 Processing ID: Exp_62
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
4 Processing ID: Exp_208
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
5 Processing ID: Exp_17
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
6 Processing ID: Exp_529
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
7 Processing ID: Exp_49
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
8 Processing ID: Exp_587
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
9 Processing ID: Exp_460
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
10 Processing ID: Exp_567
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
11 Processing

ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    
    return data

# Split the data into train and test sets
def split_data(data, test_size=0.5):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Preprocess the dataset
data = preprocess_data(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Initialize lists to store results
results = []
k =1
# Train and evaluate model for each ID
for id_ in train_data.keys():  # Corrected slicing
    # Prepare training and testing data
    print(f"{k} Processing ID: {id_}")
    k =k+1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])
    
    # Debug: Print shapes of sequences and targets
    print(f"Train sequences shape: {train_sequences.shape}")
    print(f"Train targets shape: {train_targets.shape}")
    print(f"Test sequences shape: {test_sequences.shape}")
    print(f"Test targets shape: {test_targets.shape}")
    
    # Check if sequences are valid
    if train_sequences.shape[0] == 0 or test_sequences.shape[0] == 0:
        print(f"No valid sequences for ID: {id_}")
        continue
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
    print("predictions",predictions)
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Debug: Check if DataFrame is empty
if results_df.empty:
    print("Results DataFrame is empty after dropping NaN values. Exiting.")
else:
    # Save predictions to a CSV file
    results_df.to_csv('phase1_with_survey_method1_cnn_lstm_5050.csv', index=False)

    # Calculate overall metrics
    overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_rmse = math.sqrt(overall_mse)

    print(f"Overall Test Loss (MSE): {overall_mse}")
    print(f"Overall Test Loss (MAE): {overall_mae}")
    print(f"Overall Test Loss (RMSE): {overall_rmse}")


1 Processing ID: Exp_737
Train sequences shape: (876, 24, 5)
Train targets shape: (876,)
Test sequences shape: (876, 24, 5)
Test targets shape: (876,)
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
predictions [[-0.00325875]
 [ 0.04674207]
 [-0.02253164]
 [ 0.0273657 ]
 [ 0.11811745]
 [ 0.34181404]
 [ 0.49983376]
 [ 0.5841862 ]
 [ 0.54883903]
 [ 0.3792979 ]
 [ 0.2951541 ]
 [ 0.20423946]
 [ 0.2127769 ]
 [ 0.15028964]
 [ 0.12549342]
 [ 0.08347864]
 [ 0.07955378]
 [ 0.04013585]
 [ 0.08443538]
 [ 0.06396447]
 [ 0.09174342]
 [ 0.06225422]
 [ 0.10787893]
 [ 0.06752487]
 [ 0.05571224]
 [ 0.01765694]
 [ 0.06767689]
 [ 0.12581778]
 [ 0.3102923 ]
 [ 0.6197976 ]
 [ 0.6541016 ]
 [ 0.7458354 ]
 [ 0.70419395]
 [ 0.79029083]
 [ 0.7732643 ]
 [ 0.78372574]
 [ 0.6504947 ]
 [ 0.56776744]
 [ 0.52874494]
 [ 0.5231112 ]
 [ 0.44860792]
 [ 0.40830886]
 [ 0.43613976]
 [ 0.4120856 ]
 [ 0.37265354]
 [ 0.33727592]
 [ 0.25970036]
 [ 0.23993526]
 [ 0.23785292]
 [ 0.28653467]
 [ 0.28689396

In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    
    return data

# Split the data into train and test sets
def split_data(data, test_size=0.25):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Preprocess the dataset
data = preprocess_data(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Initialize lists to store results
results = []
k =1
# Train and evaluate model for each ID
for id_ in train_data.keys():  # Corrected slicing
    # Prepare training and testing data
    print(f"{k} Processing ID: {id_}")
    k =k+1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])
    

    # Check if sequences are valid
    if train_sequences.shape[0] == 0 or test_sequences.shape[0] == 0:
        print(f"No valid sequences for ID: {id_}")
        continue
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
  
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Debug: Check if DataFrame is empty
if results_df.empty:
    print("Results DataFrame is empty after dropping NaN values. Exiting.")
else:
    # Save predictions to a CSV file
    results_df.to_csv('phase1_with_survey_method1_cnn_lstm_7525.csv', index=False)

    # Calculate overall metrics
    overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_rmse = math.sqrt(overall_mse)

    print(f"Overall Test Loss (MSE): {overall_mse}")
    print(f"Overall Test Loss (MAE): {overall_mae}")
    print(f"Overall Test Loss (RMSE): {overall_rmse}")


1 Processing ID: Exp_737
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step
2 Processing ID: Exp_93
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step
3 Processing ID: Exp_62
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step
4 Processing ID: Exp_208
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step
5 Processing ID: Exp_17
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
6 Processing ID: Exp_529
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step
7 Processing ID: Exp_49
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 117ms/step
8 Processing ID: Exp_587
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step
9 Processing ID: Exp_460
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
10 Processing ID: Exp_567
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
11 Processin

In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    
    return data

# Split the data into train and test sets
def split_data(data, test_size=0.2):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Preprocess the dataset
data = preprocess_data(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Initialize lists to store results
results = []
k =1
# Train and evaluate model for each ID
for id_ in train_data.keys():  # Corrected slicing
    # Prepare training and testing data
    print(f"{k} Processing ID: {id_}")
    k =k+1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])
    

    # Check if sequences are valid
    if train_sequences.shape[0] == 0 or test_sequences.shape[0] == 0:
        print(f"No valid sequences for ID: {id_}")
        continue
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
  
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Debug: Check if DataFrame is empty
if results_df.empty:
    print("Results DataFrame is empty after dropping NaN values. Exiting.")
else:
    # Save predictions to a CSV file
    results_df.to_csv('phase1_with_survey_method1_cnn_lstm_8020.csv', index=False)

    # Calculate overall metrics
    overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_rmse = math.sqrt(overall_mse)

    print(f"Overall Test Loss (MSE): {overall_mse}")
    print(f"Overall Test Loss (MAE): {overall_mae}")
    print(f"Overall Test Loss (RMSE): {overall_rmse}")


1 Processing ID: Exp_737
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
2 Processing ID: Exp_93
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step
3 Processing ID: Exp_62
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
4 Processing ID: Exp_208
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
5 Processing ID: Exp_17
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
6 Processing ID: Exp_529
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
7 Processing ID: Exp_49
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
8 Processing ID: Exp_587
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
9 Processing ID: Exp_460
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
10 Processing ID: Exp_567
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step
11 Processing

In [8]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder, StandardScaler
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded', 'electrically_heated','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    
    return data

# Split the data into train and test sets
def split_data(data, test_size=0.2):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()[:35]
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Normalize the input features
def normalize_data(train_data, test_data):
    scaler = StandardScaler()
    for id_ in train_data.keys():
        train_data[id_][['Demand_kWh', 'Temperature', 'home_size_encoded', 'electric_car_encoded', 'electrically_heated_encoded', 'no_of_people']] = \
            scaler.fit_transform(train_data[id_][['Demand_kWh', 'Temperature', 'home_size_encoded', 'electric_car_encoded', 'electrically_heated_encoded', 'no_of_people']])
        
        test_data[id_][['Demand_kWh', 'Temperature', 'home_size_encoded', 'electric_car_encoded', 'electrically_heated_encoded', 'no_of_people']] = \
            scaler.transform(test_data[id_][['Demand_kWh', 'Temperature', 'home_size_encoded', 'electric_car_encoded', 'electrically_heated_encoded', 'no_of_people']])
    return train_data, test_data

# Preprocess the dataset
data = preprocess_data(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Normalize the data
train_data, test_data = normalize_data(train_data, test_data)

# Initialize lists to store results
results = []
k =1
# Train and evaluate model for each ID
for id_ in train_data.keys():  # Corrected slicing
    # Prepare training and testing data
    print(f"{k} Processing ID: {id_}")
    k =k+1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])
    

    
    # Check if sequences are valid
    if train_sequences.shape[0] == 0 or test_sequences.shape[0] == 0:
        print(f"No valid sequences for ID: {id_}")
        continue
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    history = model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=1)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
    
    # Debug: Check if predictions contain NaN
    if np.isnan(predictions).any():
        print(f"Predictions contain NaN values for ID: {id_}")
    
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Debug: Check if DataFrame is empty
if results_df.empty:
    print("Results DataFrame is empty after dropping NaN values. Exiting.")
else:
    # Save predictions to a CSV file
    results_df.to_csv('phase1_with_survey_method1_cnn_lstm_5050.csv', index=False)

    # Calculate overall metrics
    overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_rmse = math.sqrt(overall_mse)

    print(f"Overall Test Loss (MSE): {overall_mse}")
    print(f"Overall Test Loss (MAE): {overall_mae}")
    print(f"Overall Test Loss (RMSE): {overall_rmse}")


1 Processing ID: Exp_737


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder, StandardScaler
import math
import warnings
warnings.filterwarnings("ignore")

# Load the dataset
file_path = 'phase_1_data_with_survey.csv'
data = pd.read_csv(file_path)
label_encoder = LabelEncoder()
data['home_size_encoded'] = label_encoder.fit_transform(data['home_size'])
data['electric_car_encoded'] = data['electric_car'].map({'Yes': 1, 'No': 0})
data['electrically_heated_encoded'] = data['electrically_heated'].map({'Yes': 1, 'No': 0})

# Function to create sequences for CNN-LSTM model
def create_sequences(data, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length][['Demand_kWh', 'Temperature','home_size_encoded', 'electric_car_encoded','no_of_people']].values
        target = data.iloc[i+seq_length]['Demand_kWh']
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define CNN-LSTM model
def create_cnn_lstm_model(input_shape):
    model = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Preprocess the data
def preprocess_data(data):
    # Ensure the data is sorted by Date
    data = data.sort_values(by=['From']).reset_index(drop=True)
    return data

# Split the data into train and test sets
def split_data(data, test_size=0.5):
    train_data = {}
    test_data = {}

    ids = data['ID'].unique()[:35]
    
    for id_ in ids:
        id_data = data[data['ID'] == id_]
        train, test = train_test_split(id_data, test_size=test_size, shuffle=False)
        train_data[id_] = train 
        test_data[id_] = test
    
    return train_data, test_data

# Normalize the input features
def normalize_data(train_data, test_data):
    scaler = StandardScaler()
    for id_ in train_data.keys():
        features = ['Temperature', 'home_size_encoded', 'electric_car_encoded', 'no_of_people']
        
        train_features = train_data[id_][features]
        test_features = test_data[id_][features]
        
        train_data[id_][features] = scaler.fit_transform(train_features)
        test_data[id_][features] = scaler.transform(test_features)
    return train_data, test_data

# Check for NaN values in the dataset
def check_nan(data):
    nan_values = data.isna().sum()
    print("NaN values in the dataset:")
    print(nan_values)
    if nan_values.any() > 0:
        print("Warning: NaN values found in the dataset.")
        data = data.dropna()
    return data

# Preprocess the dataset
data = preprocess_data(data)
data = check_nan(data)

# Split the data into train and test sets
train_data, test_data = split_data(data)

# Normalize the data
train_data, test_data = normalize_data(train_data, test_data)

# Initialize lists to store results
results = []
k = 1
# Train and evaluate model for each ID
for id_ in train_data.keys():
    print(f"{k} Processing ID: {id_}")
    k += 1
    train_sequences, train_targets = create_sequences(train_data[id_])
    test_sequences, test_targets = create_sequences(test_data[id_])

    # Debug: Print shapes of sequences and targets
    print(f"Train sequences shape: {train_sequences.shape}")
    print(f"Train targets shape: {train_targets.shape}")
    print(f"Test sequences shape: {test_sequences.shape}")
    print(f"Test targets shape: {test_targets.shape}")

    # Check if sequences are valid
    if train_sequences.shape[0] == 0 or test_sequences.shape[0] == 0:
        print(f"No valid sequences for ID: {id_}")
        continue
    
    # Reshape input data for the CNN-LSTM model
    input_shape = (train_sequences.shape[1], train_sequences.shape[2])
    
    # Create and train the model
    model = create_cnn_lstm_model(input_shape)
    history = model.fit(train_sequences, train_targets, epochs=50, batch_size=32, validation_split=0.2, verbose=1)
    
    # Predict on the test data
    predictions = model.predict(test_sequences)
    
    # Debug: Check if predictions contain NaN
    if np.isnan(predictions).any():
        print(f"Predictions contain NaN values for ID: {id_}")
    
    # Store results
    for i in range(len(test_targets)):
        results.append([id_, test_data[id_].iloc[i + 24]['From'], test_targets[i], predictions[i][0]])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['ID', 'Date', 'Actual_Demand', 'Predicted_Demand'])

# Identify NaN values
nan_values = results_df.isna().sum()
print("NaN values before handling:")
print(nan_values)

# Drop rows with NaN values
results_df = results_df.dropna()

# Debug: Check if DataFrame is empty
if results_df.empty:
    print("Results DataFrame is empty after dropping NaN values. Exiting.")
else:
    # Save predictions to a CSV file
    results_df.to_csv('phase1_with_survey_method1_cnn_lstm_5050.csv', index=False)

    # Calculate overall metrics
    overall_mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
    overall_rmse = math.sqrt(overall_mse)

    print(f"Overall Test Loss (MSE): {overall_mse}")
    print(f"Overall Test Loss (MAE): {overall_mae}")
    print(f"Overall Test Loss (RMSE): {overall_rmse}")


NaN values in the dataset:
ID                                 0
From                               0
Date                               0
Hour                               0
Participation_Phase                0
Demand_kWh                         0
Temperature                        0
home_size                          0
electric_car                       0
no_of_people                       0
electrically_heated                0
home_size_encoded                  0
electric_car_encoded               0
electrically_heated_encoded    64800
dtype: int64
NaN values before handling:
ID                  0
Date                0
Actual_Demand       0
Predicted_Demand    0
dtype: int64
Results DataFrame is empty after dropping NaN values. Exiting.
