In [12]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
import numpy as np

In [13]:
# Read the energy and weather CSV files into two DataFrames.
# Paths are relative to the notebook's working directory.
energy_data, weather_data = (
    pd.read_csv(csv_path)
    for csv_path in ('data/energy_dataset.csv', 'data/weather_features.csv')
)

In [14]:
# Parse each frame's timestamp column into timezone-aware (UTC) datetimes.
# The columns are overwritten in place so both frames end up with comparable keys.
for frame, time_col in ((energy_data, 'time'), (weather_data, 'dt_iso')):
    frame[time_col] = pd.to_datetime(frame[time_col], utc=True)

In [15]:
# Order both frames chronologically by their timestamp column.
# The as-of merge performed in a later cell joins on these keys and expects them sorted.
energy_data = energy_data.sort_values(by='time')
weather_data = weather_data.sort_values(by='dt_iso')

In [16]:
# As-of join: for every energy row, attach the most recent weather row whose
# 'dt_iso' is at or before the energy row's 'time' (backward search, the pandas default).
# NOTE(review): if weather_data holds several rows per timestamp (e.g. one per
# location), only one of them is matched per energy row — confirm this is intended.
data = pd.merge_asof(
    left=energy_data,
    right=weather_data,
    left_on='time',
    right_on='dt_iso',
    direction='backward',
)

In [17]:
# Identify columns in which every value is missing, then remove them.
# Such columns carry no information and cannot be mean-imputed.
is_all_missing = data.isna().all(axis=0)
all_missing_cols = is_all_missing[is_all_missing].index.tolist()
data = data.drop(labels=all_missing_cols, axis=1)

In [None]:
# Fill the remaining gaps in the numeric features with each column's mean.
# Only float64 / int64 columns are treated as numeric here.
numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
imputer = SimpleImputer(strategy='mean')
# The imputer returns a plain array, so the result is wrapped back into a
# DataFrame with the original column labels (and a fresh default row index).
data_imputed = pd.DataFrame(
    data=imputer.fit_transform(data[numeric_cols]),
    columns=numeric_cols,
)

In [19]:
# Re-attach the non-numeric columns (everything in `data` that was not imputed)
# next to the imputed numeric features.
non_numeric_cols = data.drop(columns=numeric_cols).columns
# Start from the numeric columns only rather than from the whole of `data_imputed`:
# on the first run this is the same frame, but it keeps the cell idempotent —
# re-running it no longer appends a second copy of the non-numeric columns.
# The row index of `data` is reset so both pieces align positionally on a default index.
data_imputed = pd.concat(
    [data_imputed[numeric_cols], data[non_numeric_cols].reset_index(drop=True)],
    axis=1,
)

In [20]:
# Min-max scale the numeric features for the QNN (MinMaxScaler with default settings).
# The scaler is fitted on the full set of rows and kept in `scaler` for later reuse.
scaler = MinMaxScaler().fit(data_imputed[numeric_cols])
data_imputed[numeric_cols] = scaler.transform(data_imputed[numeric_cols])

In [21]:
# Extract the scaled numeric features as a standalone NumPy array for the QNN.
# An explicit copy is taken so the array does not share memory with the DataFrame.
X_qnn = data_imputed[numeric_cols].to_numpy(copy=True)

In [22]:
# Write the complete preprocessed frame (numeric and non-numeric columns) to disk,
# omitting the row index from the file.
data_imputed.to_csv(path_or_buf='data_preprocessed_for_qnn.csv', index=False)
# Report the (rows, features) shape of the array prepared for the model
print("Preprocessed data shape for QCNN:", X_qnn.shape)

Preprocessed data shape for QCNN: (35064, 38)
