In [19]:
! pip install tensorflow


Collecting tensorflow
  Downloading tensorflow-2.13.1-cp38-cp38-win_amd64.whl.metadata (2.6 kB)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
  Downloading tensorflow-2.13.0-cp38-cp38-win_amd64.whl.metadata (2.6 kB)
Collecting tensorflow-intel==2.13.0 (from tensorflow)
  Downloading tensorflow_intel-2.13.0-cp38-cp38-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.13.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.13.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.1.21 (from tensorflow-intel==2.13.0->tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow-intel==2.13.0->tensorflow)
  Using cached gast-0.4.0-py3-none-any.whl.

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.4.0 requires fsspec, which is not installed.
grpcio-status 1.66.1 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 4.25.5 which is incompatible.
torch 2.4.0 requires typing-extensions>=4.8.0, but you have typing-extensions 4.5.0 which is incompatible.


In [13]:
# preprocessing/data_preprocessing.py

import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler
import joblib

def preprocess_meteorological_data(file_path):
    """Load the meteorological CSV and build the model-ready feature table.

    Processing steps, in order:
      1. Parse the ``datetime`` column (``YYYYMMDDHHMMSS``) into ``Timestamp``
         and sort chronologically.
      2. KNN-impute (5 neighbours) missing values in the five input features.
      3. Drop rows whose irradiance target is missing.
      4. Rename the raw columns to short names.
      5. Min-max scale Temperature, Humidity, Pressure and Wind Speed and
         persist the fitted scaler to ``scaler_y.pkl``.
      6. Replace Wind Direction with its sine and cosine components.
      7. Add normalised ``Hour`` and ``DayOfYear`` features.
      8. Add ``Irradiance_{5,15,30,60}min_ahead`` target columns.
      9. Drop every row that still contains a missing value.

    Parameters
    ----------
    file_path : str or path-like
        CSV file holding the ``datetime`` column and the raw measurement
        columns listed in ``input_features`` / ``target_variable`` below.

    Returns
    -------
    pandas.DataFrame
        Processed frame with a fresh 0..n-1 index.

    Side effects
    ------------
    Writes ``scaler_y.pkl`` to the current working directory.
    """
    input_features = ['Tower Dry Bulb Temp [deg C]', 'Tower RH [%]', 'Station Pressure [mBar]',
                      'Avg Wind Speed @ 6ft [m/s]', 'Avg Wind Direction @ 6ft [deg from N]']
    target_variable = 'Global CMP22 (vent/cor) [W/m^2]'
    scaled_columns = ['Temperature', 'Humidity', 'Pressure', 'Wind Speed']
    horizons_minutes = [5, 15, 30, 60]

    met_data = pd.read_csv(file_path)

    # Build a proper datetime column and order the rows chronologically.
    met_data['Timestamp'] = pd.to_datetime(met_data['datetime'].astype(str), format='%Y%m%d%H%M%S')
    met_data = met_data.sort_values('Timestamp')

    # Fill gaps in the input features only; the target is never imputed.
    imputer = KNNImputer(n_neighbors=5)
    met_data[input_features] = imputer.fit_transform(met_data[input_features])

    # Rows without a measured target cannot be used for supervision.
    met_data = met_data.dropna(subset=[target_variable])

    met_data = met_data.rename(columns={
        'Tower Dry Bulb Temp [deg C]': 'Temperature',
        'Tower RH [%]': 'Humidity',
        'Station Pressure [mBar]': 'Pressure',
        'Avg Wind Speed @ 6ft [m/s]': 'Wind Speed',
        'Avg Wind Direction @ 6ft [deg from N]': 'Wind Direction',
        'Global CMP22 (vent/cor) [W/m^2]': 'Irradiance'
    })

    # Scale the four continuous inputs to [0, 1].
    # NOTE(review): the scaler is fitted on the whole table, i.e. before any
    # train/test split performed downstream - confirm that is acceptable.
    scaler = MinMaxScaler()
    met_data[scaled_columns] = scaler.fit_transform(met_data[scaled_columns])
    # Despite the "_y" in the file name this is the scaler of the INPUT
    # features; the name is kept so existing consumers of the file still work.
    joblib.dump(scaler, 'scaler_y.pkl')

    # Wind direction is circular, so encode it as a point on the unit circle.
    wind_radians = np.deg2rad(met_data['Wind Direction'])
    met_data['Wind Dir Sin'] = np.sin(wind_radians)
    met_data['Wind Dir Cos'] = np.cos(wind_radians)
    met_data = met_data.drop(columns='Wind Direction')

    # Temporal features. Hour 23 maps to 1.0; day 366 of a leap year maps
    # slightly above 1.0 because the divisor is fixed at 365.
    met_data['Hour'] = met_data['Timestamp'].dt.hour / 23.0
    met_data['DayOfYear'] = met_data['Timestamp'].dt.dayofyear / 365.0

    # Future-irradiance targets. Look the value up by timestamp instead of
    # shifting by row count: rows were dropped above (and the source may have
    # gaps), so "N rows later" is not guaranteed to be "N minutes later".
    # Horizons whose exact timestamp is absent become NaN and are dropped below.
    irradiance_at = met_data.set_index('Timestamp')['Irradiance']
    irradiance_at = irradiance_at[~irradiance_at.index.duplicated(keep='first')]
    for minutes in horizons_minutes:
        future_times = pd.DatetimeIndex(met_data['Timestamp'] + pd.Timedelta(minutes=minutes))
        met_data[f'Irradiance_{minutes}min_ahead'] = irradiance_at.reindex(future_times).to_numpy()

    # Remove rows with any remaining missing value (e.g. horizons past the end).
    met_data = met_data.dropna()

    return met_data.reset_index(drop=True)

In [16]:
# preprocessing/image_preprocessing.py

import cv2
import numpy as np
import glob
import os
import pandas as pd

def preprocess_images(image_folder):
    """Load every ``*.jpg`` in ``image_folder`` as a normalised 128x128 image.

    The file name without its extension must be a timestamp formatted as
    ``YYYYMMDDHHMMSS``; it is parsed with that exact format, so any other
    name raises from ``pd.to_datetime``. Files that OpenCV cannot read are
    skipped.

    Parameters
    ----------
    image_folder : str
        Directory that is searched (non-recursively) for ``*.jpg`` files.

    Returns
    -------
    tuple[list, list]
        ``(images, image_timestamps)`` - parallel lists ordered by file path.
        Each image is a float32 array resized to 128x128 with pixel values
        divided by 255.
    """
    images = []
    image_timestamps = []

    for path in sorted(glob.glob(os.path.join(image_folder, '*.jpg'))):
        # Strip only the real extension; a plain str.replace would also
        # remove a '.jpg' occurring in the middle of the name.
        stem, _extension = os.path.splitext(os.path.basename(path))
        timestamp = pd.to_datetime(stem, format='%Y%m%d%H%M%S')

        img = cv2.imread(path)
        if img is None:
            continue  # Skip files that could not be decoded

        img = cv2.resize(img, (128, 128))
        # float32 halves the footprint of the default float64 division result,
        # which matters when tens of thousands of images are held in memory.
        images.append(img.astype(np.float32) / 255.0)
        image_timestamps.append(timestamp)

    return images, image_timestamps

In [12]:
# Build the processed meteorological table from the combined CSV and show it.
met_data = preprocess_meteorological_data('../combined_data.csv')
met_data
# The sky images are loaded separately with preprocess_images.

Unnamed: 0,datetime,Irradiance,Temperature,Humidity,Wind Speed,Pressure,Timestamp,Wind Dir Sin,Wind Dir Cos,Hour,DayOfYear,Irradiance_5min_ahead,Irradiance_15min_ahead,Irradiance_30min_ahead,Irradiance_60min_ahead
0,20230101000000,-0.899771,0.426223,0.616318,0.048901,0.250588,2023-01-01 00:00:00,-0.763796,-0.645458,0.000000,0.002740,-0.840211,-0.794020,-0.647894,-0.716989
1,20230101000100,-0.876596,0.425231,0.618392,0.039946,0.250559,2023-01-01 00:01:00,-0.793353,-0.608761,0.000000,0.002740,-0.889579,-0.809629,-0.621173,-0.776699
2,20230101000200,-0.858901,0.423942,0.621088,0.063700,0.250568,2023-01-01 00:02:00,-0.685818,-0.727773,0.000000,0.002740,-0.926488,-0.844199,-0.645756,-0.811686
3,20230101000300,-0.843903,0.423381,0.623266,0.063164,0.250575,2023-01-01 00:03:00,-0.799685,-0.600420,0.000000,0.002740,-0.958692,-0.872226,-0.731193,-0.821406
4,20230101000400,-0.859976,0.422571,0.625858,0.065147,0.250555,2023-01-01 00:04:00,-0.829038,-0.559193,0.000000,0.002740,-0.977529,-0.854995,-0.716767,-0.846818
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
987775,20241116225500,-0.748687,0.433245,0.537300,0.038445,0.254646,2024-11-16 22:55:00,-0.810042,-0.586372,0.956522,0.879452,-0.776067,-0.733515,-0.750811,-0.600193
987776,20241116225600,-0.760332,0.433526,0.532219,0.039410,0.254645,2024-11-16 22:56:00,-0.811064,-0.584958,0.956522,0.879452,-0.746704,-0.728514,-0.749872,-0.552789
987777,20241116225700,-0.771023,0.433890,0.533671,0.020643,0.254641,2024-11-16 22:57:00,-0.876307,-0.481754,0.956522,0.879452,-0.755439,-0.744534,-0.756626,-0.577916
987778,20241116225800,-0.757686,0.434154,0.531700,0.044611,0.254643,2024-11-16 22:58:00,-0.973579,-0.228351,0.956522,0.879452,-0.763985,-0.745426,-0.747452,-0.572389


In [17]:
# Load the training images, then print the size of both returned lists so a
# mismatch between images and timestamps is visible immediately.
images, image_timestamps = preprocess_images('../training_images/')
for loaded in (images, image_timestamps):
    print(len(loaded))


47416
47416


In [24]:
# Sanity check: pixel values of the first loaded image should lie in [0, 1].
print(images[0])

[[[0.07843137 0.0745098  0.08627451]
  [0.09019608 0.09019608 0.09019608]
  [0.0627451  0.0627451  0.0627451 ]
  ...
  [0.09803922 0.09803922 0.09803922]
  [0.0745098  0.0745098  0.0745098 ]
  [0.08627451 0.08627451 0.08627451]]

 [[0.92941176 0.92941176 0.92941176]
  [0.92941176 0.92941176 0.92941176]
  [0.89019608 0.89019608 0.89019608]
  ...
  [0.0627451  0.0627451  0.0627451 ]
  [0.05882353 0.05882353 0.05882353]
  [0.05882353 0.05882353 0.05882353]]

 [[0.30980392 0.30980392 0.30980392]
  [0.30588235 0.30588235 0.30588235]
  [0.27058824 0.27058824 0.27058824]
  ...
  [0.0745098  0.0745098  0.0745098 ]
  [0.0745098  0.0745098  0.0745098 ]
  [0.07058824 0.07058824 0.07058824]]

 ...

 [[0.07058824 0.07058824 0.07058824]
  [0.07058824 0.07058824 0.07058824]
  [0.07058824 0.07058824 0.07058824]
  ...
  [0.08235294 0.08235294 0.08235294]
  [0.07843137 0.07843137 0.07843137]
  [0.0627451  0.0627451  0.0627451 ]]

 [[0.07843137 0.07843137 0.07843137]
  [0.06666667 0.06666667 0.06666667]


In [31]:
# training/train_model.py

import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from models.hybrid import create_hybrid_model
import pandas as pd

def train_hybrid_model(met_data, images, image_timestamps):
    """Train the hybrid image + time-series model on aligned data.

    Each sample consists of the numeric features of ``sequence_length``
    consecutive rows, the image attached to the last row of that window and
    the four future-irradiance targets of that same row. The first 80% of the
    samples (chronological order) are used for fitting, the rest for
    validation.

    Parameters
    ----------
    met_data : pandas.DataFrame
        Output of ``preprocess_meteorological_data``.
    images : sequence of numpy.ndarray
        Preprocessed images.
    image_timestamps : sequence
        Timestamp of each entry in ``images``.

    Returns
    -------
    tuple
        ``(model, history)`` - the fitted model and the object returned by
        ``model.fit``.

    Raises
    ------
    ValueError
        If there are not more rows than ``sequence_length``.
    """
    sequence_length = 60  # Number of past rows (minutes) fed to the model
    horizons = [5, 15, 30, 60]
    features = ['Temperature', 'Humidity', 'Pressure', 'Wind Speed', 'Wind Dir Sin', 'Wind Dir Cos',
                'Hour', 'DayOfYear']

    # Attach one image to every meteorological row.
    met_data = align_data_with_images(met_data, images, image_timestamps)

    # Window i covers rows i .. i+sequence_length-1; as in the original loop,
    # i runs over range(len(rows) - sequence_length).
    n_samples = len(met_data) - sequence_length
    if n_samples <= 0:
        raise ValueError(
            f'Need more than {sequence_length} aligned rows to build sequences, got {len(met_data)}'
        )
    last_row_start = sequence_length - 1
    last_rows = slice(last_row_start, last_row_start + n_samples)

    # Numeric windows, built without a Python-level loop. float32 is what the
    # network computes in, so keeping float64 copies only wastes memory.
    X_num = met_data[features].to_numpy(dtype=np.float32)
    windows = np.lib.stride_tricks.sliding_window_view(X_num, sequence_length, axis=0)
    # sliding_window_view yields (window, feature, step); the model expects
    # (window, step, feature).
    X_num_seq = np.ascontiguousarray(windows[:n_samples].transpose(0, 2, 1))

    # Targets and images belong to the LAST row of each window.
    y_seq = met_data[[f'Irradiance_{minutes}min_ahead' for minutes in horizons]].to_numpy()[last_rows]

    # Copy the needed images exactly once into a single float32 buffer instead
    # of materialising the full image array and then duplicating it per sample.
    # NOTE(review): this still holds one image per sample in RAM; for very
    # large datasets a batch-wise loader would be required.
    sample_images = met_data['Image'].to_numpy()[last_rows]
    X_img_seq = np.empty((n_samples,) + np.shape(sample_images[0]), dtype=np.float32)
    for row, image in enumerate(sample_images):
        X_img_seq[row] = image

    # Chronological 80/20 split (no shuffling).
    split_index = int(0.8 * n_samples)
    X_num_train, X_num_test = X_num_seq[:split_index], X_num_seq[split_index:]
    X_img_train, X_img_test = X_img_seq[:split_index], X_img_seq[split_index:]
    y_train, y_test = y_seq[:split_index], y_seq[split_index:]

    # Create and compile the model
    num_features = X_num_train.shape[2]
    model = create_hybrid_model(sequence_length, num_features)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

    # Callbacks
    early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(factor=0.5, patience=3)

    # NOTE(review): the held-out 20% doubles as validation data, so early
    # stopping and LR scheduling see the same samples that are later reported
    # as the test set.
    history = model.fit(
        [X_img_train, X_num_train],
        y_train,
        validation_data=([X_img_test, X_num_test], y_test),
        epochs=50,
        batch_size=32,
        callbacks=[early_stopping, reduce_lr]
    )

    return model, history

def align_data_with_images(met_data, images, image_timestamps):
    """Attach to every meteorological row the most recent image.

    Rows are matched with ``pd.merge_asof(direction='backward')``: each row
    receives the image whose timestamp is the latest one not after the row's
    ``Timestamp``. Rows that precede every image fall back to ``images[0]``.

    Parameters
    ----------
    met_data : pandas.DataFrame
        Must contain a ``Timestamp`` column. The frame is not modified.
    images : sequence of numpy.ndarray
        Image arrays.
    image_timestamps : sequence
        Timestamp of each entry in ``images`` (same length, same order).

    Returns
    -------
    pandas.DataFrame
        ``met_data`` sorted by ``Timestamp`` with an additional ``Image``
        column. The cells reference the arrays in ``images`` (no copies), so
        they must be treated as read-only.

    Raises
    ------
    ValueError
        If ``images`` is empty or its length differs from ``image_timestamps``.
    """
    images = list(images)
    image_timestamps = list(image_timestamps)
    if not images:
        raise ValueError('images must contain at least one image')
    if len(images) != len(image_timestamps):
        raise ValueError(
            f'images ({len(images)}) and image_timestamps ({len(image_timestamps)}) differ in length'
        )

    # Merge on a small integer key instead of the pixel data itself: turning
    # each image into a tuple of Python floats is extremely memory hungry, and
    # Series.fillna rejects a tuple as fill value.
    image_df = pd.DataFrame({
        'Timestamp': pd.to_datetime(image_timestamps),
        '_image_index': np.arange(len(images)),
    }).sort_values('Timestamp')

    # assign() works on a copy, so the caller's frame keeps its original column.
    met_sorted = met_data.assign(Timestamp=pd.to_datetime(met_data['Timestamp'])).sort_values('Timestamp')

    merged_data = pd.merge_asof(
        met_sorted,
        image_df,
        on='Timestamp',
        direction='backward'
    )

    # Rows earlier than the first image have no match (NaN); use images[0] for them.
    positions = merged_data.pop('_image_index').fillna(0).astype(int).to_numpy()

    # Fill an object array element by element so pandas stores one array per
    # cell rather than trying to interpret the images as a multi-dimensional block.
    image_column = np.empty(len(merged_data), dtype=object)
    for row, position in enumerate(positions):
        image_column[row] = images[position]
    merged_data['Image'] = image_column

    return merged_data

In [35]:
# Train on the full dataset.
# NOTE(review): the recorded run of this cell ended in a MemoryError; consider
# a smaller subset or batch-wise image loading before re-running.
model, history = train_hybrid_model(met_data, images, image_timestamps)

MemoryError: 

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

def evaluate_model(model, met_data, images, image_timestamps=None):
    """Report RMSE/MAE and plot predictions for the last 20% of the samples.

    Samples are built as in training: the numeric features of
    ``sequence_length`` consecutive rows, the image belonging to the last row
    of the window, and that row's future-irradiance targets.

    Parameters
    ----------
    model
        Fitted model exposing ``predict([images, sequences])``.
    met_data : pandas.DataFrame
        Processed meteorological table with the feature and target columns.
    images : sequence of numpy.ndarray
        If ``image_timestamps`` is None, ``images[k]`` is taken as the image
        of row ``k`` of ``met_data`` (positional pairing). Otherwise the raw
        image list to be aligned by timestamp.
    image_timestamps : sequence, optional
        When given, ``met_data`` and ``images`` are first aligned with
        ``align_data_with_images`` - the same step training performs.

    Raises
    ------
    ValueError
        If there are too few rows to form a sequence, or fewer images than
        the rows that need one.
    """
    sequence_length = 60
    horizons = [5, 15, 30, 60]
    features = ['Temperature', 'Humidity', 'Pressure', 'Wind Speed', 'Wind Dir Sin', 'Wind Dir Cos',
                'Hour', 'DayOfYear']  # Exclude direct current irradiance as a feature

    if image_timestamps is not None:
        # Give every row its image exactly like the training pipeline does.
        met_data = align_data_with_images(met_data, images, image_timestamps)
        image_rows = met_data['Image'].to_numpy()
    else:
        image_rows = images

    # Window i covers rows i .. i+sequence_length-1, i in range(rows - sequence_length).
    n_samples = len(met_data) - sequence_length
    if n_samples <= 0:
        raise ValueError(
            f'Need more than {sequence_length} rows to build sequences, got {len(met_data)}'
        )
    last_row_start = sequence_length - 1
    if len(image_rows) < last_row_start + n_samples:
        raise ValueError(
            f'Got {len(image_rows)} images for {len(met_data)} rows; images must be aligned '
            'row by row with met_data (pass image_timestamps to align them automatically)'
        )

    # Only the last 20% is evaluated, so only that part is materialised.
    split_index = int(0.8 * n_samples)
    test_rows = slice(last_row_start + split_index, last_row_start + n_samples)

    X_num = met_data[features].to_numpy(dtype=np.float32)
    windows = np.lib.stride_tricks.sliding_window_view(X_num, sequence_length, axis=0)
    # (window, feature, step) -> (window, step, feature)
    X_num_test = np.ascontiguousarray(windows[split_index:n_samples].transpose(0, 2, 1))

    y_test = met_data[[f'Irradiance_{minutes}min_ahead' for minutes in horizons]].to_numpy()[test_rows]

    test_images = image_rows[test_rows]
    X_img_test = np.empty((len(test_images),) + np.shape(test_images[0]), dtype=np.float32)
    for row, image in enumerate(test_images):
        X_img_test[row] = image

    # Predictions
    y_pred = model.predict([X_img_test, X_num_test])

    # Calculate RMSE and MAE for each horizon
    for i, minutes in enumerate(horizons):
        rmse = np.sqrt(mean_squared_error(y_test[:, i], y_pred[:, i]))
        mae = mean_absolute_error(y_test[:, i], y_pred[:, i])
        print(f'{minutes}-Minute Ahead Prediction - RMSE: {rmse:.2f}, MAE: {mae:.2f}')

        # Plot actual vs predicted
        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(y_test[:, i], label='Actual')
        ax.plot(y_pred[:, i], label='Predicted')
        ax.set_title(f'{minutes}-Minute Ahead Prediction')
        ax.set_xlabel('Samples')
        ax.set_ylabel('Irradiance (W/m^2)')
        ax.legend()
        plt.show()
        plt.close(fig)  # Release the figure; one is created per horizon

In [34]:
import tensorflow as tf

# Enable memory growth on every visible GPU. A RuntimeError raised while
# configuring the devices is printed instead of aborting the cell.
gpus = tf.config.list_physical_devices('GPU')
try:
    # With no GPU present the loop body simply never runs.
    for gpu in gpus:
        print(gpu)
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(e)