In [34]:
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import tensorflow as tf
import datetime
import pandas as pd

In [35]:
patch_size_pixels = 64
model_input_width = 6  # number of images before the event
buffer_size = 100
# num_classes = 6
batch_size = 8


datasets_folder = os.path.join("E:\\Workspace\\Thesis_dataset", "illinois_dataset")
# dataset_folder_name = "Quercus_germany_200_stations"

In [None]:
dataset_folder_path = datasets_folder
os.makedirs(dataset_folder_path, exist_ok=True)

record_id_column_name = 'record_id'
label_column_name = "val_yield"

train_timeseries_file_path = os.path.join(dataset_folder_path, "test_data.npz")
print(train_timeseries_file_path)
train_label_file_path = os.path.join(dataset_folder_path, "test.csv")
print(train_label_file_path)
print("-" * 60)

E:\Workspace\Thesis_dataset\illinois_dataset\test_data.npz
E:\Workspace\Thesis_dataset\illinois_dataset\test.csv
------------------------------------------------------------


In [37]:
npz_file = np.load(train_timeseries_file_path, mmap_mode="r")
first_array_key = next(iter(npz_file))
timeseries_shape = npz_file[first_array_key].shape
timeseries_shape

(6, 64, 64, 15)

In [38]:
label_csv_path = r"E:\Workspace\Thesis_dataset\illinois_dataset\train.csv"
test_label_file_path = r"E:\Workspace\Thesis_dataset\illinois_dataset\test.csv"
record_id_column_name = "id"
label_column_name = "val_yield"
timeseries_npz_path = r"E:\Workspace\Thesis_dataset\illinois_dataset\train_data.npz"
test_timeseries_file_path = r"E:\Workspace\Thesis_dataset\illinois_dataset\train_data.npz"

In [39]:
def data_generator(
    label_csv_path, record_id_column_name, label_column_name, timeseries_npz_path
):

    # Check if the inputs are a byte string and decode it to a regular string if necessary
    if isinstance(label_csv_path, bytes):
        label_csv_path = label_csv_path.decode("utf-8")
    if isinstance(timeseries_npz_path, bytes):
        timeseries_npz_path = timeseries_npz_path.decode("utf-8")
    if isinstance(record_id_column_name, bytes):
        record_id_column_name = record_id_column_name.decode("utf-8")
    if isinstance(label_column_name, bytes):
        label_column_name = label_column_name.decode("utf-8")

    # Load the labels CSV
    labels_df = pd.read_csv(label_csv_path)

    # Load the npz file
    with np.load(timeseries_npz_path, allow_pickle=True) as npz_file:
        for _, row in labels_df.iterrows():
            record_id = str(
                row[record_id_column_name]
            )  # Ensure record_id is treated as a string
            target = row[label_column_name]
            if record_id in npz_file.files:
                # Extract the time series data using record_id
                time_series = npz_file[record_id]
                # print(record_id, target)
                yield time_series, target


# Determine the output types
output_types = (tf.float32, tf.int64)

# Determine the output shapes
output_shapes = (timeseries_shape, ())  #  ((6, 11, 64, 64), ())

# Create train dataset
train_dataset = tf.data.Dataset.from_generator(
    data_generator,  # Generator function
    args=(
        train_label_file_path,
        record_id_column_name,
        label_column_name,
        train_timeseries_file_path,
    ),  # Arguments to pass to the generator
    output_types=output_types,
    output_shapes=output_shapes,
)
train_dataset = (
    train_dataset.shuffle(buffer_size=buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = tf.data.Dataset.from_generator(
    data_generator,  # Generator function
    args=(
        test_label_file_path,
        record_id_column_name,
        label_column_name,
        test_timeseries_file_path,
    ),  # Arguments to pass to the generator
    output_types=output_types,
    output_shapes=output_shapes,
)
test_dataset = test_dataset.batch(batch_size)

In [40]:
def print_dataset_shapes(dataset, dataset_name="Dataset"):
    print(f"--- {dataset_name} ---")
    for batch in dataset.take(1):  # Take a single batch from the dataset
        data, label = batch  # Unpack the batch (time_series, target)
        print(f"Shape of data: {data.shape}")
        print(f"Shape of label: {label.shape}")
# Print shapes for train, validation, and test datasets
print_dataset_shapes(train_dataset, "Train Dataset")
print_dataset_shapes(test_dataset, "Test Dataset")


--- Train Dataset ---
Shape of data: (8, 6, 64, 64, 15)
Shape of label: (8,)
--- Test Dataset ---
Shape of data: (8, 6, 64, 64, 15)
Shape of label: (8,)


In [None]:
# Function to print the first record in the dataset
def print_first_record(dataset, dataset_name="Dataset"):
    print(f"--- First Record from {dataset_name} ---")
    for data, label in dataset.take(1):  # Take a single batch from the dataset
        print(f"Data: {data[0]}")

# Print the first record for the train dataset
print_first_record(train_dataset, "Train Dataset")

--- First Record from Train Dataset ---
Data: [[[[ 5.0000000e+00  1.2860000e+03  1.7970000e+03 ... -1.6407200e+01
     3.4343082e+01  2.2865410e+01]
   [ 5.0000000e+00  1.5090000e+03  2.0270000e+03 ... -1.7137825e+01
     3.4349812e+01  2.2864410e+01]
   [ 5.0000000e+00  1.7120000e+03  2.2330000e+03 ... -1.7570967e+01
     3.4358711e+01  2.2863091e+01]
   ...
   [ 0.0000000e+00  3.5900000e+02  7.5200000e+02 ... -1.8154581e+01
     3.4569382e+01  2.2610416e+01]
   [ 0.0000000e+00  3.6900000e+02  7.6200000e+02 ... -1.5714471e+01
     3.4572651e+01  2.2605965e+01]
   [ 0.0000000e+00  3.8400000e+02  7.7700000e+02 ... -1.3655651e+01
     3.4575119e+01  2.2602600e+01]]

  [[ 5.0000000e+00  1.0910000e+03  1.5730000e+03 ... -1.6018251e+01
     3.4352974e+01  2.2866713e+01]
   [ 5.0000000e+00  1.1890000e+03  1.6510000e+03 ... -1.7177349e+01
     3.4359638e+01  2.2865726e+01]
   [ 5.0000000e+00  1.3260000e+03  1.7800000e+03 ... -1.8147036e+01
     3.4368458e+01  2.2864424e+01]
   ...
   [ 0.0000

In [99]:
import datetime
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint

def train_model(model, train_dataset,  num_epochs=10):
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # Custom learning rate scheduler
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.001,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)


    # Define the path where the best model will be saved
    best_model_filepath = f"{log_dir}/best_model.h5"
    
    # ModelCheckpoint callback to save the best model based on validation accuracy
    checkpoint_callback = ModelCheckpoint(
        best_model_filepath,
        monitor='loss',  # Change to monitor validation accuracy
        save_best_only=True,
        mode='min',  # Change to 'max' for accuracy, since we want the highest value
        verbose=1
    )
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    callbacks_list = [
        tensorboard_callback,
        # LearningRateScheduler(custom_lr_scheduler, verbose=1),
        checkpoint_callback  # Add ModelCheckpoint to callbacks
    ]
    # Train the model
    history = model.fit(
        train_dataset,
        epochs=num_epochs,
        callbacks=callbacks_list,
        # validation_data=val_dataset,
    )

    # Load the best weights from the saved model
    model.load_weights(best_model_filepath)

    # Evaluate the best model on the training dataset
    train_loss, train_accuracy = model.evaluate(train_dataset, verbose=0)
    
    # Evaluate the best model on the validation dataset
    # val_loss, val_accuracy = model.evaluate(val_dataset, verbose=0)

    # Print training and validation accuracy
    print(f"Best model training accuracy: {train_accuracy:.4f}")
    print(f"Best model validation accuracy: {val_accuracy:.4f}")


    # Return the model with the best weights
    return model

In [None]:
# VIT #
import tensorflow as tf
from tensorflow.keras import layers, models

def create_vit_regression(img_size, patch_size, d_model, num_frames, num_layers, num_outputs):
    num_patches = (img_size[0] // patch_size) * (img_size[1] // patch_size)  # Number of patches per frame
    sequence_length = num_frames * num_patches  # Total number of patches (tokens)

    # Define input shape to match the 15 channels
    inputs = layers.Input(shape=(num_frames, img_size[0], img_size[1], 15))  # (batch_size, num_frames, height, width, channels)

    # Flatten spatial dimensions into patches
    x = layers.Reshape((num_frames, num_patches, patch_size * patch_size * 15))(inputs)  # Flatten into patches of size patch_size*patch_size*15

    # Add Linear Projection to d_model dimension
    x = layers.Dense(d_model)(x)  # Project each patch to the d_model dimension
    
    # Add transformer layers
    for _ in range(num_layers):
        # Add Multi-Head Attention
        attention_output = layers.MultiHeadAttention(num_heads=4, key_dim=d_model)(x, x)
        attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output + x)  # Add residual connection
        x = attention_output
    
    # Output layer (regression)
    outputs = layers.Dense(num_outputs)(x)  # Output a single value for regression
    
    # Create the model
    model = tf.keras.Model(inputs, outputs)
    
    return model

# Model Parameters
img_size = (64, 64)  # Image size (64x64)
patch_size = 8  # Patch size (8x8)
d_model = 256  # Dimension of model (output of linear projection)
num_frames = 6  # Number of frames in the input sequence
num_layers = 4  # Number of transformer layers
num_outputs = 1  # Output for regression (e.g., crop yield)

# Create the ViT regression model
model = create_vit_regression(
    img_size=img_size,
    patch_size=patch_size,
    d_model=d_model,
    num_frames=num_frames,
    num_layers=num_layers,
    num_outputs=num_outputs
)

# Display the model summary
model.summary()


In [None]:
def train_vit_model(model, train_dataset, num_epochs=10):
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # Custom learning rate scheduler
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.0001,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    best_model_filepath = f"{log_dir}/best_model.h5"
    
    checkpoint_callback = ModelCheckpoint(
        best_model_filepath,
        monitor='loss',  
        save_best_only=True,
        mode='min',
        verbose=1
    )
    
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    callbacks_list = [
        tensorboard_callback,
        checkpoint_callback  
    ]
    
    # Train the model
    history = model.fit(
        train_dataset,
        epochs=num_epochs,
        callbacks=callbacks_list,
    )

    model.load_weights(best_model_filepath)

    train_loss = model.evaluate(train_dataset, verbose=0)

    print(f"Best model training loss: {train_loss:.4f}")

    return model


In [None]:
# Model Parameters
img_size = (64, 64)  
patch_size = 8 
d_model = 256  
num_frames = 6  
num_layers = 4  
num_outputs = 1  

# Create the ViT regression model
model = create_vit_regression(
    img_size=img_size,
    patch_size=patch_size,
    d_model=d_model,
    num_frames=num_frames,
    num_layers=num_layers,
    num_outputs=num_outputs
)

model.summary()

train_vit_model(model, train_dataset, num_epochs=10)


Epoch 1/10
      6/Unknown [1m6s[0m 355ms/step - loss: nan




Epoch 1: loss did not improve from inf
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 429ms/step - loss: nan
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 372ms/step - loss: nan




Epoch 2: loss did not improve from inf
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 453ms/step - loss: nan
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 383ms/step - loss: nan




Epoch 3: loss did not improve from inf
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 449ms/step - loss: nan
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step - loss: nan




Epoch 4: loss did not improve from inf
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 466ms/step - loss: nan
Epoch 5/10
[1m2/6[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m1s[0m 394ms/step - loss: nan

KeyboardInterrupt: 