In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import os
import tifffile as tiff
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Input, Concatenate, BatchNormalization, Dropout, GlobalAveragePooling2D, Add, LeakyReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.initializers import HeNormal, HeUniform
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam

# tf.keras.backend.clear_session()

print("split started")
# Load the dataset
df = pd.read_csv("datasets/train.csv")

print(f'Number of features: {df.shape[1]}')
print(f'Number of instances: {df.shape[0]}')
df.head()  # NOTE: not the last expression of the cell, so the notebook does not render it

# Raw (unscaled) numerical features. Scaling is deliberately deferred until
# after the train/validation/test split so that the scaler statistics are
# computed from the training rows only.
numerical_features = df[['elevation', 'ozone', 'NO2', 'azimuth', 'zenith', 'incidence_azimuth', 'incidence_zenith']]

# Function to load and preprocess image data
def load_and_preprocess_image(filepath):
    """Read a TIFF file and return its pixel data divided by 65535.0.

    Args:
        filepath: Path of the TIFF file, passed straight to ``tiff.imread``.

    Returns:
        A NumPy array holding the image values divided by 65535.0
        (presumably the images are 16-bit, so this maps them to [0, 1] --
        TODO confirm against the data).
    """
    img = tiff.imread(filepath)
    img_array = np.array(img)
    img_array = img_array / 65535.0   # Normalize pixel values
    return img_array

# Load image data
image_data = np.array([load_and_preprocess_image(os.path.join('./train/', filename)) for filename in df['file_name_l1']])

# Target variable
target = df['value_550'].values

# Split data into training, validation, and testing sets without stratification
from sklearn.model_selection import train_test_split

# 70 % train, then the remaining 30 % is halved into validation and test.
X_train_num, X_temp_num, X_train_img, X_temp_img, y_train, y_temp = train_test_split(numerical_features, image_data, target, test_size=0.3, random_state=42)
X_val_num, X_test_num, X_val_img, X_test_img, y_val, y_test = train_test_split(X_temp_num, X_temp_img, y_temp, test_size=0.5, random_state=42)

# Numerical data scaling: fit on the training split only, then apply the same
# transformation to the validation and test splits. Fitting on the full
# dataset before splitting would leak validation/test statistics into
# preprocessing and make the reported validation/test MAE optimistic.
scaler = StandardScaler()
X_train_num = scaler.fit_transform(X_train_num)
X_val_num = scaler.transform(X_val_num)
X_test_num = scaler.transform(X_test_num)

print("split done")

# Define the CNN and dense model
class AOTModel:
    """Two-branch Keras network: a CNN over an image input and an MLP over
    numerical features, concatenated and reduced to a single linear output.

    ``__init__`` only wires the layer graph; call :meth:`model` to obtain a
    compiled ``tensorflow.keras`` ``Model``.
    """

    def __init__(self, image_shape=(19, 19, 13), num_numerical_features=7):
        """Build the layer graph.

        Args:
            image_shape: Shape of one image sample given to ``Input``.
            num_numerical_features: Length of the numerical feature vector.
        """
        # Image processing Neural Network
        self.image_input = Input(shape=image_shape)

        image_branch = self._conv_stage(self.image_input, 32)
        image_branch = MaxPooling2D((2, 2))(image_branch)
        image_branch = Dropout(0.25)(image_branch)

        image_branch = self._conv_stage(image_branch, 64)
        image_branch = MaxPooling2D((2, 2))(image_branch)
        image_branch = Dropout(0.25)(image_branch)

        image_branch = self._conv_stage(image_branch, 128)
        image_branch = GlobalAveragePooling2D()(image_branch)
        image_branch = Dropout(0.5)(image_branch)

        # Residual connection
        # residual = Conv2D(128, (1, 1), kernel_regularizer=l2(0.01), kernel_initializer=initializer)(self.image_input)
        # residual = BatchNormalization()(residual)
        # residual = GlobalAveragePooling2D()(residual)
        # image_processing_network = Add()([image_processing_network, residual])

        # Numerical processing Neural Network
        self.numerical_input = Input(shape=(num_numerical_features,))
        numerical_branch = self._dense_stage(self.numerical_input, 64)
        numerical_branch = self._dense_stage(numerical_branch, 128)
        numerical_branch = self._dense_stage(numerical_branch, 64)

        # Concatenation of both networks
        merged = Concatenate()([image_branch, numerical_branch])
        merged = Dense(64, activation='relu', kernel_initializer=HeUniform())(merged)
        merged = Dropout(0.5)(merged)
        merged = Dense(1, kernel_initializer=HeUniform())(merged)

        # Attribute name (including its spelling) is kept as-is because it is
        # part of the object's visible interface.
        self.aot_network_arquitecture = merged

    @staticmethod
    def _conv_stage(inputs, filters):
        """Apply Conv2D(3x3, L2 0.01) -> BatchNormalization -> LeakyReLU."""
        # A fresh initializer per layer: Keras documents that an initializer
        # instance seeded with an int or None returns the same values on
        # repeated calls, so one shared instance would not give independent
        # draws across layers.
        outputs = Conv2D(
            filters,
            (3, 3),
            kernel_regularizer=l2(0.01),
            kernel_initializer=HeUniform(),
        )(inputs)
        outputs = BatchNormalization()(outputs)
        return LeakyReLU()(outputs)

    @staticmethod
    def _dense_stage(inputs, units):
        """Apply Dense(relu) -> BatchNormalization -> Dropout(0.5)."""
        outputs = Dense(units, activation='relu', kernel_initializer=HeUniform())(inputs)
        outputs = BatchNormalization()(outputs)
        return Dropout(0.5)(outputs)

    def model(self, learning_rate=0.001, optimizer_choice='adam'):
        """Return a compiled Keras ``Model`` for the graph built in ``__init__``.

        Args:
            learning_rate: Learning rate passed to the optimizer.
            optimizer_choice: One of ``'adam'``, ``'rmsprop'`` or ``'nadam'``;
                any other value falls back to Adam.

        Returns:
            A ``Model`` taking ``[image_input, numerical_input]`` and compiled
            with mean-absolute-error loss and an ``'mae'`` metric.
        """
        model = Model(inputs=[self.image_input, self.numerical_input], outputs=self.aot_network_arquitecture)

        optimizer_classes = {'adam': Adam, 'rmsprop': RMSprop, 'nadam': Nadam}
        # Unknown names fall back to Adam.
        optimizer_class = optimizer_classes.get(optimizer_choice, Adam)
        # clipnorm=1.0 is applied for every optimizer choice.
        optimizer = optimizer_class(learning_rate=learning_rate, clipnorm=1.0)

        # The reported loss also includes the L2 kernel penalties of the conv
        # layers, so it is larger than the plain 'mae' metric.
        model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['mae'])
        return model

# Build the network graph and compile it with Adam at a learning rate of 1e-4
model = AOTModel().model(
    optimizer_choice='adam',
    learning_rate=1e-4,
)

# Define callbacks
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The rate is returned unchanged while ``epoch < 10``; otherwise it is
    multiplied by ``exp(-0.1)``.
    """
    return lr if epoch < 10 else lr * np.exp(-0.1)

# Stop after 10 epochs without val_loss improvement (restoring the best
# weights) and halve the learning rate after 5 such epochs, down to 1e-6.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)
# lr_scheduler = LearningRateScheduler(scheduler)

print("starting training")
# Train the model
train_inputs = [X_train_img, X_train_num]
val_inputs = [X_val_img, X_val_num]
test_inputs = [X_test_img, X_test_num]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)

model.save('aot_model.keras')

# Evaluate the model on the validation split
val_loss, val_mae = model.evaluate(val_inputs, y_val)
print(f'Validation MAE: {val_mae}')

# Evaluate on the held-out test split
test_loss, test_mae = model.evaluate(test_inputs, y_test)
print(f'Test MAE: {test_mae}')

2024-06-23 23:10:34.655717: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


split started
Number of features: 10
Number of instances: 10438
split done


2024-06-23 23:10:50.024772: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10689 MB memory:  -> device: 0, name: NVIDIA TITAN V, pci bus id: 0000:5e:00.0, compute capability: 7.0
2024-06-23 23:10:50.025414: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10689 MB memory:  -> device: 1, name: NVIDIA TITAN V, pci bus id: 0000:86:00.0, compute capability: 7.0


starting training
Epoch 1/200


I0000 00:00:1719180657.071169 1786170 service.cc:145] XLA service 0x7f783401fa60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1719180657.071210 1786170 service.cc:153]   StreamExecutor device (0): NVIDIA TITAN V, Compute Capability 7.0
I0000 00:00:1719180657.071217 1786170 service.cc:153]   StreamExecutor device (1): NVIDIA TITAN V, Compute Capability 7.0
2024-06-23 23:10:57.259241: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-06-23 23:10:58.395526: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m 22/229[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 8ms/step - loss: 6.1442 - mae: 1.6667

I0000 00:00:1719180678.288855 1786170 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 102ms/step - loss: 6.0242 - mae: 1.5927 - val_loss: 4.5161 - val_mae: 0.2451 - learning_rate: 1.0000e-04
Epoch 2/200
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 5.5132 - mae: 1.3024 - val_loss: 4.2333 - val_mae: 0.2110 - learning_rate: 1.0000e-04
Epoch 3/200
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 5.0666 - mae: 1.1118 - val_loss: 3.9335 - val_mae: 0.1904 - learning_rate: 1.0000e-04
Epoch 4/200
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 4.5869 - mae: 0.9200 - val_loss: 3.5908 - val_mae: 0.1593 - learning_rate: 1.0000e-04
Epoch 5/200
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 4.1180 - mae: 0.7694 - val_loss: 3.2368 - val_mae: 0.1510 - learning_rate: 1.0000e-04
Epoch 6/200
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 3.6631 - 

In [2]:
from tensorflow.keras.models import load_model

# Restore the trained network from disk
model = load_model('aot_model.keras')

# Read the rows to predict
new_df = pd.read_csv("datasets/test.csv")

# Numerical inputs: same columns as in training, scaled with the already-fitted scaler
new_numerical_features = new_df[['elevation', 'ozone', 'NO2', 'azimuth', 'zenith', 'incidence_azimuth', 'incidence_zenith']]
new_numerical_features = scaler.transform(new_numerical_features)

# Image inputs: one preprocessed array per row, stacked into a single array
new_image_data = np.array([
    load_and_preprocess_image(os.path.join('./test/', filename))
    for filename in new_df['file_name_l1']
])

# Run inference on both inputs
predictions = model.predict([new_image_data, new_numerical_features])

# Write one prediction per id to a CSV file
results = pd.DataFrame({
    'id': new_df['id'],
    'value_550': predictions.ravel(),
})
results.to_csv('predictions.csv', index=False)

print(results.head())

[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step
   id  value_550
0   3   0.112031
1  25   0.117409
2  26   0.072785
3  27   0.167566
4  29   0.077490
