Horizon 1  Self-Organizing Maps (SOM) with Deep Learning Enhancements code

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h1'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    df['month'] = df.index.month
    df['weekofyear'] = df.index.isocalendar().week
    df['year'] = df.index.isocalendar().year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    fft_vals = fft(df[target_col].dropna().values)
    fft_real = np.real(fft_vals)[:len(df[target_col])]
    fft_imag = np.imag(fft_vals)[:len(df[target_col])]
    df['fft_real'] = np.pad(fft_real, (0, len(df) - len(fft_real)), constant_values=np.nan)
    df['fft_imag'] = np.pad(fft_imag, (0, len(df) - len(fft_imag)), constant_values=np.nan)

    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_train.csv"
    test_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.9228   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.9431   [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [35m3        [39m | [35m0.9512   [39m | [35m55.96    [39m | [35m0.1849   [39m | [35m59.09    [39m | [35m0.002242 [39m | [35m8.042    [39m | [35m10.25    [39m |
| [39m4        [39m | [39m0.9268   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9024   [39m | [39m37.89    




===== Evaluation =====
Accuracy: 0.9656
Report:
               precision    recall  f1-score   support

           0     0.9020    0.8214    0.8598        56
           1     0.9545    0.9625    0.9585       240
           2     0.9917    0.9815    0.9866       487
           3     0.7895    1.0000    0.8824        30

    accuracy                         0.9656       813
   macro avg     0.9094    0.9414    0.9218       813
weighted avg     0.9671    0.9656    0.9657       813

Confusion Matrix:
 [[ 46  10   0   0]
 [  5 231   4   0]
 [  0   1 478   8]
 [  0   0   0  30]]

===== Model Metrics =====
Training Time: 9.09s
Inference/sample: 0.000266s
Testing Time: 0.22s
Model Size: 0.06 MB
Trainable Params: 9,316
RAM Usage: 645.81 MB


Horizon 2

In [5]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h2'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1


    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\Lasso_Selected_Features_H2\train_lasso_selected.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\Lasso_Selected_Features_H2\test_lasso_selected.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.9512   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.9675   [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [35m3        [39m | [35m0.9756   [39m | [35m55.96    [39m | [35m0.1849   [39m | [35m59.09    [39m | [35m0.002242 [39m | [35m8.042    [39m | [35m10.25    [39m |
| [39m4        [39m | [39m0.9593   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9756   [39m | [39m37.89    




===== Evaluation =====
Accuracy: 0.7196
Report:
               precision    recall  f1-score   support

           0     0.9302    0.9524    0.9412        42
           1     0.8058    0.8628    0.8333       226
           2     0.9691    0.6169    0.7539       509
           3     0.1765    1.0000    0.3000        36

    accuracy                         0.7196       813
   macro avg     0.7204    0.8580    0.7071       813
weighted avg     0.8866    0.7196    0.7656       813

Confusion Matrix:
 [[ 40   2   0   0]
 [  3 195  10  18]
 [  0  45 314 150]
 [  0   0   0  36]]

===== Model Metrics =====
Training Time: 6.47s
Inference/sample: 0.000365s
Testing Time: 0.30s
Model Size: 0.08 MB
Trainable Params: 14,820
RAM Usage: 1198.39 MB


Horizon 3

In [3]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h3'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()



    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H3_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H3_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.9472   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [39m2        [39m | [39m0.939    [39m | [39m18.79    [39m | [39m0.4465   [39m | [39m80.06    [39m | [39m0.007227 [39m | [39m5.206    [39m | [39m14.7     [39m |
| [35m3        [39m | [35m0.9675   [39m | [35m55.96    [39m | [35m0.1849   [39m | [35m59.09    [39m | [35m0.002242 [39m | [35m8.042    [39m | [35m10.25    [39m |
| [39m4        [39m | [39m0.9512   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9553   [39m | [39m37.89    



Report:
               precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        27
           1     0.8361    0.9343    0.8825       213
           2     0.9713    0.9676    0.9694       524
           3     0.9245    1.0000    0.9608        49

    accuracy                         0.9287       813
   macro avg     0.6830    0.7255    0.7032       813
weighted avg     0.9008    0.9287    0.9139       813

Confusion Matrix:
 [[  0  26   1   0]
 [  0 199  14   0]
 [  0  13 507   4]
 [  0   0   0  49]]

===== Model Metrics =====
Training Time: 5.33s
Inference/sample: 0.000205s
Testing Time: 0.17s
Model Size: 0.04 MB
Trainable Params: 6,116
RAM Usage: 931.06 MB


Horizon 4

In [4]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h4'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()



    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H4_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H4_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.935    [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.9797   [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [39m3        [39m | [39m0.9756   [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m59.09    [39m | [39m0.002242 [39m | [39m8.042    [39m | [39m10.25    [39m |
| [39m4        [39m | [39m0.9756   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9634   [39m | [39m37.89    



Report:
               precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        13
           1     0.9341    0.8543    0.8924       199
           2     0.9487    0.9908    0.9693       541
           3     0.9091    1.0000    0.9524        60

    accuracy                         0.9422       813
   macro avg     0.6980    0.7113    0.7035       813
weighted avg     0.9270    0.9422    0.9337       813

Confusion Matrix:
 [[  0  12   1   0]
 [  0 170  28   1]
 [  0   0 536   5]
 [  0   0   0  60]]

===== Model Metrics =====
Training Time: 6.58s
Inference/sample: 0.000205s
Testing Time: 0.17s
Model Size: 0.06 MB
Trainable Params: 9,188
RAM Usage: 1078.96 MB
