Cadiz station

In [1]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h1'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    # df['month'] = df.index.month
    # df['weekofyear'] = df.index.isocalendar().week
    # df['year'] = df.index.isocalendar().year
    # df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    # df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    # df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    # df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    # df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    # df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    # fft_vals = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_vals)[:len(df[target_col])]
    # fft_imag = np.imag(fft_vals)[:len(df[target_col])]
    # df['fft_real'] = np.pad(fft_real, (0, len(df) - len(fft_real)), constant_values=np.nan)
    # df['fft_imag'] = np.pad(fft_imag, (0, len(df) - len(fft_imag)), constant_values=np.nan)

    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_train.csv"
    test_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.9538   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [39m2        [39m | [39m0.9385   [39m | [39m18.79    [39m | [39m0.4465   [39m | [39m80.06    [39m | [39m0.007227 [39m | [39m5.206    [39m | [39m14.7     [39m |
| [39m3        [39m | [39m0.9231   [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m59.09    [39m | [39m0.002242 [39m | [39m8.042    [39m | [39m10.25    [39m |
| [39m4        [39m | [39m0.9538   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9385   [39m | [39m37.89    



               precision    recall  f1-score   support

           0     1.0000    0.6250    0.7692        16
           1     0.8193    0.9855    0.8947        69
           2     0.9773    0.9416    0.9591       137
           3     0.8571    0.6000    0.7059        10

    accuracy                         0.9181       232
   macro avg     0.9134    0.7880    0.8322       232
weighted avg     0.9267    0.9181    0.9160       232

Confusion Matrix:
 [[ 10   5   0   1]
 [  0  68   1   0]
 [  0   8 129   0]
 [  0   2   2   6]]

===== Model Metrics =====
Training Time: 6.14s
Inference/sample: 0.000707s
Testing Time: 0.16s
Model Size: 0.07 MB
Trainable Params: 13,028
RAM Usage: 645.25 MB


Cordoba station

In [2]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h1'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    # df['month'] = df.index.month
    # df['weekofyear'] = df.index.isocalendar().week
    # df['year'] = df.index.isocalendar().year
    # df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    # df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    # df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    # df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    # df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    # df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    # fft_vals = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_vals)[:len(df[target_col])]
    # fft_imag = np.imag(fft_vals)[:len(df[target_col])]
    # df['fft_real'] = np.pad(fft_real, (0, len(df) - len(fft_real)), constant_values=np.nan)
    # df['fft_imag'] = np.pad(fft_imag, (0, len(df) - len(fft_imag)), constant_values=np.nan)

    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cordoba_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cordoba_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.8387   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.9677   [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [39m3        [39m | [39m0.9677   [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m59.09    [39m | [39m0.002242 [39m | [39m8.042    [39m | [39m10.25    [39m |
| [39m4        [39m | [39m0.871    [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.8065   [39m | [39m37.89    



Report:
               precision    recall  f1-score   support

           0     0.8182    0.5294    0.6429        17
           1     0.8250    0.7333    0.7765        45
           2     0.8839    0.9802    0.9296       101
           3     1.0000    1.0000    1.0000        11

    accuracy                         0.8736       174
   macro avg     0.8818    0.8107    0.8372       174
weighted avg     0.8696    0.8736    0.8664       174

Confusion Matrix:
 [[ 9  5  3  0]
 [ 2 33 10  0]
 [ 0  2 99  0]
 [ 0  0  0 11]]

===== Model Metrics =====
Training Time: 7.03s
Inference/sample: 0.000949s
Testing Time: 0.17s
Model Size: 0.08 MB
Trainable Params: 14,820
RAM Usage: 782.83 MB


Jaen station

In [3]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h1'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    df['month'] = df.index.month
    df['weekofyear'] = df.index.isocalendar().week
    df['year'] = df.index.isocalendar().year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    # fft_vals = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_vals)[:len(df[target_col])]
    # fft_imag = np.imag(fft_vals)[:len(df[target_col])]
    # df['fft_real'] = np.pad(fft_real, (0, len(df) - len(fft_real)), constant_values=np.nan)
    # df['fft_imag'] = np.pad(fft_imag, (0, len(df) - len(fft_imag)), constant_values=np.nan)

    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Jaen_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Jaen_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.84     [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.92     [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [35m3        [39m | [35m0.94     [39m | [35m55.96    [39m | [35m0.1849   [39m | [35m59.09    [39m | [35m0.002242 [39m | [35m8.042    [39m | [35m10.25    [39m |
| [39m4        [39m | [39m0.94     [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.88     [39m | [39m37.89    



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step





===== Evaluation =====
Accuracy: 0.7772
Report:
               precision    recall  f1-score   support

           0     0.7143    0.3846    0.5000        13
           1     0.7949    0.6200    0.6966        50
           2     0.8629    0.8843    0.8735       121
           3     0.0000    0.0000    0.0000         0

    accuracy                         0.7772       184
   macro avg     0.5930    0.4722    0.5175       184
weighted avg     0.8339    0.7772    0.7990       184

Confusion Matrix:
 [[  5   8   0   0]
 [  2  31  17   0]
 [  0   0 107  14]
 [  0   0   0   0]]

===== Model Metrics =====
Training Time: 5.44s
Inference/sample: 0.001111s
Testing Time: 0.20s
Model Size: 0.05 MB
Trainable Params: 7,396
RAM Usage: 930.57 MB


Sevilla station

In [4]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from minisom import MiniSom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
tf.random.set_seed(42)

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)
    target_col = 'fenologia_h1'
    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    df['month'] = df.index.month
    df['weekofyear'] = df.index.isocalendar().week
    df['year'] = df.index.isocalendar().year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    # fft_vals = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_vals)[:len(df[target_col])]
    # fft_imag = np.imag(fft_vals)[:len(df[target_col])]
    # df['fft_real'] = np.pad(fft_real, (0, len(df) - len(fft_real)), constant_values=np.nan)
    # df['fft_imag'] = np.pad(fft_imag, (0, len(df) - len(fft_imag)), constant_values=np.nan)

    df.fillna(df.median(), inplace=True)
    return df, target_col

def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

def transform_with_som(som, X, y_dim, x_dim):
    winners = np.array([som.winner(x) for x in X])
    indices = winners[:, 0] * y_dim + winners[:, 1]
    return to_categorical(indices, num_classes=x_dim * y_dim)

def train_and_evaluate_som_dnn(X_train, X_test, y_train, y_test, x_dim, y_dim, epochs, batch_size, dropout_rate, learning_rate):
    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_test_enc = transform_with_som(som, X_test, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_test_cat = to_categorical(y_test)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    start_train = time.time()
    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=1)
    training_time = time.time() - start_train

    start_infer = time.time()
    y_pred = model.predict(X_test_enc).argmax(axis=1)
    inference_time = (time.time() - start_infer) / len(X_test_enc)
    testing_time = time.time() - start_infer

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {acc:.4f}")
    print("Report:\n", classification_report(y_test, y_pred, digits=4))
    print("Confusion Matrix:\n", cm)

    model.save("temp_model_som_dnn.h5", include_optimizer=False)
    model_size = os.path.getsize("temp_model_som_dnn.h5") / (1024 ** 2)
    os.remove("temp_model_som_dnn.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

def objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate, X_train, y_train, X_val, y_val):
    x_dim, y_dim = int(x_dim), int(y_dim)
    batch_size, epochs = int(batch_size), int(epochs)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    som = MiniSom(x_dim, y_dim, X_train.shape[1], sigma=1.0, learning_rate=learning_rate)
    som.random_weights_init(X_train)
    som.train_random(X_train, 100)

    X_train_enc = transform_with_som(som, X_train, y_dim, x_dim)
    X_val_enc = transform_with_som(som, X_val, y_dim, x_dim)
    y_train_cat = to_categorical(y_train)
    y_val_cat = to_categorical(y_val)

    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train_enc.shape[1]))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y_train_cat.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_enc, y_train_cat, epochs=epochs, batch_size=batch_size, verbose=0)
    _, acc = model.evaluate(X_val_enc, y_val_cat, verbose=0)
    return acc

def optimize_som_dnn_hyperparameters(X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    pbounds = {
        'epochs': (50, 100),
        'batch_size': (16, 64),
        'x_dim': (5, 15),
        'y_dim': (5, 15),
        'dropout_rate': (0.1, 0.5),
        'learning_rate': (0.0005, 0.01)
    }

    optimizer = BayesianOptimization(
        f=lambda epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate:
            objective_function(epochs, batch_size, x_dim, y_dim, dropout_rate, learning_rate,
                               X_train, y_train, X_val, y_val),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=10)
    return optimizer.max['params']

def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\sevilla_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\sevilla_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\nOptimizing Hyperparameters...")
    best_params = optimize_som_dnn_hyperparameters(X_train, y_train)
    print("\nBest Parameters Found:", best_params)

    print("\nTraining Final Model...")
    train_and_evaluate_som_dnn(
        X_train, X_test, y_train, y_test,
        x_dim=int(best_params['x_dim']),
        y_dim=int(best_params['y_dim']),
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        dropout_rate=float(best_params['dropout_rate']),
        learning_rate=float(best_params['learning_rate'])
    )

if __name__ == "__main__":
    main()



Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | learni... |   x_dim   |   y_dim   |
-------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.9286   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m86.6     [39m | [39m0.006187 [39m | [39m6.56     [39m | [39m6.56     [39m |
| [35m2        [39m | [35m0.9714   [39m | [35m18.79    [39m | [35m0.4465   [39m | [35m80.06    [39m | [35m0.007227 [39m | [35m5.206    [39m | [35m14.7     [39m |
| [35m3        [39m | [35m0.9857   [39m | [35m55.96    [39m | [35m0.1849   [39m | [35m59.09    [39m | [35m0.002242 [39m | [35m8.042    [39m | [35m10.25    [39m |
| [39m4        [39m | [39m0.9714   [39m | [39m36.73    [39m | [39m0.2165   [39m | [39m80.59    [39m | [39m0.001825 [39m | [39m7.921    [39m | [39m8.664    [39m |
| [39m5        [39m | [39m0.9714   [39m | [39m37.89    



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step





===== Evaluation =====
Accuracy: 0.8072
Report:
               precision    recall  f1-score   support

           0     0.1111    0.1000    0.1053        10
           1     0.8714    0.8026    0.8356        76
           2     0.9322    0.8594    0.8943       128
           3     0.3077    0.8889    0.4571         9

    accuracy                         0.8072       223
   macro avg     0.5556    0.6627    0.5731       223
weighted avg     0.8495    0.8072    0.8213       223

Confusion Matrix:
 [[  1   9   0   0]
 [  8  61   7   0]
 [  0   0 110  18]
 [  0   0   1   8]]

===== Model Metrics =====
Training Time: 5.40s
Inference/sample: 0.001052s
Testing Time: 0.23s
Model Size: 0.05 MB
Trainable Params: 7,396
RAM Usage: 1075.55 MB
