Transformer for cadiz

In [1]:
import pandas as pd
import numpy as np
import time
import os
import psutil
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras import layers, models
import tensorflow as tf
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft

np.random.seed(42)
tf.random.set_seed(42)
warnings.filterwarnings('ignore')

# ============================================
# 1. Load and Preprocess Data
# ============================================

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("Column 'TIME' missing.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)

    target_col = 'fenologia_h1'
    if target_col not in df.columns:
        target_col = 'target'
    if target_col not in df.columns:
        raise KeyError("Target column not found.")

    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col].astype(int)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    # df['month'] = df.index.month
    # df['weekofyear'] = df.index.isocalendar().week
    # df['year'] = df.index.isocalendar().year
    # df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    # df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    # df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    # df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    # df['correlation_target_month'] = df[target_col].rolling(window=6).corr(df['month'])
    # df['correlation_target_week'] = df[target_col].rolling(window=6).corr(df['weekofyear'])

    # fft_values = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_values)[:len(df[target_col])]
    # fft_imag = np.imag(fft_values)[:len(df[target_col])]
    # df['fft_real'] = np.concatenate([fft_real, np.nan * np.ones(len(df) - len(fft_real))])
    # df['fft_imag'] = np.concatenate([fft_imag, np.nan * np.ones(len(df) - len(fft_imag))])

    df.fillna(df.median(), inplace=True)
    return df, target_col

# ============================================
# 2. Feature Scaling
# ============================================
def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col].astype(int)
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))  # 3D for Transformer
    return X_scaled, y, scaler

# ============================================
# 3. Transformer Model
# ============================================
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(input_shape, num_classes=4, embed_dim=64, num_heads=2, ff_dim=128, dropout_rate=0.1):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Dense(embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ============================================
# 4. Objective Function
# ============================================
def objective_function(num_units, dropout_rate, epochs, batch_size, X_train, y_train):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(num_units),
        num_heads=2,
        ff_dim=int(num_units * 2),
        dropout_rate=dropout_rate
    )
    history = model.fit(
        X_train, y_train,
        epochs=int(epochs),
        batch_size=int(batch_size),
        validation_split=0.2,
        verbose=0
    )
    return np.max(history.history['val_accuracy'])

# ============================================
# 5. Optimize Hyperparameters
# ============================================
def optimize_hyperparameters(X_train, y_train):
    pbounds = {
        'num_units': (32, 128),
        'dropout_rate': (0.1, 0.5),
        'epochs': (30, 60),
        'batch_size': (16, 64)
    }

    optimizer = BayesianOptimization(
        f=lambda num_units, dropout_rate, epochs, batch_size: objective_function(
            num_units, dropout_rate, epochs, batch_size, X_train, y_train
        ),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=15)
    return optimizer.max['params']

# ============================================
# 6. Train and Evaluate
# ============================================
def train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(best_params['num_units']),
        num_heads=2,
        ff_dim=int(best_params['num_units'] * 2),
        dropout_rate=float(best_params['dropout_rate'])
    )

    # Training time
    start_train = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        validation_split=0.2,
        verbose=1
    )
    training_time = time.time() - start_train

    # Testing time
    start_test = time.time()
    y_pred = model.predict(X_test).argmax(axis=1)
    total_testing_time = time.time() - start_test
    inference_time_per_sample = total_testing_time / len(X_test)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, digits=4))
    print(confusion_matrix(y_test, y_pred))

    model.save("temp_model.h5")
    model_size = os.path.getsize("temp_model.h5") / (1024 ** 2)
    os.remove("temp_model.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Total Testing Time: {total_testing_time:.2f}s")
    print(f"Inference Time per Sample: {inference_time_per_sample:.6f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

    return model, history

# ============================================
# 7. Main Execution
# ============================================
def main():
    train_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_train.csv"
    test_path =  r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cadiz_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\n Optimizing Hyperparameters...")
    best_params = optimize_hyperparameters(X_train, y_train)
    print("\n Best Hyperparameters:", best_params)

    print("\n Training Final Transformer Model...")
    model, history = train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params)

if __name__ == "__main__":
    main()



 Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | num_units |
-------------------------------------------------------------------------

| [39m1        [39m | [39m0.9692   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m51.96    [39m | [39m89.47    [39m |
| [35m2        [39m | [35m1.0      [39m | [35m23.49    [39m | [35m0.1624   [39m | [35m31.74    [39m | [35m115.2    [39m |
| [39m3        [39m | [39m0.9692   [39m | [39m44.85    [39m | [39m0.3832   [39m | [39m30.62    [39m | [39m125.1    [39m |
| [39m4        [39m | [39m0.9846   [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m35.45    [39m | [39m49.61    [39m |
| [39m5        [39m | [39m1.0      [39m | [39m30.6     [39m | [39m0.3099   [39m | [39m42.96    [39m | [39m59.96    [39m |
| [39m6        [39m | [39m1.0      [39m | [39m23.48    [39m | [39m0.2078   [39m | [39m33.0     [39m | [39m115.1    [39m |
| [39m7       




===== Evaluation =====
Accuracy: 0.9483
              precision    recall  f1-score   support

           0     1.0000    0.4375    0.6087        16
           1     0.8846    1.0000    0.9388        69
           2     1.0000    0.9781    0.9889       137
           3     0.7692    1.0000    0.8696        10

    accuracy                         0.9483       232
   macro avg     0.9135    0.8539    0.8515       232
weighted avg     0.9557    0.9483    0.9426       232

[[  7   9   0   0]
 [  0  69   0   0]
 [  0   0 134   3]
 [  0   0   0  10]]

===== Model Metrics =====
Training Time: 9.07s
Total Testing Time: 0.43s
Inference Time per Sample: 0.001867s
Model Size: 2.24 MB
Trainable Params: 189,984
RAM Usage: 1425.90 MB


Transformer for Cordoba

In [2]:
import pandas as pd
import numpy as np
import time
import os
import psutil
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras import layers, models
import tensorflow as tf
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft

np.random.seed(42)
tf.random.set_seed(42)
warnings.filterwarnings('ignore')

# ============================================
# 1. Load and Preprocess Data
# ============================================

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("Column 'TIME' missing.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)

    target_col = 'fenologia_h1'
    if target_col not in df.columns:
        target_col = 'target'
    if target_col not in df.columns:
        raise KeyError("Target column not found.")

    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col].astype(int)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    # df['month'] = df.index.month
    # df['weekofyear'] = df.index.isocalendar().week
    # df['year'] = df.index.isocalendar().year
    # df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    # df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    # df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    # df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    # df['correlation_target_month'] = df[target_col].rolling(window=6).corr(df['month'])
    # df['correlation_target_week'] = df[target_col].rolling(window=6).corr(df['weekofyear'])

    # fft_values = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_values)[:len(df[target_col])]
    # fft_imag = np.imag(fft_values)[:len(df[target_col])]
    # df['fft_real'] = np.concatenate([fft_real, np.nan * np.ones(len(df) - len(fft_real))])
    # df['fft_imag'] = np.concatenate([fft_imag, np.nan * np.ones(len(df) - len(fft_imag))])

    df.fillna(df.median(), inplace=True)
    return df, target_col

# ============================================
# 2. Feature Scaling
# ============================================
def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col].astype(int)
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))  # 3D for Transformer
    return X_scaled, y, scaler

# ============================================
# 3. Transformer Model
# ============================================
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(input_shape, num_classes=4, embed_dim=64, num_heads=2, ff_dim=128, dropout_rate=0.1):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Dense(embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ============================================
# 4. Objective Function
# ============================================
def objective_function(num_units, dropout_rate, epochs, batch_size, X_train, y_train):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(num_units),
        num_heads=2,
        ff_dim=int(num_units * 2),
        dropout_rate=dropout_rate
    )
    history = model.fit(
        X_train, y_train,
        epochs=int(epochs),
        batch_size=int(batch_size),
        validation_split=0.2,
        verbose=0
    )
    return np.max(history.history['val_accuracy'])

# ============================================
# 5. Optimize Hyperparameters
# ============================================
def optimize_hyperparameters(X_train, y_train):
    pbounds = {
        'num_units': (32, 128),
        'dropout_rate': (0.1, 0.5),
        'epochs': (30, 60),
        'batch_size': (16, 64)
    }

    optimizer = BayesianOptimization(
        f=lambda num_units, dropout_rate, epochs, batch_size: objective_function(
            num_units, dropout_rate, epochs, batch_size, X_train, y_train
        ),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=15)
    return optimizer.max['params']

# ============================================
# 6. Train and Evaluate
# ============================================
def train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(best_params['num_units']),
        num_heads=2,
        ff_dim=int(best_params['num_units'] * 2),
        dropout_rate=float(best_params['dropout_rate'])
    )

    # Training time
    start_train = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        validation_split=0.2,
        verbose=1
    )
    training_time = time.time() - start_train

    # Testing time
    start_test = time.time()
    y_pred = model.predict(X_test).argmax(axis=1)
    total_testing_time = time.time() - start_test
    inference_time_per_sample = total_testing_time / len(X_test)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, digits=4))
    print(confusion_matrix(y_test, y_pred))

    model.save("temp_model.h5")
    model_size = os.path.getsize("temp_model.h5") / (1024 ** 2)
    os.remove("temp_model.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Total Testing Time: {total_testing_time:.2f}s")
    print(f"Inference Time per Sample: {inference_time_per_sample:.6f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

    return model, history

# ============================================
# 7. Main Execution
# ============================================
def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cordoba_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Cordoba_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\n Optimizing Hyperparameters...")
    best_params = optimize_hyperparameters(X_train, y_train)
    print("\n Best Hyperparameters:", best_params)

    print("\n Training Final Transformer Model...")
    model, history = train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params)

if __name__ == "__main__":
    main()



 Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | num_units |
-------------------------------------------------------------------------
| [39m1        [39m | [39m0.9516   [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m51.96    [39m | [39m89.47    [39m |
| [35m2        [39m | [35m0.9839   [39m | [35m23.49    [39m | [35m0.1624   [39m | [35m31.74    [39m | [35m115.2    [39m |
| [39m3        [39m | [39m0.9839   [39m | [39m44.85    [39m | [39m0.3832   [39m | [39m30.62    [39m | [39m125.1    [39m |
| [39m4        [39m | [39m0.9677   [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m35.45    [39m | [39m49.61    [39m |
| [39m5        [39m | [39m0.9355   [39m | [39m30.6     [39m | [39m0.3099   [39m | [39m42.96    [39m | [39m59.96    [39m |
| [39m6        [39m | [39m0.9839   [39m | [39m29.65    [39m | [39m0.1325   [39m | [39m54.78    [39m | [39m127.8    [39m |
| [39m7        




===== Evaluation =====
Accuracy: 0.8851
              precision    recall  f1-score   support

           0     0.6154    0.4706    0.5333        17
           1     0.8478    0.8667    0.8571        45
           2     0.9897    0.9505    0.9697       101
           3     0.6111    1.0000    0.7586        11

    accuracy                         0.8851       174
   macro avg     0.7660    0.8219    0.7797       174
weighted avg     0.8925    0.8851    0.8846       174

[[ 8  7  0  2]
 [ 5 39  1  0]
 [ 0  0 96  5]
 [ 0  0  0 11]]

===== Model Metrics =====
Training Time: 8.55s
Total Testing Time: 0.42s
Inference Time per Sample: 0.002393s
Model Size: 2.24 MB
Trainable Params: 189,984
RAM Usage: 2188.80 MB


Transformer for Jaen

In [3]:
import pandas as pd
import numpy as np
import time
import os
import psutil
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras import layers, models
import tensorflow as tf
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft

np.random.seed(42)
tf.random.set_seed(42)
warnings.filterwarnings('ignore')

# ============================================
# 1. Load and Preprocess Data
# ============================================

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("Column 'TIME' missing.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)

    target_col = 'fenologia_h1'
    if target_col not in df.columns:
        target_col = 'target'
    if target_col not in df.columns:
        raise KeyError("Target column not found.")

    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col].astype(int)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    df['month'] = df.index.month
    df['weekofyear'] = df.index.isocalendar().week
    df['year'] = df.index.isocalendar().year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(window=6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(window=6).corr(df['weekofyear'])

    # fft_values = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_values)[:len(df[target_col])]
    # fft_imag = np.imag(fft_values)[:len(df[target_col])]
    # df['fft_real'] = np.concatenate([fft_real, np.nan * np.ones(len(df) - len(fft_real))])
    # df['fft_imag'] = np.concatenate([fft_imag, np.nan * np.ones(len(df) - len(fft_imag))])

    df.fillna(df.median(), inplace=True)
    return df, target_col

# ============================================
# 2. Feature Scaling
# ============================================
def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col].astype(int)
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))  # 3D for Transformer
    return X_scaled, y, scaler

# ============================================
# 3. Transformer Model
# ============================================
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(input_shape, num_classes=4, embed_dim=64, num_heads=2, ff_dim=128, dropout_rate=0.1):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Dense(embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ============================================
# 4. Objective Function
# ============================================
def objective_function(num_units, dropout_rate, epochs, batch_size, X_train, y_train):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(num_units),
        num_heads=2,
        ff_dim=int(num_units * 2),
        dropout_rate=dropout_rate
    )
    history = model.fit(
        X_train, y_train,
        epochs=int(epochs),
        batch_size=int(batch_size),
        validation_split=0.2,
        verbose=0
    )
    return np.max(history.history['val_accuracy'])

# ============================================
# 5. Optimize Hyperparameters
# ============================================
def optimize_hyperparameters(X_train, y_train):
    pbounds = {
        'num_units': (32, 128),
        'dropout_rate': (0.1, 0.5),
        'epochs': (30, 60),
        'batch_size': (16, 64)
    }

    optimizer = BayesianOptimization(
        f=lambda num_units, dropout_rate, epochs, batch_size: objective_function(
            num_units, dropout_rate, epochs, batch_size, X_train, y_train
        ),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=15)
    return optimizer.max['params']

# ============================================
# 6. Train and Evaluate
# ============================================
def train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(best_params['num_units']),
        num_heads=2,
        ff_dim=int(best_params['num_units'] * 2),
        dropout_rate=float(best_params['dropout_rate'])
    )

    # Training time
    start_train = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        validation_split=0.2,
        verbose=1
    )
    training_time = time.time() - start_train

    # Testing time
    start_test = time.time()
    y_pred = model.predict(X_test).argmax(axis=1)
    total_testing_time = time.time() - start_test
    inference_time_per_sample = total_testing_time / len(X_test)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, digits=4))
    print(confusion_matrix(y_test, y_pred))

    model.save("temp_model.h5")
    model_size = os.path.getsize("temp_model.h5") / (1024 ** 2)
    os.remove("temp_model.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Total Testing Time: {total_testing_time:.2f}s")
    print(f"Inference Time per Sample: {inference_time_per_sample:.6f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

    return model, history

# ============================================
# 7. Main Execution
# ============================================
def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Jaen_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\Jaen_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\n Optimizing Hyperparameters...")
    best_params = optimize_hyperparameters(X_train, y_train)
    print("\n Best Hyperparameters:", best_params)

    print("\n Training Final Transformer Model...")
    model, history = train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params)

if __name__ == "__main__":
    main()



 Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | num_units |
-------------------------------------------------------------------------
| [39m1        [39m | [39m1.0      [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m51.96    [39m | [39m89.47    [39m |
| [39m2        [39m | [39m1.0      [39m | [39m23.49    [39m | [39m0.1624   [39m | [39m31.74    [39m | [39m115.2    [39m |
| [39m3        [39m | [39m1.0      [39m | [39m44.85    [39m | [39m0.3832   [39m | [39m30.62    [39m | [39m125.1    [39m |
| [39m4        [39m | [39m0.98     [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m35.45    [39m | [39m49.61    [39m |
| [39m5        [39m | [39m1.0      [39m | [39m30.6     [39m | [39m0.3099   [39m | [39m42.96    [39m | [39m59.96    [39m |
| [39m6        [39m | [39m1.0      [39m | [39m35.47    [39m | [39m0.1346   [39m | [39m30.11    [39m | [39m118.2    [39m |
| [39m7        



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step

===== Evaluation =====
Accuracy: 0.9185




              precision    recall  f1-score   support

           0     0.9000    0.6923    0.7826        13
           1     0.9302    0.8000    0.8602        50
           2     0.9302    0.9917    0.9600       121
           3     0.0000    0.0000    0.0000         0

    accuracy                         0.9185       184
   macro avg     0.6901    0.6210    0.6507       184
weighted avg     0.9281    0.9185    0.9204       184

[[  9   3   0   1]
 [  1  40   9   0]
 [  0   0 120   1]
 [  0   0   0   0]]

===== Model Metrics =====
Training Time: 9.11s
Total Testing Time: 0.49s
Inference Time per Sample: 0.002653s
Model Size: 1.39 MB
Trainable Params: 115,437
RAM Usage: 2794.13 MB


Transformer for Sevilla

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
import warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras import layers, models
import tensorflow as tf
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft

np.random.seed(42)
tf.random.set_seed(42)
warnings.filterwarnings('ignore')

# ============================================
# 1. Load and Preprocess Data
# ============================================

def load_and_preprocess_data(filepath):
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("Column 'TIME' missing.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    df.dropna(subset=['TIME'], inplace=True)
    df.set_index('TIME', inplace=True)
    df = df.apply(pd.to_numeric, errors='coerce')
    df.dropna(axis=1, how='all', inplace=True)

    target_col = 'fenologia_h1'
    if target_col not in df.columns:
        target_col = 'target'
    if target_col not in df.columns:
        raise KeyError("Target column not found.")

    df[target_col].interpolate(method='linear', inplace=True)
    df[target_col] = df[target_col].astype(int)
    df[target_col] = df[target_col] - 1

    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        df[f'{target_col}_roll_mean_{window}'] = df[target_col].rolling(window).mean()
        df[f'{target_col}_roll_std_{window}'] = df[target_col].rolling(window).std()

    df['month'] = df.index.month
    df['weekofyear'] = df.index.isocalendar().week
    df['year'] = df.index.isocalendar().year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(window=6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(window=6).corr(df['weekofyear'])

    # fft_values = fft(df[target_col].dropna().values)
    # fft_real = np.real(fft_values)[:len(df[target_col])]
    # fft_imag = np.imag(fft_values)[:len(df[target_col])]
    # df['fft_real'] = np.concatenate([fft_real, np.nan * np.ones(len(df) - len(fft_real))])
    # df['fft_imag'] = np.concatenate([fft_imag, np.nan * np.ones(len(df) - len(fft_imag))])

    df.fillna(df.median(), inplace=True)
    return df, target_col

# ============================================
# 2. Feature Scaling
# ============================================
def feature_scaling(df, target_col):
    X = df.drop(columns=[target_col])
    y = df[target_col].astype(int)
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))  # 3D for Transformer
    return X_scaled, y, scaler

# ============================================
# 3. Transformer Model
# ============================================
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(input_shape, num_classes=4, embed_dim=64, num_heads=2, ff_dim=128, dropout_rate=0.1):
    inputs = tf.keras.Input(shape=input_shape)
    x = layers.Dense(embed_dim)(inputs)
    x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(ff_dim, activation="relu")(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ============================================
# 4. Objective Function
# ============================================
def objective_function(num_units, dropout_rate, epochs, batch_size, X_train, y_train):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(num_units),
        num_heads=2,
        ff_dim=int(num_units * 2),
        dropout_rate=dropout_rate
    )
    history = model.fit(
        X_train, y_train,
        epochs=int(epochs),
        batch_size=int(batch_size),
        validation_split=0.2,
        verbose=0
    )
    return np.max(history.history['val_accuracy'])

# ============================================
# 5. Optimize Hyperparameters
# ============================================
def optimize_hyperparameters(X_train, y_train):
    pbounds = {
        'num_units': (32, 128),
        'dropout_rate': (0.1, 0.5),
        'epochs': (30, 60),
        'batch_size': (16, 64)
    }

    optimizer = BayesianOptimization(
        f=lambda num_units, dropout_rate, epochs, batch_size: objective_function(
            num_units, dropout_rate, epochs, batch_size, X_train, y_train
        ),
        pbounds=pbounds,
        random_state=42,
        verbose=2
    )
    optimizer.maximize(init_points=5, n_iter=15)
    return optimizer.max['params']

# ============================================
# 6. Train and Evaluate
# ============================================
def train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params):
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        num_classes=4,
        embed_dim=int(best_params['num_units']),
        num_heads=2,
        ff_dim=int(best_params['num_units'] * 2),
        dropout_rate=float(best_params['dropout_rate'])
    )

    # Training time
    start_train = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=int(best_params['epochs']),
        batch_size=int(best_params['batch_size']),
        validation_split=0.2,
        verbose=1
    )
    training_time = time.time() - start_train

    # Testing time
    start_test = time.time()
    y_pred = model.predict(X_test).argmax(axis=1)
    total_testing_time = time.time() - start_test
    inference_time_per_sample = total_testing_time / len(X_test)

    print("\n===== Evaluation =====")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred, digits=4))
    print(confusion_matrix(y_test, y_pred))

    model.save("temp_model.h5")
    model_size = os.path.getsize("temp_model.h5") / (1024 ** 2)
    os.remove("temp_model.h5")

    total_params = np.sum([np.prod(v.shape) for v in model.trainable_weights])
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Total Testing Time: {total_testing_time:.2f}s")
    print(f"Inference Time per Sample: {inference_time_per_sample:.6f}s")
    print(f"Model Size: {model_size:.2f} MB")
    print(f"Trainable Params: {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")

    return model, history

# ============================================
# 7. Main Execution
# ============================================
def main():
    train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\sevilla_train.csv"
    test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1\sevilla_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, _ = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    X_test, y_test, _ = feature_scaling(df_test, target_col)

    print("\n Optimizing Hyperparameters...")
    best_params = optimize_hyperparameters(X_train, y_train)
    print("\n Best Hyperparameters:", best_params)

    print("\n Training Final Transformer Model...")
    model, history = train_and_evaluate_model(X_train, X_test, y_train, y_test, best_params)

if __name__ == "__main__":
    main()



 Optimizing Hyperparameters...
|   iter    |  target   | batch_... | dropou... |  epochs   | num_units |
-------------------------------------------------------------------------
| [39m1        [39m | [39m1.0      [39m | [39m33.98    [39m | [39m0.4803   [39m | [39m51.96    [39m | [39m89.47    [39m |
| [39m2        [39m | [39m1.0      [39m | [39m23.49    [39m | [39m0.1624   [39m | [39m31.74    [39m | [39m115.2    [39m |
| [39m3        [39m | [39m1.0      [39m | [39m44.85    [39m | [39m0.3832   [39m | [39m30.62    [39m | [39m125.1    [39m |
| [39m4        [39m | [39m1.0      [39m | [39m55.96    [39m | [39m0.1849   [39m | [39m35.45    [39m | [39m49.61    [39m |
| [39m5        [39m | [39m1.0      [39m | [39m30.6     [39m | [39m0.3099   [39m | [39m42.96    [39m | [39m59.96    [39m |
| [39m6        [39m | [39m1.0      [39m | [39m18.1     [39m | [39m0.1947   [39m | [39m56.69    [39m | [39m32.88    [39m |
| [39m7        



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step





===== Evaluation =====
Accuracy: 0.9283
              precision    recall  f1-score   support

           0     0.7143    1.0000    0.8333        10
           1     1.0000    0.8816    0.9371        76
           2     0.9680    0.9453    0.9565       128
           3     0.5294    1.0000    0.6923         9

    accuracy                         0.9283       223
   macro avg     0.8029    0.9567    0.8548       223
weighted avg     0.9498    0.9283    0.9337       223

[[ 10   0   0   0]
 [  4  67   4   1]
 [  0   0 121   7]
 [  0   0   0   9]]

===== Model Metrics =====
Training Time: 13.11s
Total Testing Time: 0.62s
Inference Time per Sample: 0.002788s
Model Size: 1.39 MB
Trainable Params: 115,437
RAM Usage: 3301.42 MB
