In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt # Adăugăm matplotlib pentru vizualizare
from tensorflow.keras.optimizers import AdamW

In [2]:
import tensorflow as tf
print("GPU available:", tf.config.list_physical_devices('GPU'))


GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [9]:
print("Verificarea disponibilității GPU:")
print(tf.config.list_physical_devices('GPU'))
if tf.config.list_physical_devices('GPU'):
    print("GPU detectat! Antrenarea va fi mai rapidă.")
else:
    print("Niciun GPU detectat. Antrenarea va folosi CPU-ul.")



Verificarea disponibilității GPU:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU detectat! Antrenarea va fi mai rapidă.


In [10]:
# Celula 3: Încărcarea și pregătirea datelor
my_file = "./pentilfuran.MDE"
try:
    # Modificare aici: înlocuim delim_whitespace=True cu sep='\s+'
    df = pd.read_csv(my_file, sep='\s+', comment='#', names=["Step", "T", "E_KS", "E_tot", "Vol", "P"])
    print(f"Fișierul '{my_file}' a fost încărcat cu succes.")
except FileNotFoundError:
    print(f"Eroare: Fișierul '{my_file}' nu a fost găsit. Se va folosi un DataFrame mock pentru demonstrație.")
    # Creăm un DataFrame mock cu 90k linii pentru a simula datele reale
    num_mock_rows = 90000
    data_mock = {
        "Step": list(range(1, 101)) * (num_mock_rows // 100),
        "T": np.random.rand(num_mock_rows) * 50 + 1.45,
        "E_KS": np.sin(np.linspace(0, 2 * np.pi * num_mock_rows / 50, num_mock_rows)) * 0.2 + np.random.rand(num_mock_rows) * 0.01 - 2130.9,
        "E_tot": np.random.rand(num_mock_rows) * 0.1 - 2130.9,
        "Vol": [3287.283] * num_mock_rows,
        "P": np.random.rand(num_mock_rows) * 1 - 0.5
    }
    df = pd.DataFrame(data_mock)
    print("DataFrame mock generat.")


# Filtrarea și concatenarea datelor pe baza 'Step'
# Presupunem că vrei 900 de rânduri per bloc (de la index 1 la 900)
# Dacă vrei toate rândurile din fiecare bloc, folosește `iloc[:]` în loc de `iloc[1:901]`
dfs = [df[df['Step'] == i].iloc[1:901] for i in range(1, 100)]
df_data = pd.concat(dfs, ignore_index=True)

print(f"Numărul total de linii de date după filtrare și concatenare: {len(df_data)}")

# Extrage coloana E_KS (Energia K) pentru predicție
# `global_step_index` este indexul global al datelor concatenate
global_step_index = df_data.index.values 


Fișierul './pentilfuran.MDE' a fost încărcat cu succes.
Numărul total de linii de date după filtrare și concatenare: 89100


In [11]:
# Celula 4: Ingineria Caracteristicilor (Adăugarea Frecvențelor Dominante)
# Frecvența dominantă identificată de FFT este 0.02 Hz (cicluri per pas)
dominant_frequency = 0.02

# Calculează caracteristicile sinusoidale și cosinusoidale
# Folosim `global_step_index` pentru a asigura continuitatea fazei pe întregul set de date
df_data['sin_feature'] = np.sin(2 * np.pi * dominant_frequency * global_step_index)
df_data['cos_feature'] = np.cos(2 * np.pi * dominant_frequency * global_step_index)

# Coloanele pe care le vom scala și folosi ca intrări pentru model
features_to_scale = ['E_KS', 'sin_feature', 'cos_feature']
# Scalăm toate caracteristicile relevante
scaler = MinMaxScaler()
df_data[features_to_scale] = scaler.fit_transform(df_data[features_to_scale])

# Numărul de caracteristici de intrare pentru model
num_features = len(features_to_scale)

print(f"Caracteristici de intrare pentru model: {features_to_scale}")
print(f"Numărul de caracteristici de intrare: {num_features}")


Caracteristici de intrare pentru model: ['E_KS', 'sin_feature', 'cos_feature']
Numărul de caracteristici de intrare: 3


In [12]:
# Celula 5: Crearea secvențelor de time-series
def create_sequences(data, sequence_length, output_steps):
    """
    Creează secvențe de intrare și ținte pentru un model de time-series.
    
    Args:
        data (np.array): Datele de intrare (caracteristici multiple).
        sequence_length (int): Lungimea secvenței de intrare.
        output_steps (int): Numărul de pași viitori de prezis.
        
    Returns:
        tuple: (sequences, targets) - array-uri numpy cu secvențele de intrare și țintele.
    """
    sequences = []
    targets = []
    # Iterează până la (lungimea datelor - lungimea secvenței de intrare - lungimea secvenței de ieșire)
    for i in range(len(data) - sequence_length - output_steps + 1):
        seq = data[i : (i + sequence_length)]
        # Prezicem doar E_KS (prima coloană, index 0)
        target = data[(i + sequence_length) : (i + sequence_length + output_steps), 0] 
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Definirea lungimii secvenței și a pașilor de ieșire
sequence_length = 200 # Lungimea secvenței de intrare
OUT_STEPS = 100       # Numărul de pași viitori de prezis

# Pregătim datele pentru funcția create_sequences
# Folosim toate caracteristicile scalate
data_for_sequences = df_data[features_to_scale].values

sequences, targets = create_sequences(data_for_sequences, sequence_length, OUT_STEPS)
print(f"Forma secvențelor de intrare: {sequences.shape}")
print(f"Forma țintelor (ieșire): {targets.shape}")


Forma secvențelor de intrare: (88801, 200, 3)
Forma țintelor (ieșire): (88801, 100)


In [13]:
# Celula 6: Împărțirea datelor în seturi de antrenament, validare și testare
# Asigurăm că split-ul este consistent cu datele tale de 90k linii
# Calculează dimensiunile seturilor
total_sequences = len(sequences)
train_size = int(0.75 * total_sequences) 
val_size = int(0.20 * total_sequences)   
test_size = total_sequences - train_size - val_size 

X_train, y_train = sequences[:train_size], targets[:train_size]
X_val, y_val = sequences[train_size : train_size + val_size], targets[train_size : train_size + val_size]
X_test, y_test = sequences[train_size + val_size :], targets[train_size + val_size :]

# Reshape pentru modelul Keras (num_samples, timesteps, num_features)
# y_train, y_val, y_test trebuie să fie reshaped la (num_samples, OUT_STEPS, 1)
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
y_val = y_val.reshape((y_val.shape[0], y_val.shape[1], 1))
y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], 1))

print(f"Forma X_train: {X_train.shape}, Y_train: {y_train.shape}")
print(f"Forma X_val: {X_val.shape}, Y_val: {y_val.shape}")
print(f"Forma X_test: {X_test.shape}, Y_test: {y_test.shape}")


Forma X_train: (66600, 200, 3), Y_train: (66600, 100, 1)
Forma X_val: (17760, 200, 3), Y_val: (17760, 100, 1)
Forma X_test: (4441, 200, 3), Y_test: (4441, 100, 1)


In [14]:
# Celula 7: Definirea Modelului Hibrid CNN-LSTM
def create_improved_hybrid_model(sequence_length, num_features, output_steps):
    """
    Creează un model hibrid CNN-LSTM îmbunătățit pentru predicția seriilor de timp.
    
    Args:
        sequence_length (int): Lungimea secvenței de intrare.
        num_features (int): Numărul de caracteristici de intrare.
        output_steps (int): Numărul de pași viitori de prezis.
        
    Returns:
        tf.keras.Model: Modelul Keras compilat.
    """
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(sequence_length, num_features)), # Input cu num_features
        tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu', padding='causal'), 
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.1),
        
        tf.keras.layers.Conv1D(filters=128, kernel_size=5, activation='relu', padding='causal'), 
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.1),
        
        tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='causal'), 
        tf.keras.layers.BatchNormalization(),
        
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, 
                                                          dropout=0.1, recurrent_dropout=0.1)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, dropout=0.1, recurrent_dropout=0.1)),
        
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(128, activation='relu'),
        
        # Stratul de ieșire: prezice OUT_STEPS valori pentru o singură caracteristică (E_KS)
        tf.keras.layers.Dense(output_steps * 1), 
        tf.keras.layers.Reshape([output_steps, 1]) 
    ])
    model.summary() # Afișează sumarul modelului
    return model

# Inițializează modelul cu numărul corect de caracteristici
model_hybrid = create_improved_hybrid_model(sequence_length, num_features, OUT_STEPS)



I0000 00:00:1754386029.992125    5938 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1754386029.992332    5938 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1754386029.992461    5938 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1754386030.061859    5938 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [15]:
# Celula 8: Definirea Callback-urilor și a Funcției de Antrenament
# Callbacks for training
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=8, # Mărit răbdarea
    restore_best_weights=True,
    verbose=1
)

model_checkpoint = ModelCheckpoint(
    filepath="best_hybrid_model_with_fourier.keras", # Nume nou pentru fișierul modelului salvat
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7, # Factor de reducere
    patience=5, # Răbdare pentru reducerea learning rate-ului
    min_lr=1e-6, # Learning rate minim
    verbose=1
)

def compile_and_fit_improved(model, X_train, y_train, X_val, y_val, epochs=30, batch_size=32):
    """
    Compilează și antrenează modelul cu optimizator și callback-uri îmbunătățite.
    
    Args:
        model (tf.keras.Model): Modelul Keras de antrenat.
        X_train (np.array): Datele de antrenament (intrări).
        y_train (np.array): Țintele de antrenament.
        X_val (np.array): Datele de validare (intrări).
        y_val (np.array): Țintele de validare.
        epochs (int): Numărul maxim de epoci.
        batch_size (int): Dimensiunea batch-ului.
        
    Returns:
        tf.keras.callbacks.History: Obiectul History returnat de model.fit.
    """
    optimizer = AdamW(
        learning_rate=0.0005,  # Learning rate mai mic
        weight_decay=0.01
    )
    
    model.compile(
        optimizer=optimizer,
        loss='huber',  # Mai robust decât MSE pentru outliers
        metrics=['mae']
    )
    
    callbacks = [
        early_stopping,
        reduce_lr,
        model_checkpoint
    ]
    
    print(f"Începe antrenarea modelului pentru maxim {epochs} epoci cu batch_size={batch_size}...")
    history = model.fit(
        X_train, y_train, 
        epochs=epochs, 
        batch_size=batch_size,  # Batch size mai mare
        validation_data=(X_val, y_val),
        callbacks=callbacks,
        verbose=1
    )
    return history


In [16]:
# Celula 9: Antrenarea Modelului
history = compile_and_fit_improved(model_hybrid, X_train, y_train, X_val, y_val, epochs=30, batch_size=32)


Începe antrenarea modelului pentru maxim 30 epoci cu batch_size=32...
Epoch 1/30


2025-08-05 12:27:52.703373: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 91001
W0000 00:00:1754386072.816259    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.839219    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.845624    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.846379    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.847135    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.873968    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.874735    6895 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1754386072.877708    6895 gpu_

UnknownError: Graph execution error:

Detected at node adamw/Pow_43 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 211, in start

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3116, in run_cell

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3171, in _run_cell

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3394, in run_cell_async

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3639, in run_ast_nodes

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3699, in run_code

  File "/tmp/ipykernel_5938/220854830.py", line 2, in <module>

  File "/tmp/ipykernel_5938/1167740463.py", line 59, in compile_and_fit_improved

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 114, in one_step_on_data

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 81, in train_step

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/optimizers/base_optimizer.py", line 463, in apply_gradients

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/optimizers/base_optimizer.py", line 527, in apply

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/optimizers/base_optimizer.py", line 593, in _backend_apply_gradients

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/optimizer.py", line 120, in _backend_update_step

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/optimizer.py", line 134, in _distributed_tf_update_step

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/optimizer.py", line 131, in apply_grad_to_update_var

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/optimizers/adam.py", line 110, in update_step

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/ops/numpy.py", line 6391, in power

  File "/home/glosper/anaconda3/envs/tf216/lib/python3.11/site-packages/keras/src/backend/tensorflow/numpy.py", line 2653, in power

JIT compilation failed.
	 [[{{node adamw/Pow_43}}]] [Op:__inference_multi_step_on_iterator_18915]