In [2]:
from ts_modeling import build_dataset, build_full_dataset, create_mlp, evaluate_model, process_sep_events
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import wandb
from datetime import datetime
from wandb.keras import WandbCallback
from evaluate.utils import plot_tsne_pds
from models import modeling
import numpy as np
import random
import tensorflow as tf

In [3]:
# SEEDING
SEED = 42  # seed number 

# Set NumPy seed
np.random.seed(SEED)

# Set TensorFlow seed
tf.random.set_seed(SEED)

# Set random seed
random.seed(SEED)


In [3]:
mb = modeling.ModelBuilder()

In [4]:
for inputs_to_use in [['e0.5', 'e1.8'], ['e0.5', 'e1.8', 'p'], ['e0.5'], ['e0.5', 'p']]:
    for add_slope in [True, False]:

        # PARAMS
        # inputs_to_use = ['e0.5']
        # add_slope = True

        # Join the inputs_to_use list into a string, replace '.' with '_', and join with '-'
        inputs_str = "_".join(input_type.replace('.', '_') for input_type in inputs_to_use)

        # Construct the title
        title = f'MLP_{inputs_str}_add_slope_{str(add_slope)}'

        # Replace any other characters that are not suitable for filenames (if any)
        title = title.replace(' ', '_').replace(':', '_')

        # Create a unique experiment name with a timestamp
        current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
        experiment_name = f'{title}_{current_time}'

        # Initialize wandb
        wandb.init(project="mlp-ts-pds", name=experiment_name, config={
            "inputs_to_use": inputs_to_use,
            "add_slope": add_slope,
        })

        # set the root directory
        root_dir = 'D:/College/Fall2023/electron_cme_v4/electron_cme_data_split'
        # build the dataset
        X_train, y_train = build_dataset(root_dir + '/training', inputs_to_use=inputs_to_use, add_slope=add_slope)
        X_subtrain, y_subtrain = build_dataset(root_dir + '/subtraining', inputs_to_use=inputs_to_use,
                                               add_slope=add_slope)
        X_test, y_test = build_dataset(root_dir + '/testing', inputs_to_use=inputs_to_use, add_slope=add_slope)
        X_val, y_val = build_dataset(root_dir + '/validation', inputs_to_use=inputs_to_use, add_slope=add_slope)

        # print all data shapes
        print(f'X_train.shape: {X_train.shape}')
        print(f'y_train.shape: {y_train.shape}')
        print(f'X_subtrain.shape: {X_subtrain.shape}')
        print(f'y_subtrain.shape: {y_subtrain.shape}')
        print(f'X_test.shape: {X_test.shape}')
        print(f'y_test.shape: {y_test.shape}')
        print(f'X_val.shape: {X_val.shape}')
        print(f'y_val.shape: {y_val.shape}')

        # print a sample of the training data
        print(f'X_train[0]: {X_train[0]}')
        print(f'y_train[0]: {y_train[0]}')

        # get the number of features
        n_features = X_train.shape[1]
        print(f'n_features: {n_features}')
        hiddens = [100, 100, 50]

        # create the model
        # mlp_model_sep = create_mlp(input_dim=n_features, hiddens=hiddens)
        mlp_model_sep = mb.create_model_pds(input_dim=n_features, hiddens=hiddens, feat_dim=9)
        mlp_model_sep.summary()

        # Set the early stopping patience and learning rate as variables
        Options = {
            'batch_size': 32,  # Assuming batch_size is defined elsewhere
            'epochs': 10000,
            'patience': 50,  # Updated to 50
            'learning_rate': 3e-4,  # Updated to 3e-4
            'weight_decay': 0,  # Added weight decay
            'momentum_beta1': 0.9,  # Added momentum beta1
        }


        # Define the EarlyStopping callback
        # early_stopping = EarlyStopping(monitor='val_forecast_head_loss', patience=patience, verbose=1,
        #                                restore_best_weights=True)

        # Compile the model with the specified learning rate
        # mlp_model_sep.compile(optimizer=Adam(learning_rate=learning_rate,
        #                                       weight_decay=weight_decay,
        #                                       beta_1=momentum_beta1),
        #                       loss={'forecast_head': 'mse'})

        # Train the model with the callback
        # history = mlp_model_sep.fit(X_subtrain,
        #                             {'forecast_head': y_subtrain},
        #                             epochs=1000, batch_size=32,
        #                             validation_data=(X_val, {'forecast_head': y_val}),
        #                             callbacks=[early_stopping, WandbCallback()])

        # Plot the training and validation loss
        # plt.figure(figsize=(12, 6))
        # plt.plot(history.history['loss'], label='Training Loss')
        # plt.plot(history.history['val_loss'], label='Validation Loss')
        # plt.title('Training and Validation Loss')
        # plt.xlabel('Epochs')
        # plt.ylabel('Loss')
        # plt.legend()
        # # save the plot
        # plt.savefig(f'mlp_loss_{title}.png')

        # Determine the optimal number of epochs from early stopping
        # optimal_epochs = early_stopping.stopped_epoch - patience + 1  # Adjust for the offset
        # final_mlp_model_sep = create_mlp(input_dim=n_features,
        #                                  hiddens=hiddens)  # Recreate the model architecture
        # final_mlp_model_sep.compile(optimizer=Adam(learning_rate=learning_rate,
        #                                             weight_decay=weight_decay,
        #                                             beta_1=momentum_beta1),
        #                             loss={'forecast_head': 'mse'})  # Compile the model just like before
        # # Train on the full dataset
        # final_mlp_model_sep.fit(X_train, {'forecast_head': y_train}, epochs=optimal_epochs, batch_size=32,
        #                         verbose=1)

        # evaluate the model on test data
        # error_mae = evaluate_model(final_mlp_model_sep, X_test, y_test)
        # print(f'mae error: {error_mae}')
        # # Log the MAE error to wandb
        # wandb.log({"mae_error": error_mae})

        # Process SEP event files in the specified directory
        # test_directory = root_dir + '/testing'
        # filenames = process_sep_events(
        #     test_directory,
        #     final_mlp_model_sep,
        #     model_type='mlp',
        #     title=title,
        #     inputs_to_use=inputs_to_use,
        #     add_slope=add_slope)

        # Log the plot to wandb
        # for filename in filenames:
        #     wandb.log({f'{filename}': wandb.Image(filename)})
        
        mb.train_pds(mlp_model_sep,
             X_subtrain, y_subtrain,
             X_val, y_val,
             X_train, y_train,
             learning_rate=Options['learning_rate'],
             epochs=Options['epochs'],
             batch_size=Options['batch_size'],
             patience=Options['patience'], save_tag=current_time + "_features")

        # Log model to Weights & Biases
        wandb.log_artifact('path/to/model', type='model', name='pds_model')
        
        file_path = plot_tsne_pds(mlp_model_sep,
                                  X_train,
                                  y_train,
                                  title, 'training',
                                  save_tag=current_time)
        
        # Log t-SNE plot for training
        wandb.log_artifact(file_path, type='plot', name='tsne_training_plot')
        print('file_path: ' + file_path)
        
        file_path = plot_tsne_pds(mlp_model_sep,
                                  X_test,
                                  y_test,
                                  title, 'testing',
                                  save_tag=current_time)
        
        # Log t-SNE plot for testing
        wandb.log_artifact(file_path, type='plot', name='tsne_testing_plot')
        print('file_path: ' + file_path)

        # Finish the wandb run
        wandb.finish()

    


[34m[1mwandb[0m: Currently logged in as: [33merud1t3[0m. Use [1m`wandb login --relogin`[0m to force relogin


X_train.shape: (24217, 98, 1)
y_train.shape: (24217,)
X_subtrain.shape: (18952, 98, 1)
y_subtrain.shape: (18952,)
X_test.shape: (10357, 98, 1)
y_test.shape: (10357,)
X_val.shape: (5265, 98, 1)
y_val.shape: (5265,)
X_train[0]: [[ 8.32918127e-01]
 [ 8.32441929e-01]
 [ 8.31286513e-01]
 [ 8.30311912e-01]
 [ 8.27953573e-01]
 [ 8.29726848e-01]
 [ 8.29137084e-01]
 [ 8.22263682e-01]
 [ 8.19803034e-01]
 [ 8.19114328e-01]
 [ 8.17616930e-01]
 [ 8.18847247e-01]
 [ 8.18795562e-01]
 [ 8.18419663e-01]
 [ 8.16262466e-01]
 [ 8.17023541e-01]
 [ 8.15991693e-01]
 [ 8.15499170e-01]
 [ 8.15388439e-01]
 [ 8.15226785e-01]
 [ 8.13570890e-01]
 [ 8.11499156e-01]
 [ 8.13348330e-01]
 [ 8.14014860e-01]
 [ 8.11950317e-01]
 [ 4.84655267e-01]
 [ 4.78858197e-01]
 [ 4.78858197e-01]
 [ 4.76140841e-01]
 [ 4.73112523e-01]
 [ 4.73982469e-01]
 [ 4.71807092e-01]
 [ 4.57714143e-01]
 [ 4.43027860e-01]
 [ 4.48199824e-01]
 [ 4.51296118e-01]
 [ 4.39538095e-01]
 [ 4.52987532e-01]
 [ 4.43815616e-01]
 [ 4.42870870e-01]
 [ 4.42236379e


KeyboardInterrupt



In [ ]:
for inputs_to_use in [['e0.5', 'e1.8'], ['e0.5', 'e1.8', 'p']]:
    for add_slope in [True, False]:
        for cme_speed_threshold in [0, 500]:
            # PARAMS
            # inputs_to_use = ['e0.5']
            # add_slope = True
    
            # Join the inputs_to_use list into a string, replace '.' with '_', and join with '-'
            inputs_str = "_".join(input_type.replace('.', '_') for input_type in inputs_to_use)
    
            # Construct the title
            title = f'MLP_{inputs_str}_add_slope_{str(add_slope)}'
    
            # Replace any other characters that are not suitable for filenames (if any)
            title = title.replace(' ', '_').replace(':', '_')
    
            # Create a unique experiment name with a timestamp
            current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
            experiment_name = f'{title}_{current_time}'
    
            # Initialize wandb
            wandb.init(project="mlp-ts-lowerlr", name=experiment_name, config={
                "inputs_to_use": inputs_to_use,
                "add_slope": add_slope,
            })
    
            # set the root directory
            root_dir = 'D:/College/Fall2023/electron_cme_v4/electron_cme_data_split'
            # build the dataset
            X_train, y_train = build_dataset(root_dir + '/training', inputs_to_use=inputs_to_use, add_slope=add_slope)
            X_subtrain, y_subtrain = build_dataset(root_dir + '/subtraining', inputs_to_use=inputs_to_use,
                                                   add_slope=add_slope)
            X_test, y_test = build_dataset(root_dir + '/testing', inputs_to_use=inputs_to_use, add_slope=add_slope)
            X_val, y_val = build_dataset(root_dir + '/validation', inputs_to_use=inputs_to_use, add_slope=add_slope)
    
            # print all data shapes
            print(f'X_train.shape: {X_train.shape}')
            print(f'y_train.shape: {y_train.shape}')
            print(f'X_subtrain.shape: {X_subtrain.shape}')
            print(f'y_subtrain.shape: {y_subtrain.shape}')
            print(f'X_test.shape: {X_test.shape}')
            print(f'y_test.shape: {y_test.shape}')
            print(f'X_val.shape: {X_val.shape}')
            print(f'y_val.shape: {y_val.shape}')
    
            # print a sample of the training data
            print(f'X_train[0]: {X_train[0]}')
            print(f'y_train[0]: {y_train[0]}')
    
            # get the number of features
            n_features = X_train.shape[1]
            print(f'n_features: {n_features}')
            hiddens = [100, 100, 50]
    
            # create the model
            # mlp_model_sep = create_mlp(input_dim=n_features, hiddens=hiddens)
            mlp_model_sep = modeling.create_mlp(input_dim=n_features, hiddens=hiddens)
            mlp_model_sep.summary()
    
            # Set the early stopping patience and learning rate as variables
            patience = 50
            learning_rate = 3e-5
            weight_decay = 0 # higher weight decay
            momentum_beta1 = 0.9 # higher momentum beta1
    
            # Define the EarlyStopping callback
            early_stopping = EarlyStopping(monitor='val_forecast_head_loss', patience=patience, verbose=1,
                                           restore_best_weights=True)
    
            # Compile the model with the specified learning rate
            mlp_model_sep.compile(optimizer=Adam(learning_rate=learning_rate,
                                                  weight_decay=weight_decay,
                                                  beta_1=momentum_beta1),
                                  loss={'forecast_head': 'mse'})
    
            # Train the model with the callback
            history = mlp_model_sep.fit(X_subtrain,
                                        {'forecast_head': y_subtrain},
                                        epochs=1000, batch_size=32,
                                        validation_data=(X_val, {'forecast_head': y_val}),
                                        callbacks=[early_stopping, WandbCallback()])
    
            # Plot the training and validation loss
            plt.figure(figsize=(12, 6))
            plt.plot(history.history['loss'], label='Training Loss')
            plt.plot(history.history['val_loss'], label='Validation Loss')
            plt.title('Training and Validation Loss')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            # save the plot
            plt.savefig(f'mlp_loss_{title}.png')
    
            # Determine the optimal number of epochs from early stopping
            optimal_epochs = early_stopping.stopped_epoch - patience + 1  # Adjust for the offset
            final_mlp_model_sep = create_mlp(input_dim=n_features,
                                             hiddens=hiddens)  # Recreate the model architecture
            final_mlp_model_sep.compile(optimizer=Adam(learning_rate=learning_rate,
                                                        weight_decay=weight_decay,
                                                        beta_1=momentum_beta1),
                                        loss={'forecast_head': 'mse'})  # Compile the model just like before
            # Train on the full dataset
            final_mlp_model_sep.fit(X_train, {'forecast_head': y_train}, epochs=optimal_epochs, batch_size=32,
                                    verbose=1)
    
            # evaluate the model on test data
            error_mae = evaluate_model(final_mlp_model_sep, X_test, y_test)
            print(f'mae error: {error_mae}')
            # Log the MAE error to wandb
            wandb.log({"mae_error": error_mae})
    
            # Process SEP event files in the specified directory
            test_directory = root_dir + '/testing'
            filenames = process_sep_events(
                test_directory,
                final_mlp_model_sep,
                model_type='mlp',
                title=title,
                inputs_to_use=inputs_to_use,
                add_slope=add_slope)
    
            # Log the plot to wandb
            for filename in filenames:
                wandb.log({f'{filename}': wandb.Image(filename)})
    
            # Finish the wandb run
            wandb.finish()
    
    
