In [1]:
import cdflib

cdf_file = cdflib.CDF("D:\mms\OLSHEVSKY\mmslearning\labels_human\labels_fpi_fast_dis_dist_201711.cdf")
# print(cdf_file.cdf_info())  

var_info_epoch = cdf_file.varinq('epoch_mms1_fpi_fast_dis_dist_20171108040000')        # overview of file


In [None]:
import cdflib
import numpy as np

label_cdf = cdflib.CDF(r"D:\mms\OLSHEVSKY\mmslearning\labels_human\labels_fpi_fast_dis_dist_201711.cdf")


time_id = "20171109180000"

labels = label_cdf.varget(f"label_mms1_fpi_fast_dis_dist_{time_id}").squeeze()
epochs = label_cdf.varget(f"epoch_mms1_fpi_fast_dis_dist_{time_id}").squeeze()

# keep only valid labels (>=0)
valid_idx = labels >= 0
labels = labels[valid_idx]
epochs = epochs[valid_idx]


In [3]:
raw_cdf_path = rf"D:\mms\Data\mms\mms1\fpi\fast\l2\dis-dist\2017\11\mms1_fpi_fast_l2_dis-dist_{time_id}_v3.4.0.cdf"
raw_cdf = cdflib.CDF(raw_cdf_path)

dist = raw_cdf.varget("mms1_dis_dist_fast")  # shape (N,32,16,32)
dist_epochs = raw_cdf.varget("Epoch")


In [4]:
def match_epochs(raw_data, raw_epochs, label_epochs, labels):
    X, y = [], []
    for le, lab in zip(label_epochs, labels):
        idx = np.argmin(np.abs(raw_epochs - le))
        X.append(raw_data[idx])
        y.append(lab)
    return np.array(X), np.array(y)

X_raw, y_raw = match_epochs(dist, dist_epochs, epochs, labels)


In [5]:
def preprocess_distributions(X):
    X = X.copy()
    X[X == 0] = np.min(X[X > 0])
    X = np.log10(X)
    X = (X - X.min(axis=(1,2,3), keepdims=True)) / \
        (X.max(axis=(1,2,3), keepdims=True) - X.min(axis=(1,2,3), keepdims=True))
    return X[..., np.newaxis]

X = preprocess_distributions(X_raw)


In [6]:
from tensorflow.keras.utils import to_categorical
y_cat = to_categorical(y_raw, num_classes=4)  # {0:SW, 1:IF, 2:MSH, 3:MSP}


In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_mms_cnn(input_shape=(32, 16, 32, 1), num_classes=4):
    """
    3D CNN for MMS ion distribution classification.
    
    input_shape : tuple
        Shape of the input sample (32 energy, 16 theta, 32 phi, 1 channel).
    num_classes : int
        Number of output classes (default = 4: SW, IF, MSH, MSP).
    
    Returns:
        A compiled Keras model.
    """
    model = models.Sequential([
        # First 3D convolution
        layers.Conv3D(
            filters=32, kernel_size=(5, 3, 5), strides=(2, 1, 2),
            activation='relu', padding='valid',
            input_shape=input_shape
        ),
        
        # Second 3D convolution
        layers.Conv3D(
            filters=32, kernel_size=(3, 3, 3), strides=(1, 1, 1),
            activation='relu', padding='valid'
        ),
        
        # Max pooling
        layers.MaxPooling3D(pool_size=(2, 2, 2)),
        
        # Flatten for dense layers
        layers.Flatten(),
        
        # Dense hidden layer
        layers.Dense(128, activation='relu'),
        
        # Output layer
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [8]:
from sklearn.model_selection import train_test_split

# Suppose X, y_raw, y_cat are already defined
X_train, X_test, y_train, y_test = train_test_split(
    X, y_raw, test_size=0.2, random_state=42, stratify=y_raw
)

# One-hot encode test labels
from tensorflow.keras.utils import to_categorical
y_train_cat = to_categorical(y_train, num_classes=4)
y_test_cat  = to_categorical(y_test,  num_classes=4)

# Train on train set
model = build_mms_cnn(input_shape=(32,16,32,1), num_classes=4)
model.fit(X_train, y_train_cat, epochs=30, batch_size=64, validation_split=0.2)

# Evaluate on held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test_cat)
print("Test accuracy:", test_acc)


Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 79ms/step - accuracy: 0.6055 - loss: 0.7968 - val_accuracy: 0.7624 - val_loss: 0.5035
Epoch 2/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.8015 - loss: 0.4348 - val_accuracy: 0.7129 - val_loss: 0.4578
Epoch 3/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.8561 - loss: 0.3043 - val_accuracy: 0.8911 - val_loss: 0.2656
Epoch 4/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.9454 - loss: 0.1650 - val_accuracy: 0.9208 - val_loss: 0.1816
Epoch 5/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.9578 - loss: 0.1104 - val_accuracy: 0.9604 - val_loss: 0.1001
Epoch 6/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.9851 - loss: 0.0561 - val_accuracy: 0.9703 - val_loss: 0.0799
Epoch 7/30
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

Use a Different Day as Test Set