In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import ast
import os
from tensorflow.keras import layers, models

# --- 1. PRE-PROCESSING FUNCTION (Run once) ---
def convert_csv_to_numpy(e2_path, srs_path, output_prefix):
    """Convert the raw E2 and SRS CSV files into ``.npy`` binaries.

    The E2 CSV is read without a header; every cell is coerced to a number
    (unparseable cells become 0) and the first 19 columns are kept as float32.

    The SRS CSV is read without a header; each cell of column 0 is a string
    holding a Python literal, which is parsed with ``ast.literal_eval``,
    converted to a float32 array, transposed and reshaped to ``(4, 1536)``.

    Args:
        e2_path: Path of the E2 CSV file.
        srs_path: Path of the SRS CSV file.
        output_prefix: Prefix of the output files; ``<prefix>_e2.npy`` and
            ``<prefix>_srs.npy`` are written.

    Returns:
        None. The two arrays are written to disk.
    """
    print(f"Converting {e2_path} and {srs_path} to binary format...")

    # E2: numeric coercion first, then keep only the leading 19 columns.
    e2_frame = pd.read_csv(e2_path, header=None)
    e2_numeric = e2_frame.apply(pd.to_numeric, errors='coerce').fillna(0)
    e2_array = e2_numeric.iloc[:, :19].to_numpy().astype(np.float32)

    # SRS (the slow part): each row is parsed from text exactly once here so
    # later runs can load the binary instead.
    srs_frame = pd.read_csv(srs_path, header=None)
    srs_rows = [
        np.array(ast.literal_eval(cell), dtype=np.float32).T.reshape(4, 1536)
        for cell in srs_frame[0]
    ]
    srs_array = np.stack(srs_rows, axis=0)

    # Persist both arrays next to each other under the shared prefix.
    np.save(f"{output_prefix}_e2.npy", e2_array)
    np.save(f"{output_prefix}_srs.npy", srs_array)
    print("Pre-processing complete.")

# --- 2. OPTIMIZED DATA PIPELINE ---
def get_fast_dataset(e2_npy, srs_npy, batch_size=4, e2_len=20, srs_input_rows=100, pred_offset=50, srs_stride=50):
    """Build a batched ``tf.data`` pipeline of ``((X_e2, X_srs), y)`` samples.

    For step ``t`` the generator yields:

    * ``X_e2``  - E2 rows ``t .. t + e2_len``, flattened, zero-padded or
      truncated to 380 values and reshaped to ``(5, 4, 19)``;
    * ``X_srs`` - ``srs_input_rows`` consecutive SRS rows starting at
      ``t * srs_stride``, reshaped to ``(20, 20, 1536)``;
    * ``y``     - the single SRS row located ``pred_offset`` rows after the
      end of that SRS window, shape ``(4, 1536)``.

    Args:
        e2_npy: Path of the E2 ``.npy`` file.
        srs_npy: Path of the SRS ``.npy`` file.
        batch_size: Number of samples per batch.
        e2_len: Number of E2 rows per window.
        srs_input_rows: Number of SRS rows per input window.
        pred_offset: Distance (in SRS rows) from the end of the input window
            to the label row, counted so that the label index is
            ``start + srs_input_rows + pred_offset - 1``.
        srs_stride: Number of SRS rows the window advances per step.

    Returns:
        A cached, batched and prefetched ``tf.data.Dataset``.

    Raises:
        ValueError: If ``srs_stride`` is not positive, or if the SRS array
            cannot produce ``(20, 20, 1536)`` inputs and ``(4, 1536)`` labels.
    """
    # Fixed tensor shapes expected by the model inputs / output.
    e2_shape = (5, 4, 19)
    srs_shape = (20, 20, 1536)
    label_shape = (4, 1536)
    e2_flat_size = int(np.prod(e2_shape))
    srs_flat_size = int(np.prod(srs_shape))

    if srs_stride < 1:
        raise ValueError(f"srs_stride must be >= 1, got {srs_stride}")

    # Load the binary data
    e2_data = np.load(e2_npy)
    srs_data = np.load(srs_npy)

    # Fail early with a readable message instead of a reshape error raised
    # from inside the generator on the first training step.
    if tuple(srs_data.shape[1:]) != label_shape:
        raise ValueError(
            f"SRS rows must have shape {label_shape}, got {tuple(srs_data.shape[1:])}"
        )
    if srs_input_rows * int(np.prod(label_shape)) != srs_flat_size:
        raise ValueError(
            f"srs_input_rows={srs_input_rows} does not fill an SRS input of shape {srs_shape}"
        )

    # Number of valid steps. The last step t must satisfy
    #   t * srs_stride + srs_input_rows + pred_offset - 1 <= len(srs_data) - 1
    #   t + e2_len <= len(e2_data)
    # so both counts include the "+ 1" for the final, exactly-fitting window.
    num_steps_srs = (len(srs_data) - (srs_input_rows + pred_offset)) // srs_stride + 1
    num_steps_e2 = len(e2_data) - e2_len + 1
    num_steps = max(0, min(num_steps_e2, num_steps_srs))

    def data_generator():
        for t in range(num_steps):
            # E2 window: flatten, then zero-pad / truncate to the fixed size.
            e2_flat = e2_data[t : t + e2_len].ravel()[:e2_flat_size]
            X_e2 = np.zeros(e2_flat_size, dtype=np.float32)
            X_e2[:e2_flat.size] = e2_flat
            X_e2 = X_e2.reshape(e2_shape)

            # SRS window (advancing by srs_stride rows per step)
            srs_start = t * srs_stride
            X_srs = srs_data[srs_start : srs_start + srs_input_rows].reshape(srs_shape)

            # Label: one SRS row, pred_offset rows past the end of the window
            y = srs_data[srs_start + srs_input_rows + pred_offset - 1]

            yield (X_e2, X_srs), y

    output_signature = (
        (tf.TensorSpec(shape=e2_shape, dtype=tf.float32),
         tf.TensorSpec(shape=srs_shape, dtype=tf.float32)),
        tf.TensorSpec(shape=label_shape, dtype=tf.float32)
    )

    ds = tf.data.Dataset.from_generator(data_generator, output_signature=output_signature)

    # cache() stores the generated samples after the first full pass so the
    # Python generator is not re-run on later epochs.
    return ds.cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)



In [None]:
def create_model(e2_input_shape=(5, 4, 19), srs_input_shape=(20, 20, 1536), lstm_units=128, dropout_rate=0.3):
    """Build and compile the two-branch (radio + SRS) prediction model.

    The radio branch and the SRS branch are each reduced to a 5x5 feature
    map, concatenated along the channel axis, read as a 25-step sequence by
    an LSTM and projected to a ``(4, 1536)`` output.

    Args:
        e2_input_shape: Shape of one radio (E2) input sample.
        srs_input_shape: Shape of one SRS input sample.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Dropout rate applied to the flattened LSTM output
            before the dense head. A falsy value (``0`` / ``None``) disables
            dropout.

    Returns:
        A compiled ``keras`` model (Adam optimizer, MSE loss) taking
        ``[radio_input, srs_input]`` and producing a ``(4, 1536)`` tensor.
    """
    # Radio Branch
    radio_input = layers.Input(shape=e2_input_shape, name='radio_input')
    r1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(radio_input)
    r1 = layers.BatchNormalization()(r1)
    # Pad the width by one column so the map matches the SRS branch.
    r1 = layers.ZeroPadding2D(padding=((0,0),(0,1)))(r1) # Result: (5, 5, 32)

    # SRS Branch
    srs_input = layers.Input(shape=srs_input_shape, name='srs_input')
    s1 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(srs_input) # Reduced filters for speed
    s1 = layers.MaxPooling2D((2, 2))(s1) # 10x10
    s1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(s1)
    s1 = layers.MaxPooling2D((2, 2))(s1) # 5x5
    s1 = layers.BatchNormalization()(s1)

    # Concatenate
    x = layers.Concatenate(axis=-1)([r1, s1]) # (5, 5, 32 + 64)
    x = layers.Reshape((25, 96))(x)
    x = layers.LSTM(lstm_units, return_sequences=True)(x)
    x = layers.Flatten()(x)
    # dropout_rate was previously accepted but never applied to any layer;
    # regularize the large flattened feature vector before the dense head.
    if dropout_rate:
        x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(4 * 1536)(x)
    output = layers.Reshape((4, 1536))(x)

    model = models.Model(inputs=[radio_input, srs_input], outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
TRAIN_E2 = "P:/SP Challenge/DataSet/Preprocessed Dataset/combined_E2.csv"
TRAIN_SRS = "P:/SP Challenge/DataSet/Preprocessed Dataset/combined_pp_srs.csv"

# Binary files written by convert_csv_to_numpy(..., "train").
TRAIN_E2_NPY = "train_e2.npy"
TRAIN_SRS_NPY = "train_srs.npy"

# Pre-process once. Both binaries are required: checking only the E2 file
# would skip the conversion when the SRS file is missing and np.load would
# then fail.
if not (os.path.exists(TRAIN_E2_NPY) and os.path.exists(TRAIN_SRS_NPY)):
    convert_csv_to_numpy(TRAIN_E2, TRAIN_SRS, "train")

# Create optimized dataset
train_ds = get_fast_dataset(TRAIN_E2_NPY, TRAIN_SRS_NPY, batch_size=16) # Increased batch size

# Model and training. Keep the History object so the loss curve can be
# inspected later (and the cell does not echo its repr).
model = create_model()
history = model.fit(train_ds, epochs=15)

In [None]:
def get_fast_test_dataset(e2_npy, srs_npy, e2_len=20, srs_input_rows=100, pred_offset=50):
    """Build the evaluation dataset with the same windowing as training.

    Delegates to ``get_fast_dataset`` so the E2/SRS windows and the label row
    are produced by a single implementation and cannot drift apart between
    training and evaluation. Samples are emitted in order, one per batch.

    Args:
        e2_npy: Path of the E2 ``.npy`` file.
        srs_npy: Path of the SRS ``.npy`` file.
        e2_len: Number of E2 rows per window.
        srs_input_rows: Number of SRS rows per input window.
        pred_offset: Offset of the label row past the end of the SRS window.

    Returns:
        A ``tf.data.Dataset`` yielding ``((X_e2, X_srs), y_true)`` with a
        batch size of 1.
    """
    # For testing/prediction each sample is its own batch.
    return get_fast_dataset(
        e2_npy,
        srs_npy,
        batch_size=1,
        e2_len=e2_len,
        srs_input_rows=srs_input_rows,
        pred_offset=pred_offset,
    )

# Evaluate on held-out data. This cell previously loaded train_e2.npy /
# train_srs.npy, so the reported "Test MSE" was measured on the training set.
# NOTE(review): the test CSV locations below are taken from this cell's
# recorded output - confirm they are the intended test files.
TEST_E2 = "P:/SP Challenge/DataSet/Preprocessed Dataset/E2_test.csv"
TEST_SRS = "P:/SP Challenge/DataSet/Preprocessed Dataset/pp_srs_test.csv"
TEST_PREFIX = "P:/SP Challenge/Model/test"

test_e2_npy, test_srs_npy = (f"{TEST_PREFIX}_e2.npy", f"{TEST_PREFIX}_srs.npy")

# Convert the test CSVs once; both binaries must exist to skip the conversion.
if not (os.path.exists(test_e2_npy) and os.path.exists(test_srs_npy)):
    convert_csv_to_numpy(TEST_E2, TEST_SRS, TEST_PREFIX)

test_ds = get_fast_test_dataset(test_e2_npy, test_srs_npy)


test_loss = model.evaluate(test_ds)
print(f"Test MSE: {test_loss}")


predictions = model.predict(test_ds)

Preparing test binaries for test_file_1...
Converting P:/SP Challenge/DataSet/Preprocessed Dataset/E2_test.csv and P:/SP Challenge/DataSet/Preprocessed Dataset/pp_srs_test.csv to binary format...
Pre-processing complete.
Test MSE: 6969.412109375
