In [1]:
import pyshred
from pyshred.models import SHRED
from pyshred.sequence_models import *
from pyshred.decoder_models import *
from pyshred.datasets import load_plasma # import plasma dataset
from pyshred_pypi_helper import *
import numpy as np
from pyshred.data_processor import SHREDPreprocessor

In [None]:
np.random.choice(100, size=1, replace=False)

In [None]:
data['Jex'][(5,5)]

In [2]:
plasma_data = load_plasma()
data_Jex = plasma_data['Jex']
data_Jey = plasma_data['Jey']
data_Jez = plasma_data['Jez']

data = {
    'Jex' : data_Jex,
    'Jey' : data_Jey,
    'Jez' : data_Jez,
}

Jez_mobile_clockwise_perimeter_walk_cw = perimeter_walk(height = data_Jez.shape[0], width = data_Jez.shape[1], timesteps = data_Jez.shape[2], clockwise=True)
Jez_mobile_clockwise_perimeter_walk_ccw = perimeter_walk(height = data_Jez.shape[0], width = data_Jez.shape[1], timesteps = data_Jez.shape[2], clockwise=False)

sensors = {
    'Jez' : [Jez_mobile_clockwise_perimeter_walk_cw, Jez_mobile_clockwise_perimeter_walk_ccw], # mobile sensors
    'Jey' : [(0,0), (49,59)], # selected stationary sensors
    'Jex' : 3 # random stationary sensors
}

In [None]:
# idea
shred_data_loader = SHREDPreprocessor(lags = 20, compression = True, n_components = None, scaling = "minmax")
X_train_recon, X_test_recon, y_train_recon, y_test_recon = shred_data_loader.train_test_split(sensor_locations_dict=sensors, full_state_dict=data, train_size=0.8, method = "random")

In [None]:
# Initialize preprocessor
shred_data_loader = SHREDPreprocessor(lags=20, compression=True, n_components=None, scaling="minmax")

# Preprocess raw data (sensor locations -> measurements, generate lagged data)
sensor_measurements, sensor_summary = shred_data_loader.preprocess(sensor_locations_dict=sensors, full_state_dict=data)

# Generate lagged sequences
lagged_sequences = shred_data_loader.generate_lagged_sequences(sensor_measurements=sensor_measurements)

# Split into train/test datasets # here 
X_train, X_test, y_train, y_test = shred_data_loader.train_test_split(
    lagged_sequences=lagged_sequences, full_state_dict=data, train_size=0.8, method="random"
)

# Fit the preprocessor
shred_data_loader.fit(X=X_train, y=y_train)

# Transform train/test data
# X_train_transformed_tensor, flattened_y_train_transformed_tensor = shred_data_loader.transform(X_train, y_train) # here it becomes flattened and concatenated (regardless of fit being called)!!! 
# (if fit is called do scaling and compression otherwise warn user no scaling or compression skipped)
train_dataset_torch = shred_data_loader.transform(X_train, y_train)
# X_test_transformed_tensor, y_test_transformed_tensor = shred_data_loader.transform(X_test, y_test)
test_dataset_torch = shred_data_loader.transform(X_test, y_test)

In [5]:
shred_data_loader = SHREDPreprocessor(lags = 20, compression = True, n_components = None, scaling = "minmax")

sensor_measurements, sensor_summary = shred_data_loader.sensor_loc_to_measurement(sensor_locations_dict=sensors, full_state_dict=data)
X_train, X_test, y_train, y_test = shred_data_loader.generate_datasets(sensor_measurements = sensor_measurements, full_state_dict=data, train_size=0.8, method = "random")
X = shred_data_loader.generate_datasets(sensor_measurements = sensor_measurements)
shred_data_loader.fit(X = X)
shred_data_loader.fit(y = y)
shred_data_loader.fit(X = y, Y = y)
X_transformed, y_transformed = shred_data_loader.transform(X, y)

lagged_sequences, sensor_summary = shred_data_loader.generate_lagged_sequences(sensor_locations_dict=sensors, full_state_dict=data)
X_train_recon, X_test_recon, y_train_recon, y_test_recon = shred_data_loader.train_test_split(lagged_sequences=lagged_sequences, full_state_dict=data, train_size=0.8, method = "random")
X_train_forecast, X_test_forecast, y_train_forecast, y_test_forecast = shred_data_loader.train_test_split(lagged_sequences=lagged_sequences, full_state_dict=data, train_size=0.8, method = "sequential")
X_train_recon_transformed, y_train_recon_transformed = shred_data_loader.fit_transform(X_train_recon, y_train_recon)
X_test_recon_transformed, y_test_recon_transformed = shred_data_loader.fit_transform(X_test_recon, y_test_recon)
shred_recon = SHRED_RECONSTRUCTOR(sequence = 'LSTM', decoder = 'SDN')

In [27]:
lagged_sequences.shape

(1990, 11, 7)

In [None]:
sensors

In [None]:
for key, data in sensors:
    print(key)

In [9]:
def generate_random_sensor_locations(full_state, num_sensors):
    spatial_shape = full_state.shape[:-1]  # last dimension always number of timesteps
    spatial_points = np.prod(spatial_shape)
    sensor_indices = np.random.choice(spatial_points, size=num_sensors, replace=False)
    sensor_locations = []

    for sensor_index in sensor_indices:
        sensor_location = []
        for dim in reversed(spatial_shape):
            sensor_location.append(sensor_index % dim)
            sensor_index //= dim
        sensor_location = tuple(reversed(sensor_location))
        sensor_locations.append(sensor_location)
    
    return sensor_locations

In [24]:
import pandas as pd


def generate_random_sensor_locations(full_state, num_sensors):
    spatial_shape = full_state.shape[:-1] # last dimension always number of timesteps
    spatial_points = np.prod(spatial_shape)
    sensor_indices = np.random.choice(spatial_points, size = num_sensors, replace = False)
    sensor_locations = []
    for sensor_index in sensor_indices:
        sensor_location = []
        for dim in reversed(spatial_shape):
            sensor_location.append(sensor_index % dim)
            sensor_index //= dim
        sensor_location = tuple(reversed(sensor_location))
        sensor_locations.append(sensor_location)
    return sensor_locations

def sensor_locations_dict_to_sensor_measurements(full_state_dict, sensor_locations_dict):
    sensor_summary = []
    sensor_measurements = []
    for key, data in sensor_locations_dict.items():
        # generate random sensor locations
        if isinstance(data, int):
            data = generate_random_sensor_locations(full_state = full_state_dict[key], num_sensors = data)
        if isinstance(data[0], tuple):
            for sensor_coordinate in data:
                sensor_summary.append([key, 'stationary', sensor_coordinate])
                sensor_measurements.append(full_state_dict[key][sensor_coordinate])
        elif isinstance(data[0], list):

            for mobile_sensor_coordinates in data:
                if len(mobile_sensor_coordinates) != full_state_dict[key].shape[-1]:
                    raise ValueError(
                        f"Number of mobile sensor coordinates ({len(mobile_sensor_coordinates)}) "
                        f"must match the number of timesteps ({full_state_dict[key].shape[-1]})."
                    )
                sensor_summary.append([key, 'mobile', mobile_sensor_coordinates])
                sensor_measurements.append([
                    full_state_dict[key][sensor_coordinate][timestep]
                    for timestep, sensor_coordinate in enumerate(mobile_sensor_coordinates)
                ])
                # sensor_measurements.append(np.array([full_state_dict[key][sensor_coordinate] for 
                #                                      sensor_coordinate in mobile_sensor_coordinates]))
        else:
            raise ValueError(f"Unsupported sensor type for key {key}.")
    sensor_measurements = np.array(sensor_measurements)
    sensor_summary = pd.DataFrame(sensor_summary, columns=["dataset", "type", "location/trajectory"])
    return {
        "sensor_measurements": sensor_measurements,
        "sensor_summary": sensor_summary,
    }

def generate_lagged_sequences(lags, full_state_dict = None, sensor_locations_dict = None, sensor_measurements = None):
    if sensor_measurements is None:
        if full_state_dict is None or sensor_locations_dict is None:
            raise ValueError("Provide either `sensor_measurements` or both `full_state_dict` and `sensor_locations_dict`.")
        sensor_measurements = sensor_locations_dict_to_sensor_measurements(full_state_dict, sensor_locations_dict)["sensor_measurements"]
    return generate_lagged_sequences_from_sensor_measurements(sensor_measurements, lags), sensor_locations_dict_to_sensor_measurements(full_state_dict, sensor_locations_dict)["sensor_summary"]


def generate_lagged_sequences_from_sensor_measurements(sensor_measurements, lags):
    num_sensors = sensor_measurements.shape[0]
    num_timesteps = sensor_measurements.shape[1]

    if num_timesteps <= lags:
        raise ValueError("Number of timesteps must be greater than the number of lags.")

    lagged_sequences = np.empty((num_timesteps - lags, lags + 1, num_sensors))
    for i in range(lagged_sequences.shape[0]):
        lagged_sequences[i] = sensor_measurements[:, i:i+lags+1].T
    return lagged_sequences


def train_test_split(full_state_dict, lagged_sequences, train_size = 0.8, method = "random"):
    num_timesteps_minus_lags = lagged_sequences.shape[0]
    if train_size <= 0 or train_size >= 1:
        raise ValueError("`train_size` must be in the range (0.0, 1.0).")
    # generate random indices for train/test
    if method == "random":
        indices = np.random.permutation(num_timesteps_minus_lags)
    elif method == "sequential":
        indices = np.arange(num_timesteps_minus_lags)
    else:
        raise ValueError(f"Invalid method '{method}'. Use 'random' or 'sequential'.")
    num_train_indices = int(train_size * num_timesteps_minus_lags)
    train_indices = indices[:num_train_indices]
    test_indices = indices[num_train_indices:]
    # Inputs
    X_train = lagged_sequences[train_indices]
    X_test = lagged_sequences[test_indices]
    # Outputs
    y_train = {}
    y_test = {}
    for key, full_state_data in full_state_dict.items():
        y_train[key] = full_state_data[..., train_indices]
        y_test[key] = full_state_data[..., test_indices]
    return X_train, X_test, y_train, y_test

sensor_measurements, sensor_summary = sensor_locations_dict_to_sensor_measurements(data, sensors)
lagged_sequences, sensor_summary = generate_lagged_sequences(lags = 10, full_state_dict = data, sensor_locations_dict = sensors, sensor_measurements = None)
X_train, X_test, y_train, y_test = train_test_split(data, lagged_sequences, train_size = 0.8, method = "random")


In [25]:
sensor_summary

Unnamed: 0,dataset,type,location/trajectory
0,Jez,mobile,"[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5..."
1,Jez,mobile,"[(0, 0), (1, 0), (2, 0), (3, 0), (4, 0), (5, 0..."
2,Jey,stationary,"(0, 0)"
3,Jey,stationary,"(49, 59)"
4,Jex,stationary,"(10, 48)"
5,Jex,stationary,"(19, 5)"
6,Jex,stationary,"(8, 57)"


In [11]:
y_test['Jex'].shape

(50, 60, 398)

In [12]:
y_train['Jex'].shape

(50, 60, 1592)

In [13]:
lagged_sequences.shape[0]

1990

In [14]:
data['Jex'].shape

(50, 60, 2000)

In [15]:
lagged_sequences

array([[[-1.45310000e+03, -1.45310000e+03,  8.75030000e+02, ...,
          4.87961480e+02,  9.51231320e+01, -3.91650703e+02],
        [-3.88715490e+03, -3.52302730e+03, -1.59030000e+02, ...,
         -3.14758769e+01, -3.67242173e+01,  1.47297779e+01],
        [-4.09875947e+03, -3.52171320e+03, -1.37350000e-01, ...,
         -9.08299872e+00, -1.60889589e+00,  3.04896704e+01],
        ...,
        [-4.05473714e+03, -2.41615624e+03, -1.46460000e+01, ...,
         -1.36076704e+01, -1.12196073e+02,  1.17682107e+02],
        [-4.45804462e+03, -2.40499854e+03, -1.67140000e+02, ...,
          4.75786193e+01, -9.84582751e+01,  1.07320944e+02],
        [-4.40876401e+03, -2.25425813e+03, -7.14810000e+00, ...,
         -8.38844863e+01, -6.80402050e+01,  9.66751490e+01]],

       [[-3.88715490e+03, -3.52302730e+03, -1.59030000e+02, ...,
         -3.14758769e+01, -3.67242173e+01,  1.47297779e+01],
        [-4.09875947e+03, -3.52171320e+03, -1.37350000e-01, ...,
         -9.08299872e+00, -1.60889589e

In [None]:
sensor_summary

TypeError: 'str' object is not callable

In [17]:
sensor_measurements.shape

AttributeError: 'str' object has no attribute 'shape'

In [None]:
sensors

In [None]:
print('data_Jex.shape:', data_Jex.shape)
print('type(data_Jex):',type(data_Jex))

In [None]:
# current ideas:

shred_preprocessor = SHREDPreprocessor()
X_train, X_test, Y_train, Y_test = shred_preprocessor.fit_transform(data, sensors, train_size = 0.8, lags = 20, split="random", n_components = 20, scaling = True)
shred = SHRED(sequence="LSTM", decoder="SDN")
shred.fit(X = X_train, y = Y_train, num_epochs=10) # by default: n_components = 20
shred.eval(X = X_test, y = Y_test)
shred.recon(X)
shred_preprocessor.inverse_transform(test_output)

In [None]:
shred_preprocessor = SHREDPreprocessor()
X_train, X_test, Y_train, Y_test, time = shred_preprocessor.fit_transform(data, sensors, time = None, train_size = 0.8, lags = 20, split="random", n_components = 20, scaling = True)
shred = SHRED(sequence="LSTM", decoder="SDN")
shred.fit(X = X_train, y = Y_train, time = time, num_epochs=10) # by default: n_components = 20
shred.eval(X = X_test, y = Y_test, time = None)
shred.predict(X)
shred.forecast(t = timesteps_forward)
shred_preprocessor.inverse_transform(test_output)


In [None]:
X_train, X_test, y_train, y_test = prep_and_split(data = data, sensors = sensors, time = None, train_size = 0.8, lags = 20, n_components = 20)

In [None]:
y_train.shape

In [None]:
X_train.shape

In [None]:
X_train.shape

In [None]:
shred_data = DataLoader(data = data, sensors = sensors, time = None)
test_data = DataLoader(data = test_set, sensors = sensors, time = None)
shred = SHRED(sequence="LSTM", decoder="SDN")
shred.fit(X = X_train, y = y_train, num_epochs=10) # by default: n_components = 20
shred.eval(X = X_test, y = y_test, time = None)


shred.predict(start = start_time, end = end_time)
shred.forecast(t = timesteps_forward)

SHRED.split_data(data = data)

In [None]:
shred_dataset = DataProcessor(data = data, sensors = sensors, time = None)
recon_train_dataset, recon_val_dataset, recon_test_dataset, forecast_train_dataset, forecast_val_dataset, forecast_test_dataset = shred_dataset.prepare_datasets(val_size = 0.2, test_size = 0.2, lags = 20, n_components = 20)




# want: train, val, test = shred_dataset.prepare_datasets(val_size = 0.2, test_size = 0.2, lags = 20, n_components = 20)

# Next up:

# this:
# shred.fit(data, sensors, lags = 40, time = None, sensor_forecaster = True, n_components = 20, val_size = 0.2, batch_size=64, num_epochs=4000, lr=1e-3, verbose=True, patience=20):
# to:
# shred.fit(recon_train, recon_val, forecast_train, forecast_val, batch_size=64, num_epochs=4000, lr=1e-3, verbose=True, patience=20):

# create a custom class that inside has multiple standard TimeSeriesDataset objects (wraps multiple together):
# shred.fit(train, val, batch_size=64, num_epochs=4000, lr=1e-3, verbose=True, patience=20):


In [None]:
recon_val_dataset.data_in
print('recon_val_dataset.data_in:', recon_val_dataset.data_in.shape) # number time steps, lags + 1, number of sensors
print('recon_val_dataset.data_out:', recon_val_dataset.data_out.shape) # number of time steps, 20 components * 3 datasets + 7 sensors

In [None]:
X_train, X_test, y_train, y_test = pyshred.prep_and_split(data = data, sensors = sensors, time = None, train_size = 0.8, lags = 20, n_components = 20)
shred = SHRED(sequence=sequence_model, decoder=decoder_model)
shred.fit(X = X_train, y = y_train, num_epochs=10) # by default: n_components = 20
shred.eval(X = X_test, y = y_test, time = None)


shred.predict(start = start_time, end = end_time)
shred.forecast(t = timesteps_forward)



In [None]:
recon_train_dataset

In [None]:
# def fit(self, data, sensors, lags = 40, time = None, sensor_forecaster = True, n_components = 20, val_size = 0.2, batch_size=64, num_epochs=4000, lr=1e-3, verbose=True, patience=20):

# def fit(self, recon_train, recon_val, forecast_train, forecast_val, batch_size=64, num_epochs=4000, lr=1e-3, verbose=True, patience=20):

In [None]:
sequence_model = Transformer(d_model=64, num_encoder_layers=2, nhead=8, dropout=0.1)
decoder_model = SDN(l1 = 200, l2 = 300, dropout= 0.2)
shred = SHRED(sequence=sequence_model, decoder=decoder_model)
val_errors,  sensor_forecast_errors = shred.fit(data = data, sensors = sensors, num_epochs=10) # by default: n_components = 20

In [None]:
shred = SHRED(sequence='LSTM', decoder='SDN')
shred.fit(data = 'path_to_folder or file_path', sensors = 'default 3', lags = 'default 40', n_components='defuault 20')

In [None]:
sequence_model = LSTM(hidden_size=32, num_layers=1)
decoder_model = SDN(l1 = 200, l2 = 300, dropout= 0.2)
shred = SHRED(sequence=sequence_model, decoder=decoder_model)
val_errors,  sensor_forecast_errors = shred.fit(data = data, sensors = sensors, num_epochs=10) # by default: n_components = 20

In [None]:
shred.summary