In [36]:
from karateclub import NetMF
import torch
from torch_geometric.data import Data
import numpy as np
import networkx as nx
from tsfresh import select_features, extract_features,feature_extraction
from tsfresh.utilities.dataframe_functions import impute

In [22]:
def extract_tsfresh_features(x):
    """
    For MRI data sets. Compute a fixed set of tsfresh feature calculators on one node time series.

    The calculators (and any extra positional arguments they need) are listed in ``feature_table``;
    they are applied in table order and every result is cast to ``np.float32``.
    :param x: numpy array containing a time series
    :return: a list with one np.float32 value per entry of ``feature_table``
    """
    calculators = feature_extraction.feature_calculators

    # (key, calculator, extra positional arguments passed after x).
    # The order of the rows defines the order of the returned features.
    feature_table = (
        ("asof", calculators.absolute_sum_of_changes, ()),
        # ("ae", calculators.approximate_entropy, (10, 2)),  # disabled
        ("bc", calculators.benford_correlation, ()),
        ("c3", calculators.c3, (7,)),
        ("cid_ce", calculators.cid_ce, (True,)),
        ("cam", calculators.count_above_mean, ()),
        ("cbm", calculators.count_below_mean, ()),
        ("lsam", calculators.longest_strike_above_mean, ()),
        ("var", calculators.variance, ()),
        ("std", calculators.standard_deviation, ()),
        ("skw", calculators.skewness, ()),
        # ("sentr", calculators.sample_entropy, ()),  # disabled
        ("qua", calculators.quantile, (0.25,)),
    )

    return [
        np.float32(calculator(x, *extra_args))
        for _key, calculator, extra_args in feature_table
    ]

In [23]:
# Load the per-subject node time series array from disk (path is relative to the notebook's working directory).
subject_time_series=np.load('../source_data/time_series/time_series.npy')

In [25]:
# Shape of the first subject's 2-D slice
subject_time_series[0].shape

(341, 39)

In [26]:
# Keep the first subject's 2-D slice for the single-subject experiments below
subject_t = subject_time_series[0]

In [29]:
# Shape of one column of the subject slice; temporal_feature_extraction passes columns like this
# one to extract_tsfresh_features.
subject_t[:,1].shape

(341,)

In [34]:
def temporal_feature_extraction(subject_time_series):
    """
    Extract tsfresh features for every node of a single subject.

    :param subject_time_series: 2-D array for one subject; each column (second axis) is treated
        as one node's time series
    :return: numpy array with one row per node and one column per extracted feature
    """
    node_count = len(subject_time_series[0, :])
    per_node_features = [
        extract_tsfresh_features(subject_time_series[:, node_idx])
        for node_idx in range(node_count)
    ]
    return np.array(per_node_features)

In [35]:
# Sanity check on one subject: the result has one row per node and one column per tsfresh feature.
temp=temporal_feature_extraction(subject_t)
temp.shape

(39, 11)

In [48]:
import numpy as np

# Load the FC matrices and the node time series from disk
subject_fc_matrices = np.load('../source_data/fc/fc_matrices.npy')
subject_time_series = np.load('../source_data/time_series/time_series.npy')

# Only the first axis is compared: both arrays must hold the same number of subjects.
# The remaining dimensions are not checked and may differ between the two arrays.
assert subject_fc_matrices.shape[0] == subject_time_series.shape[0], "Number of subjects should match."


In [46]:
# Pair each subject's FC matrix with its time series: one (fc_matrix, time_series) tuple per subject
combined_data = list(zip(subject_fc_matrices, subject_time_series))

In [52]:
# Number of (FC matrix, time series) pairs, i.e. one entry per subject
len(combined_data)

71

In [56]:
# Pull out the (FC matrix, time series) pair of one subject (index 0) and show both shapes
individual_subject_data = combined_data[0]
fc_matrix_individual_subject = individual_subject_data[0]
time_series_individual_subject = individual_subject_data[1]
print(np.asarray(fc_matrix_individual_subject).shape)
print(np.asarray(time_series_individual_subject).shape)

(39, 39)
(341, 39)


In [None]:
# np.stack cannot be used here: it requires every input array to have the same shape, but each
# subject has a (39, 39) FC matrix and a (341, 39) time series, so the call raises ValueError.
# Pair the two arrays per subject instead.
combined_data = list(zip(subject_fc_matrices, subject_time_series))

# Accessing FC matrix and time series for an individual subject (e.g., subject at index 0)
individual_subject_data = combined_data[0]
fc_matrix_individual_subject, time_series_individual_subject = individual_subject_data

In [44]:
# NOTE(review): create_labels is not defined in any visible cell of this notebook; it has to come
# from a deleted/unseen cell or another module — confirm before relying on Restart & Run All.
# This call rebinds subject_fc_matrices and subject_time_series.
subject_fc_matrices,subject_time_series, all_labels,num_classes=create_labels()

Number of Subjects 71
Label 0: 25 subjects
Label 1: 23 subjects
Label 2: 23 subjects


In [18]:
# NOTE(review): subject_time_series_features is not assigned in any visible cell — confirm where
# it is computed (hidden kernel state).
subject_time_series_features.shape

(71, 39, 11)

In [11]:
# Shape of a single series: subject index 0, column 1 of the last axis
subject_time_series[0,:,1].shape

(341,)

In [13]:
# Compute the tsfresh feature list for a single series (subject index 0, column 1)
feature=extract_tsfresh_features(subject_time_series[0,:,1])

In [58]:
# Number of features extract_tsfresh_features returned for one series
len(feature)

11

In [63]:
def netmf_embedding(graph, dimensions=16, order=2, seed=21):
    """
    Fit a Karate Club NetMF model on ``graph`` and return its node embedding.

    :param graph: graph object accepted by ``NetMF.fit`` (callers in this notebook pass a networkx graph)
    :param dimensions: value forwarded to ``NetMF(dimensions=...)``
    :param order: value forwarded to ``NetMF(order=...)``
    :param seed: random seed forwarded to ``NetMF(seed=...)``; defaults to 21, the value that was
        previously hard-coded, so existing calls behave exactly as before
    :return: the array returned by ``model.get_embedding()``
    """
    model = NetMF(dimensions=dimensions, order=order, seed=seed)
    model.fit(graph)
    return model.get_embedding()

In [64]:
def create_data_object(sub_conn_matrix, sub_ROI_ts, label, dimensions, order, threshold=0.5):
    """
    Build a PyTorch Geometric ``Data`` graph for one subject.

    Node features are the NetMF embedding of the thresholded connectivity graph concatenated
    column-wise with the tsfresh features of each node's time series.

    :param sub_conn_matrix: 2-D numpy connectivity matrix of the subject (must be square, as it is
        used as an adjacency matrix)
    :param sub_ROI_ts: 2-D array of the subject's time series, one column per node
    :param label: class label stored as ``y`` (converted to a ``torch.long`` tensor)
    :param dimensions: NetMF embedding dimensions, forwarded to ``netmf_embedding``
    :param order: NetMF order, forwarded to ``netmf_embedding``
    :param threshold: entries strictly greater than this value become edges
    :return: ``torch_geometric.data.Data`` with ``x`` (node features), ``edge_index`` and ``y``
    """
    # Binarise the connectivity matrix: 1.0 where the value exceeds the threshold, else 0.0.
    adjacency_matrix = (sub_conn_matrix > threshold).astype(float)

    # from_numpy_array replaces from_numpy_matrix, which was removed in NetworkX 3.0.
    graph = nx.from_numpy_array(adjacency_matrix)

    netmf_embeddings = netmf_embedding(graph, dimensions, order)
    temporal_embeddings = temporal_feature_extraction(sub_ROI_ts)

    # Both feature blocks have one row per node, so they are joined along the feature axis.
    node_features = torch.tensor(
        np.concatenate((netmf_embeddings, temporal_embeddings), axis=1),
        dtype=torch.float32,
    )

    # nonzero() yields (row indices, column indices); stacked they form the 2 x num_edges index.
    edge_index = torch.tensor(np.array(adjacency_matrix.nonzero()), dtype=torch.long)

    # Create PyTorch Geometric Data object
    data = Data(x=node_features, edge_index=edge_index, y=torch.tensor(label, dtype=torch.long))
    return data

In [66]:
# Reload the FC matrices and the node time series from disk
subject_fc_matrices = np.load('../source_data/fc/fc_matrices.npy')
subject_time_series = np.load('../source_data/time_series/time_series.npy')

# Build the graph object for the subject at index 1, with label 1 and a 32-dimensional NetMF embedding
data = create_data_object(
    sub_conn_matrix=subject_fc_matrices[1],
    sub_ROI_ts=subject_time_series[1],
    label=1,
    dimensions=32,
    order=2,
)

In [67]:
# Display the Data object currently bound to `data` (its repr lists the x, edge_index and y fields)
data

Data(x=[39, 43], edge_index=[2, 264], y=1)