In [9]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import multiprocessing
import os
import zipfile
from minepy import MINE

# Project root and the output folders derived from it
base_dir = '/home/vincent/MySSD/JupyterProjects/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/EEG_Chaos_Kuramoto_Neural_Net'
embedding_2d_dir, embedding_3d_dir, plots_directory = (
    os.path.join(base_dir, folder_name)
    for folder_name in ('2dembedding_data', '3dembedding_data', 'plots')
)

# Create any output folder that is missing (no-op when it already exists)
for dir_path in (embedding_2d_dir, embedding_3d_dir, plots_directory):
    os.makedirs(dir_path, exist_ok=True)

# Load the EEG recording
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
EEG_data = np.load(
    os.path.join(base_dir, 'eeg_data_with_channels.npy'),
    allow_pickle=True,
)

# EEG channel names; position in this list is used as the column index of
# the recording further down.
eeg_channel_names = [
    'Fp1', 'Fpz', 'Fp2',
    'F7', 'F3', 'Fz', 'F4', 'F8',
    'FC5', 'FC1', 'FC2', 'FC6',
    'M1', 'T7', 'C3', 'Cz', 'C4', 'T8', 'M2',
    'CP5', 'CP1', 'CP2', 'CP6',
    'P7', 'P3', 'Pz', 'P4', 'P8',
    'POz', 'O1', 'Oz', 'O2',
]

# Time window of interest, converted from seconds to row indices
sampling_rate = 1000  # Hz
start_time = 805.571
end_time = 921.515
start_index = int(start_time * sampling_rate)
end_index = int(end_time * sampling_rate)
filtered_EEG_data = EEG_data[start_index:end_index, :]

max_dim = 20

def mutual_info_worker(args):
    """Return the MIC score of one pair of series.

    :param args: 2-tuple ``(data1, data2)``; packed into a single argument so
                 the function can be handed to ``Pool.map``.
    :return: Value of ``MINE.mic()`` after scoring the pair.
    """
    series_a, series_b = args
    estimator = MINE(alpha=0.6, c=15)
    estimator.compute_score(series_a, series_b)
    score = estimator.mic()
    return score

def determine_delay(data, max_delay=100, subsample_factor=10):
    """Pick the embedding delay that minimises the MIC between a series and its lagged copy.

    The series is decimated by ``subsample_factor`` before scoring, and lags
    1..``max_delay`` are evaluated on the decimated series in parallel.

    The returned delay is expressed in samples of the ORIGINAL ``data``
    (best decimated lag times ``subsample_factor``), because callers apply it
    to the full-rate signal. Returning the decimated lag unchanged would make
    the embedding use a delay ``subsample_factor`` times shorter than the one
    that was actually scored.

    :param data: 1-D sliceable series (e.g. a NumPy array).
    :param max_delay: Largest lag to test, in decimated samples.
    :param subsample_factor: Keep every ``subsample_factor``-th sample.
    :return: Delay in original samples, as a Python ``int``.
    :raises ValueError: If the arguments are not positive or the decimated
                        series is too short to test ``max_delay``.
    """
    if max_delay < 1 or subsample_factor < 1:
        raise ValueError("max_delay and subsample_factor must be >= 1")

    subsampled_data = data[::subsample_factor]
    # Every lagged pair needs at least two samples to be scored.
    if len(subsampled_data) - max_delay < 2:
        raise ValueError(
            "series too short for the requested max_delay after subsampling"
        )

    lagged_pairs = [
        (subsampled_data[:-lag], subsampled_data[lag:])
        for lag in range(1, max_delay + 1)
    ]
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        mi_values = pool.map(mutual_info_worker, lagged_pairs)

    # Index 0 corresponds to lag 1 on the decimated series.
    best_subsampled_lag = int(np.argmin(mi_values)) + 1
    return best_subsampled_lag * subsample_factor

def delay_embedding(data, emb_dim, delay):
    """Build the time-delay embedding of a series.

    Row ``i`` of the result is ``data[i], data[i + delay], ...`` with
    ``emb_dim`` entries; there are ``len(data) - (emb_dim - 1) * delay`` rows.

    :param data: 1-D sliceable series.
    :param emb_dim: Number of coordinates per embedded vector.
    :param delay: Spacing, in samples, between consecutive coordinates.
    :return: NumPy array built from the embedded vectors.
    """
    window = emb_dim * delay
    n_vectors = len(data) - (emb_dim - 1) * delay
    vectors = []
    for start in range(n_vectors):
        vectors.append(data[start:start + window:delay])
    return np.array(vectors)

def false_nearest_neighbors(data, emb_dim, delay, R=10):
    """Fraction of false nearest neighbours for embedding dimensions 1..``emb_dim``.

    For each dimension ``d`` every embedded point is paired with its nearest
    neighbour in ``d`` dimensions. The pair is counted as "false" when adding
    the (d+1)-th delay coordinate separates the two points by more than ``R``
    times their ``d``-dimensional distance.

    The previous implementation compared each point's distance to itself (0)
    with the distance to its neighbour, which yields a ratio of exactly 1 and
    therefore always reported zero false neighbours.

    :param data: 1-D series.
    :param emb_dim: Largest embedding dimension to evaluate.
    :param delay: Embedding delay in samples.
    :param R: Threshold on the relative distance increase.
    :return: Array of length ``emb_dim``; entry ``d - 1`` is the false-neighbour
             fraction for dimension ``d``, or NaN when it cannot be evaluated
             (series too short, or every neighbour is an exact duplicate).
    """
    series = np.asarray(data, dtype=float)
    false_neighbors = np.full(emb_dim, np.nan)

    for d in range(1, emb_dim + 1):
        # Only points that still exist in the (d + 1)-dimensional embedding
        # can be tested, because their extra coordinate is needed below.
        n_points = len(series) - d * delay
        if n_points < 2:
            break

        emb_data = delay_embedding(series, d, delay)[:n_points]
        nbrs = NearestNeighbors(n_neighbors=2).fit(emb_data)
        distances, indices = nbrs.kneighbors(emb_data)

        # Column 0 is the query point itself, column 1 its nearest neighbour.
        nn_dist = distances[:, 1]
        nn_idx = indices[:, 1]

        # Separation contributed by the coordinate added in dimension d + 1.
        extra_coord = series[d * delay:d * delay + n_points]
        added_separation = np.abs(extra_coord - series[nn_idx + d * delay])

        # Exact duplicates have zero distance and would divide by zero.
        usable = nn_dist > 0
        if usable.any():
            ratio = added_separation[usable] / nn_dist[usable]
            false_neighbors[d - 1] = np.mean(ratio > R)

    return false_neighbors

# Build, store and plot the 2D / 3D delay embedding of every EEG channel
for channel_index, channel_name in enumerate(eeg_channel_names):
    channel_data = filtered_EEG_data[:, channel_index]

    emb_dim_2d, emb_dim_3d = 2, 3
    optimal_delay = determine_delay(channel_data, max_delay=20, subsample_factor=10)

    embedded_data_2d = delay_embedding(channel_data, emb_dim_2d, optimal_delay)
    embedded_data_3d = delay_embedding(channel_data, emb_dim_3d, optimal_delay)

    # Persist both embeddings, one file per channel
    np.save(os.path.join(embedding_2d_dir, f'2dembedded_{channel_name}.npy'), embedded_data_2d)
    np.save(os.path.join(embedding_3d_dir, f'3dembedded_{channel_name}.npy'), embedded_data_3d)

    # 2D phase-space scatter
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(embedded_data_2d[:, 0], embedded_data_2d[:, 1], s=1)
    ax.set_title(f'2D Phase Space Reconstruction for {channel_name}')
    ax.set_xlabel('Component 1')
    ax.set_ylabel('Component 2')
    fig.savefig(os.path.join(plots_directory, f'2D_{channel_name}.png'))
    plt.close(fig)

    # 3D phase-space scatter
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(embedded_data_3d[:, 0], embedded_data_3d[:, 1], embedded_data_3d[:, 2], s=1)
    ax.set_title(f'3D Phase Space Reconstruction for {channel_name}')
    ax.set_xlabel('Component 1')
    ax.set_ylabel('Component 2')
    ax.set_zlabel('Component 3')
    fig.savefig(os.path.join(plots_directory, f'3D_{channel_name}.png'))
    plt.close(fig)

# Bundle each embedding folder into a flat zip archive under base_dir
for dir_path, zip_name in [(embedding_2d_dir, '2d_embedded_data.zip'), (embedding_3d_dir, '3d_embedded_data.zip')]:
    archive_path = os.path.join(base_dir, zip_name)
    with zipfile.ZipFile(archive_path, 'w') as zipf:
        for file in os.listdir(dir_path):
            # Second argument stores the entry under its bare file name
            zipf.write(os.path.join(dir_path, file), file)

print("All processes completed successfully.")

All processes completed successfully.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.cluster import DBSCAN
from scipy.spatial.distance import pdist, squareform

def sample_entropy(u, m, r):
    """Sample entropy of a 1-D series.

    Counts pairs of templates (``i < j``, so self-matches are excluded) whose
    Chebyshev distance is within ``r * std(u)``, once for template length
    ``m`` (B) and once for ``m + 1`` (A), and returns ``-log(A / B)``.

    Both counts use the same ``len(u) - m`` template start positions. The
    earlier code used one fewer start position for length ``m + 1``, which
    biased the estimate (a perfectly periodic signal did not score 0).

    :param u: 1-D sequence of numbers.
    :param m: Template length.
    :param r: Tolerance as a fraction of the standard deviation of ``u``.
    :return: Sample entropy, or ``np.inf`` when either match count is zero
             (including series too short to form two templates).
    """
    series = np.asarray(u, dtype=float)
    n_templates = len(series) - m
    if n_templates < 2:
        return np.inf

    tolerance = r * np.std(series)
    starts = np.arange(n_templates)[:, None]

    def _count_matches(length):
        # Row i holds series[i:i + length]; the last row ends at len(series)
        # when length == m + 1, so every index stays in range.
        templates = series[starts + np.arange(length)]
        count = 0
        for i in range(n_templates - 1):
            # Chebyshev distance from template i to every later template.
            gaps = np.max(np.abs(templates[i + 1:] - templates[i]), axis=1)
            count += int(np.count_nonzero(gaps <= tolerance))
        return count

    matches_m = _count_matches(m)
    matches_m_plus_one = _count_matches(m + 1)
    if matches_m == 0 or matches_m_plus_one == 0:
        return np.inf
    return -np.log(matches_m_plus_one / matches_m)

# Define a function to calculate the box-counting dimension
def box_counting_dimension(data, k=4):
    """
    Estimate a box-counting-style dimension from k-nearest-neighbour distances.

    The mean distance ``r`` from each point to its k-th nearest neighbour is
    used as the length scale, and the estimate is ``log(N) / log(1 / r)``
    with ``N`` the number of points.

    Neighbour distances come from a KD-tree query rather than a full pairwise
    distance matrix, so memory grows with ``N * k`` instead of ``N ** 2``.

    :param data: Array-like of shape (n_points, n_dims).
    :param k: Which nearest neighbour to use (1 = closest other point).
    :return: The dimension estimate, or NaN when ``r`` is 0 or 1 (the formula
             is degenerate there).
    :raises ValueError: If ``k`` is not in ``1 .. n_points - 1``.
    """
    from scipy.spatial import cKDTree

    points = np.asarray(data, dtype=float)
    N = len(points)
    if not 1 <= k < N:
        raise ValueError("k must satisfy 1 <= k < number of points")

    # Ask for k + 1 neighbours: the closest hit is the query point itself.
    neighbor_distances, _ = cKDTree(points).query(points, k=k + 1)
    k_distances = neighbor_distances[:, k]
    r = np.mean(k_distances)

    if r <= 0 or r == 1:
        # log(1 / r) is infinite or zero; no meaningful estimate exists.
        return np.nan
    return np.log(N) / np.log(1 / r)

# Per-channel analysis: clustering, sample entropy and dimension estimate
def analyze_channel_data(channel_data_3d, channel_name):
    """Print cluster count, sample entropy and dimension estimate for one channel.

    :param channel_data_3d: 2-D array of embedded points, one point per row.
    :param channel_name: Label used in the printed report.
    :return: None; results are printed.
    """
    # Apply DBSCAN clustering; label -1 marks noise and is not a cluster
    dbscan = DBSCAN(eps=0.5, min_samples=10)
    clusters = dbscan.fit_predict(channel_data_3d)
    n_clusters = len(set(clusters)) - (1 if -1 in clusters else 0)

    # Sample entropy is defined on a time series. Flattening the embedding
    # row by row would interleave the delayed copies of the signal, so use a
    # single coordinate instead. For rows built as
    # [x[i], x[i + delay], x[i + 2 * delay]] column 0 is the undelayed series.
    # NOTE(review): assumes the file was produced by delay_embedding — confirm.
    series = channel_data_3d[:, 0]
    sample_entropy_value = sample_entropy(series, m=2, r=0.2)  # Adjust m and r based on your data

    # Estimate the box-counting dimension on the full embedded point cloud
    box_dim = box_counting_dimension(channel_data_3d)

    print(f"Channel: {channel_name}")
    print(f"  Number of Clusters: {n_clusters}")
    print(f"  Sample Entropy: {sample_entropy_value}")
    print(f"  Box-Counting Dimension: {box_dim}")


# Run the analysis on the stored 3D embedding of every channel
for channel_name in eeg_channel_names:
    data_path = os.path.join(
        embedding_3d_dir,
        f'3dembedded_{channel_name}.npy',
    )
    channel_data_3d = np.load(data_path)

    # Prints the per-channel report
    analyze_channel_data(channel_data_3d=channel_data_3d, channel_name=channel_name)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import multiprocessing
import os
import zipfile
from minepy import MINE

# Project root and the output folders derived from it
base_dir = '/home/vincent/MySSD/JupyterProjects/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/EEG_Chaos_Kuramoto_Neural_Net'
embedding_2d_dir = os.path.join(base_dir, '2dembedding_data')
embedding_3d_dir = os.path.join(base_dir, '3dembedding_data')
plots_directory = os.path.join(base_dir, 'plots')

# Create any output folder that is missing (no-op when it already exists)
os.makedirs(name=embedding_2d_dir, exist_ok=True)
os.makedirs(name=embedding_3d_dir, exist_ok=True)
os.makedirs(name=plots_directory, exist_ok=True)

# Load the EEG recording
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
EEG_data = np.load(
    os.path.join(base_dir, 'eeg_data_with_channels.npy'),
    allow_pickle=True,
)

# EEG channel names; position in this list is used as the column index of
# the recording further down.
eeg_channel_names = [
    'Fp1', 'Fpz', 'Fp2',
    'F7', 'F3', 'Fz', 'F4', 'F8',
    'FC5', 'FC1', 'FC2', 'FC6',
    'M1', 'T7', 'C3', 'Cz', 'C4', 'T8', 'M2',
    'CP5', 'CP1', 'CP2', 'CP6',
    'P7', 'P3', 'Pz', 'P4', 'P8',
    'POz', 'O1', 'Oz', 'O2',
]

# Time window of interest, converted from seconds to row indices
sampling_rate = 1000  # Hz
start_time = 805.571
end_time = 921.515
start_index = int(start_time * sampling_rate)
end_index = int(end_time * sampling_rate)
filtered_EEG_data = EEG_data[start_index:end_index, :]

max_dim = 20

def mutual_info_worker(args):
    """Return the MIC score of one pair of series.

    :param args: 2-tuple ``(data1, data2)``; packed into a single argument so
                 the function can be handed to ``Pool.map``.
    :return: Value of ``MINE.mic()`` after scoring the pair.
    """
    first_series, second_series = args
    scorer = MINE(alpha=0.6, c=15)
    scorer.compute_score(first_series, second_series)
    mic_value = scorer.mic()
    return mic_value

def determine_delay(data, max_delay=100, subsample_factor=10):
    """Pick the embedding delay that minimises the MIC between a series and its lagged copy.

    The series is decimated by ``subsample_factor`` before scoring, and lags
    1..``max_delay`` are evaluated on the decimated series in parallel.

    The returned delay is expressed in samples of the ORIGINAL ``data``
    (best decimated lag times ``subsample_factor``), because callers apply it
    to the full-rate signal. Returning the decimated lag unchanged would make
    the embedding use a delay ``subsample_factor`` times shorter than the one
    that was actually scored.

    :param data: 1-D sliceable series (e.g. a NumPy array).
    :param max_delay: Largest lag to test, in decimated samples.
    :param subsample_factor: Keep every ``subsample_factor``-th sample.
    :return: Delay in original samples, as a Python ``int``.
    :raises ValueError: If the arguments are not positive or the decimated
                        series is too short to test ``max_delay``.
    """
    if max_delay < 1 or subsample_factor < 1:
        raise ValueError("max_delay and subsample_factor must be >= 1")

    subsampled_data = data[::subsample_factor]
    # Every lagged pair needs at least two samples to be scored.
    if len(subsampled_data) - max_delay < 2:
        raise ValueError(
            "series too short for the requested max_delay after subsampling"
        )

    lagged_pairs = [
        (subsampled_data[:-lag], subsampled_data[lag:])
        for lag in range(1, max_delay + 1)
    ]
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        mi_values = pool.map(mutual_info_worker, lagged_pairs)

    # Index 0 corresponds to lag 1 on the decimated series.
    best_subsampled_lag = int(np.argmin(mi_values)) + 1
    return best_subsampled_lag * subsample_factor

def delay_embedding(data, emb_dim, delay):
    """Build the time-delay embedding of a series.

    Row ``i`` of the result is ``data[i], data[i + delay], ...`` with
    ``emb_dim`` entries; there are ``len(data) - (emb_dim - 1) * delay`` rows.

    :param data: 1-D sliceable series.
    :param emb_dim: Number of coordinates per embedded vector.
    :param delay: Spacing, in samples, between consecutive coordinates.
    :return: NumPy array built from the embedded vectors.
    """
    span = emb_dim * delay
    row_count = len(data) - (emb_dim - 1) * delay
    rows = []
    for first in range(row_count):
        rows.append(data[first:first + span:delay])
    return np.array(rows)

def false_nearest_neighbors(data, emb_dim, delay, R=10):
    """Fraction of false nearest neighbours for embedding dimensions 1..``emb_dim``.

    For each dimension ``d`` every embedded point is paired with its nearest
    neighbour in ``d`` dimensions. The pair is counted as "false" when adding
    the (d+1)-th delay coordinate separates the two points by more than ``R``
    times their ``d``-dimensional distance.

    The previous implementation compared each point's distance to itself (0)
    with the distance to its neighbour, which yields a ratio of exactly 1 and
    therefore always reported zero false neighbours.

    :param data: 1-D series.
    :param emb_dim: Largest embedding dimension to evaluate.
    :param delay: Embedding delay in samples.
    :param R: Threshold on the relative distance increase.
    :return: Array of length ``emb_dim``; entry ``d - 1`` is the false-neighbour
             fraction for dimension ``d``, or NaN when it cannot be evaluated
             (series too short, or every neighbour is an exact duplicate).
    """
    series = np.asarray(data, dtype=float)
    false_neighbors = np.full(emb_dim, np.nan)

    for d in range(1, emb_dim + 1):
        # Only points that still exist in the (d + 1)-dimensional embedding
        # can be tested, because their extra coordinate is needed below.
        n_points = len(series) - d * delay
        if n_points < 2:
            break

        emb_data = delay_embedding(series, d, delay)[:n_points]
        nbrs = NearestNeighbors(n_neighbors=2).fit(emb_data)
        distances, indices = nbrs.kneighbors(emb_data)

        # Column 0 is the query point itself, column 1 its nearest neighbour.
        nn_dist = distances[:, 1]
        nn_idx = indices[:, 1]

        # Separation contributed by the coordinate added in dimension d + 1.
        extra_coord = series[d * delay:d * delay + n_points]
        added_separation = np.abs(extra_coord - series[nn_idx + d * delay])

        # Exact duplicates have zero distance and would divide by zero.
        usable = nn_dist > 0
        if usable.any():
            ratio = added_separation[usable] / nn_dist[usable]
            false_neighbors[d - 1] = np.mean(ratio > R)

    return false_neighbors
    
def caos_method(data, max_dim=20, tau=1):
    """
    Simple implementation of Cao's method for determining the minimal embedding dimension.

    For each dimension ``d`` every embedded point is paired with its nearest
    neighbour in ``d`` dimensions, and the distance between that same pair in
    ``d + 1`` dimensions is divided by their distance in ``d`` dimensions.
    Euclidean distances are used throughout.

    :param data: Input time series data (1-D).
    :param max_dim: Maximum embedding dimension to consider.
    :param tau: Time delay in samples.
    :return: Tuple ``(E1, E2)``.
             ``E1[d - 1]`` is the mean distance ratio for dimension ``d``
             (length ``max_dim``).
             ``E2[d - 2]`` is ``E1[d - 1] / E1[d - 2]`` for ``d >= 2``
             (length ``max_dim - 1``); it levelling off near 1 indicates that
             adding further dimensions no longer unfolds the attractor.
             Entries that cannot be computed (series too short, or only
             duplicate points) are NaN.
    :raises ValueError: If ``max_dim`` or ``tau`` is smaller than 1.
    """
    from scipy.spatial import cKDTree

    if max_dim < 1 or tau < 1:
        raise ValueError("max_dim and tau must be >= 1")

    series = np.asarray(data, dtype=float)
    n_samples = len(series)

    E1 = np.full(max_dim, np.nan)
    E2 = np.full(max_dim - 1, np.nan)

    for dim in range(1, max_dim + 1):
        # Number of vectors that exist in BOTH the dim- and (dim + 1)-dimensional
        # embeddings, so every pair can be compared across the two.
        n_points = n_samples - dim * tau
        if n_points < 2:
            break

        # Row i is [x[i], x[i + tau], ..., x[i + dim * tau]]; dropping the last
        # column gives the dim-dimensional embedding of the same points.
        offsets = tau * np.arange(dim + 1)
        embedded_next = series[np.arange(n_points)[:, None] + offsets]
        embedded = embedded_next[:, :dim]

        # k=2 because the closest hit of each query is the point itself.
        nn_dist, nn_idx = cKDTree(embedded).query(embedded, k=2)
        nn_dist = nn_dist[:, 1]
        nn_idx = nn_idx[:, 1]

        # Exact duplicates have zero distance and would divide by zero.
        usable = nn_dist > 0
        if not usable.any():
            continue

        # Distance between the SAME pairs after adding one more coordinate.
        dist_next = np.linalg.norm(embedded_next - embedded_next[nn_idx], axis=1)

        E1[dim - 1] = np.mean(dist_next[usable] / nn_dist[usable])
        if dim > 1:
            E2[dim - 2] = E1[dim - 1] / E1[dim - 2]

    return E1, E2

# Analyze each EEG channel using Cao's method and generate plots
for channel_index, channel_name in enumerate(eeg_channel_names):
    channel_data = filtered_EEG_data[:, channel_index]

    # Delay chosen from the lag scan in determine_delay
    optimal_delay = determine_delay(channel_data, max_delay=20, subsample_factor=10)

    # Determine the optimal embedding dimension using Cao's method.
    # E2[k] belongs to dimension k + 2, hence the offset.
    # NOTE(review): np.argmax returns 0 when no entry is below the threshold,
    # which silently selects dimension 2 — confirm that fallback is wanted.
    E1, E2 = caos_method(channel_data, max_dim=20, tau=optimal_delay)
    optimal_dim = np.argmax(E2 < 1.05) + 2  # Adjust this criterion based on your analysis

    # Perform embedding with the determined optimal dimension
    embedded_data = delay_embedding(channel_data, emb_dim=optimal_dim, delay=optimal_delay)

    # Save the embedded data
    # NOTE(review): every dimension other than 2 is written to the 3D folder
    # under a '3dembedded_' name, even when optimal_dim > 3 — confirm intent.
    if optimal_dim == 2:
        np.save(os.path.join(embedding_2d_dir, f'2dembedded_{channel_name}.npy'), embedded_data)
    else:
        np.save(os.path.join(embedding_3d_dir, f'3dembedded_{channel_name}.npy'), embedded_data)

    # Generate and save plot for the optimal embedding. The branch must test
    # optimal_dim: `emb_dim` is not defined in this cell, so the previous
    # check either raised NameError or used a stale value.
    if optimal_dim == 2:
        plt.figure(figsize=(8, 6))
        plt.scatter(embedded_data[:, 0], embedded_data[:, 1], s=1)
        plt.title(f'2D Phase Space Reconstruction for {channel_name}')
        plt.xlabel('Component 1')
        plt.ylabel('Component 2')
        plt.savefig(os.path.join(plots_directory, f'2D_{channel_name}.png'))
        plt.close()
    elif optimal_dim == 3:
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(embedded_data[:, 0], embedded_data[:, 1], embedded_data[:, 2], s=1)
        ax.set_title(f'3D Phase Space Reconstruction for {channel_name}')
        ax.set_xlabel('Component 1')
        ax.set_ylabel('Component 2')
        ax.set_zlabel('Component 3')
        plt.savefig(os.path.join(plots_directory, f'3D_{channel_name}.png'))
        plt.close()
    # Dimensions above 3 are saved but not plotted.


# Zip the embedded data directories
for dir_path, zip_name in [(embedding_2d_dir, '2d_embedded_data_caos.zip'), (embedding_3d_dir, '3d_embedded_data_caos.zip')]:
    with zipfile.ZipFile(os.path.join(base_dir, zip_name), 'w') as zipf:
        for file in os.listdir(dir_path):
            # Second argument stores the entry under its bare file name
            zipf.write(os.path.join(dir_path, file), file)

print("All processes completed successfully.")

In [None]:
import numpy as np
import gudhi as gd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Assuming `reduced_data_umap` is your dimensionality-reduced data from the previous steps

def compute_persistence_diagrams(data):
    """
    Build a Vietoris-Rips complex on the points and return its persistence.

    The complex uses edges up to length 2 and simplices up to dimension 2.

    :param data: Point cloud to analyse.
    :return: Result of ``SimplexTree.persistence()`` for that complex.
    """
    simplex_tree = gd.RipsComplex(
        points=data,
        max_edge_length=2,
    ).create_simplex_tree(max_dimension=2)
    return simplex_tree.persistence()

def plot_persistence_diagrams(persistence):
    """
    Draw the persistence diagram with GUDHI and display the figure.

    :param persistence: Persistence intervals to plot.
    :return: None.
    """
    diagram = persistence
    gd.plot_persistence_diagram(diagram)
    # Display whatever pyplot currently has open
    plt.show()

def calculate_betti_numbers(persistence):
    """
    Count persistence intervals per homology dimension 0, 1 and 2.

    The previous version tallied into a dict keyed by the integers 0-2 but
    returned ``['b0']``, ``['b1']``, ``['b2']``, so it always raised KeyError.

    NOTE(review): every interval of a dimension is counted, finite or
    infinite. That is the number of features seen across the filtration, not
    the Betti number at one particular scale — confirm which is wanted.

    :param persistence: Iterable of ``(dimension, (birth, death))`` entries.
    :return: Tuple ``(b0, b1, b2)`` with the interval count for dimensions
             0, 1 and 2 (components, loops, voids). Other dimensions are
             ignored.
    """
    counts = [0, 0, 0]
    for interval in persistence:
        dimension = interval[0]
        # Keep only the three dimensions that are reported
        if 0 <= dimension < 3:
            counts[dimension] += 1
    return tuple(counts)

# Compute Persistence Diagrams
# NOTE(review): `reduced_data_umap` is not defined anywhere in the visible
# cells; presumably it comes from an earlier dimensionality-reduction step —
# confirm it exists before running this cell.
persistence = compute_persistence_diagrams(reduced_data_umap)

# Plot Persistence Diagrams
# NOTE(review): the plotting helper uses `plt`, which this cell does not
# import; it relies on an earlier cell having imported matplotlib.pyplot.
plot_persistence_diagrams(persistence)

# Calculate Betti Numbers
betti_numbers = calculate_betti_numbers(persistence)
print("Betti Numbers:", betti_numbers)

# Split dataset into training and testing sets (30% held out, fixed seed)
# NOTE(review): `X` and `y` are not defined in the visible cells — confirm
# where the feature matrix and labels are built.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the Random Forest classifier (100 trees, fixed seed)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model: fraction of held-out samples predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")