In [6]:
import os

import mat73
import numpy as np
import pandas as pd
from tqdm import tqdm
# Shared settings for the hard-mining cells.
# `signal` (the raw recording, a numpy array) is loaded per file further down.
window_length = 3    # window duration, in seconds
sampling_rate = 128  # signal samples per second
num_points = window_length * sampling_rate  # number of samples in one window
time_step = 8        # factor turning a prediction index into a signal sample index
cluster_len = 10     # minimum run of consecutive above-threshold rows forming a cluster
threshold = 0.75     # rows scoring above this count as false positives

def hard_mine(df):
    # Correcting the column name and identifying false positives
    column_name = df.columns[0]
    false_positives = df[df[column_name] > threshold]
    
    # Display the number of false positives and their first few rows
    num_false_positives = len(false_positives)
    false_positives.head(), num_false_positives
    
    # Find clusters of false positives
    clusters = []
    current_cluster = []
    
    # Iterate over the false positives
    for idx in false_positives.index:
        # If current_cluster is empty or the current index is consecutive to the last index in current_cluster
        if not current_cluster or idx == current_cluster[-1] + 1:
            current_cluster.append(idx)
        else:
            # If the current index is not consecutive, check if the current cluster is valid (has at least 8 false positives)
            if len(current_cluster) >= cluster_len:
                clusters.append(current_cluster)
            # Reset current_cluster and start a new one
            current_cluster = [idx]
    
    # Check for the last cluster
    if len(current_cluster) >= cluster_len:
        clusters.append(current_cluster)
    
    # Display the number of clusters and the indices of the first few clusters
    num_clusters = len(clusters)
    # Extract the index of the highest prediction value from each cluster
    highest_prediction_indices = []
    bonobo_idx = []
    
    for cluster in clusters:
        max_index = df.loc[cluster][column_name].idxmax()
        highest_prediction_indices.append(max_index)
    
    filtered_indices = []
    for idx in sorted(highest_prediction_indices):
        if not filtered_indices or (idx - filtered_indices[-1] > num_points):
            filtered_indices.append(idx)
        else:
            if idx - filtered_indices[-1] <= num_points:
                last_idx = filtered_indices.pop()  # Store the popped value if needed
    return filtered_indices

# Cut a fixed-length window out of every training control recording at each
# hard-mined peak and store each window as its own .npy file.
code_dir = "/run/media/exx/Expansion2/code/Spike_37chan"
pred_dir = os.path.join(code_dir, "Models", "YOUR_MODEL_NAME", "con")
out_dir = os.path.join(code_dir, "Models", "YOUR_MODEL_NAME", "hardmine_npy_round2")
signal_dir = "/run/media/exx/Expansion2/Bonobo_data"

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

path_controls = os.path.join(code_dir, "controlset.csv")
controls = pd.read_csv(path_controls)
train_controls = controls[controls['Mode'] == 'Train']

for eeg_file in tqdm(train_controls.EEG_index):
    # Per-recording prediction table; hard_mine reads its first column.
    df = pd.read_csv(os.path.join(pred_dir, eeg_file + ".csv"))
    filtered_indices = hard_mine(df)
    if not filtered_indices:
        # Nothing to cut for this recording; avoid loading the .mat file.
        continue

    signal = mat73.loadmat(os.path.join(signal_dir, eeg_file + ".mat"))['data']  # 128 sample rate
    # Swap the two axes so that windows can be sliced along axis 0.
    signal = signal.transpose(1, 0)

    # Peak indices are prediction rows; scale them to signal sample positions.
    # Presumably predictions are at 16 per second (128 / time_step) — TODO confirm.
    start_indices = [int(time * time_step) for time in filtered_indices]

    for start in start_indices:
        seg = signal[start : start + num_points]
        if seg.shape[0] != num_points:
            # The window runs past the end of the recording; a shorter array
            # would not match the shape of the other saved windows.
            continue
        path = os.path.join(out_dir, eeg_file + '_' + str(start) + '.npy')
        # Transpose back to the axis order of the loaded 'data' array.
        np.save(path, seg.transpose(1, 0))

  0%|▏                                                                                                                                      | 5/4011 [00:02<30:39,  2.18it/s]


TypeError: slice indices must be integers or None or have an __index__ method

In [40]:
import pandas as pd
import os

# Build hard_mining.csv: the labelled lookup table plus one row per hard-mined
# .npy window, each recorded with 3 votes, 0 fraction_of_yes, Mode 'Train'.
# NOTE(review): this cell reads from /Expansion/.../hardmine_npy while the
# extraction cell writes to /Expansion2/.../hardmine_npy_round2 — confirm the
# intended folder.

# read csv
df = pd.read_csv('/run/media/exx/Expansion/code/Spike_37chan/lut_labelled_20230628.csv')

# Names of all .npy files in the folder. os.listdir returns entries in
# arbitrary order, so sort to make the appended rows reproducible.
npy_files = sorted(
    f
    for f in os.listdir('/run/media/exx/Expansion/code/Spike_37chan/Models/YOUR_MODEL_NAME/hardmine_npy')
    if f.endswith('.npy')
)

# Event name = file name without the '.npy' suffix.
event_files = [f[:-4] for f in npy_files]
# Recording name = event name with its last '_'-separated part removed.
eeg_files = [f.rpartition('_')[0] for f in event_files]

# One row per event file, with constant label columns.
n_events = len(event_files)
new_data = {
    'event_file': event_files,
    'eeg_file': eeg_files,
    'total_votes_received': [3] * n_events,
    'fraction_of_yes': [0] * n_events,
    'Mode': ['Train'] * n_events
}

new_df = pd.DataFrame(new_data)

# Append the new rows to the original table and write the combined file.
df = pd.concat([df, new_df], ignore_index=True)
df.to_csv('/run/media/exx/Expansion/code/Spike_37chan/hard_mining.csv', index=False)
