In [1]:
import pandas as pd
import numpy as np
import asyncio
import os
import mne
import preprocess_run as preprocess_run


In [3]:
def create_file_names(n_subjects=5, data_root="/mnt/a/MainFolder/Neural Nirvana/Data"):
    """Build the EEG header and events CSV paths for each subject.

    Parameters
    ----------
    n_subjects : int, default 5
        Number of subjects. Subject IDs run from 1 to ``n_subjects`` and are
        zero-padded to at least two digits (``01``, ``02``, ..., ``10``).
    data_root : str
        Directory that contains the ``sub-XX`` folders. A trailing ``/`` is
        tolerated.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(paths_EEG, paths_IMG)``: the ``*_task-rsvp_eeg.vhdr`` paths and the
        ``*_task-rsvp_events.csv`` paths, in subject order.
    """
    # Paths are joined with "/" explicitly so the result is the same string on
    # every platform.
    root = data_root.rstrip("/")

    paths_EEG = []
    paths_IMG = []
    for subject in range(1, n_subjects + 1):
        subject_id = f"{subject:02d}"
        prefix = f"{root}/sub-{subject_id}/eeg/sub-{subject_id}"
        paths_EEG.append(prefix + "_task-rsvp_eeg.vhdr")
        paths_IMG.append(prefix + "_task-rsvp_events.csv")

    return paths_EEG, paths_IMG

# Build the per-subject path lists once; later cells index into them.
paths_EEG, paths_IMG = create_file_names()

In [31]:
def get_data(eeg_path):
    """Load a BrainVision recording as a DataFrame together with its MNE info.

    The recording is read with ``preload=True`` and converted with
    ``to_data_frame(index="time", time_format="ms")``. The first 20 rows of
    the frame are printed as a quick sanity check.

    Returns
    -------
    tuple
        ``(frame, info)``: the converted DataFrame and the recording's
        ``info`` attribute.
    """
    recording = mne.io.read_raw_brainvision(eeg_path, preload=True)
    frame = recording.to_data_frame(index="time", time_format="ms")
    print(frame.head(20))
    return frame, recording.info

def get_events(event_path):
    """Read stimulus onsets and stimulus names from an events CSV.

    The ``time_stimon`` column is scaled by 1000 (the original comment calls
    this a conversion to ms) and rounded to the nearest integer. The
    ``stimname`` column is returned unchanged.

    Returns
    -------
    tuple
        ``(onsets, stim_names)``: an integer NumPy array and a pandas Series.
    """
    events_table = pd.read_csv(event_path)
    scaled_onsets = events_table["time_stimon"].values * 1000
    onsets = np.round(scaled_onsets).astype(int)
    return onsets, events_table["stimname"]

# Load the first entry of each path list: the recording and its stimulus events.
data, info = get_data(paths_EEG[0])
stm_on, stim_names = get_events(paths_IMG[0])

Extracting parameters from /mnt/a/MainFolder/Neural Nirvana/Data/sub-01/eeg/sub-01_task-rsvp_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 3035739  =      0.000 ...  3035.739 secs...


channel           Fp1           Fz          F3           F7          FT9  \
time                                                                       
0       -12011.370803 -5759.762676 -269.921737 -6363.229164 -8553.999527   
1       -12012.103225 -5760.299785 -265.673692 -6363.326820 -8551.265153   
2       -12010.833694 -5758.004864 -263.134631 -6361.861977 -8545.942890   
3       -12008.294633 -5755.661116 -263.525256 -6360.152994 -8544.429219   
4       -12006.536821 -5753.707992 -265.429552 -6361.227212 -8548.433123   
5       -12003.509479 -5751.217758 -265.722520 -6360.397134 -8549.067888   
6       -11999.310263 -5751.217758 -268.066269 -6358.346354 -8546.772968   
7       -11993.646203 -5750.729478 -267.919785 -6358.102213 -8540.474143   
8       -11992.864953 -5750.241197 -264.892443 -6358.541666 -8535.737817   
9       -11994.915734 -5748.434557 -262.109241 -6358.883463 -8540.474143   
10      -11991.448938 -5746.872058 -259.960804 -6358.346354 -8548.237811   
11      -119

In [16]:
# (rows, columns) of the frame returned by get_data.
print(data.shape)

(3035740, 63)


In [32]:
def downsample_data(df, factor=4):
    """Downsample a frame by averaging consecutive, non-overlapping row groups.

    Rows are grouped ``factor`` at a time in their existing order and each
    group is replaced by its column-wise mean. Trailing rows that do not fill
    a complete group are dropped. No filtering is applied beyond this block
    mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to downsample; every column is averaged.
    factor : int, default 4
        Number of consecutive rows averaged into one output row.

    Returns
    -------
    pandas.DataFrame
        Frame with ``len(df) // factor`` rows and the same columns as ``df``.
        The original index is not carried over; the result has a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If ``factor`` is not a positive integer.
    """
    if factor < 1 or int(factor) != factor:
        raise ValueError("factor must be a positive integer")
    factor = int(factor)

    n_rows, n_cols = df.shape
    # Use an explicit block count rather than reshape(-1, ...) so that frames
    # with fewer than `factor` rows, or with no columns, still reshape cleanly.
    n_blocks = n_rows // factor

    blocks = df.iloc[: n_blocks * factor].to_numpy().reshape(n_blocks, factor, n_cols)
    return pd.DataFrame(blocks.mean(axis=1), columns=df.columns)

# NOTE(review): rebinding `data` makes this cell non-idempotent. Re-running it
# downsamples the already-downsampled frame again.
data = downsample_data(data)
print(data.head())

channel           Fp1           Fz          F3           F7          FT9  \
0       -12010.650589 -5758.432110 -265.563829 -6362.142739 -8548.909197   
1       -12000.750692 -5751.718247 -266.784531 -6359.518228 -8546.187031   
2       -11991.424524 -5748.019518 -261.804065 -6358.175456 -8543.977559   
3       -11997.759970 -5752.633773 -267.162949 -6364.571937 -8554.316909   
4       -12006.573442 -5756.100569 -268.261581 -6363.827308 -8557.747083   

channel         FC5          FC1           C3           T7          TP9  ...  \
0       -544.530971 -9914.155080 -2389.525144 -7943.184410 -2396.739496  ...   
1       -542.870816 -9913.288381 -2393.455806 -7944.576010 -2472.044633  ...   
2       -539.965544 -9911.701468 -2388.597410 -7936.263026 -2450.731167  ...   
3       -546.630580 -9913.715627 -2391.246334 -7969.429513 -2342.222922  ...   
4       -543.786343 -9914.338186 -2396.702874 -7985.115541 -2359.129652  ...   

channel          TP8          C6           C2          FC4    

In [33]:
# (rows, columns) after downsampling.
print(data.shape)

(758935, 63)


In [34]:
def _nearest_row_positions(sample_times, event_times, is_sorted_unique):
    """Return, per event time, the row position of the closest sample time.

    When two rows are equally close, the earlier row wins.
    """
    if not is_sorted_unique:
        # Unsorted or duplicated index: scan every row for each event.
        # argmin returns the first minimum, so ties go to the earlier row.
        return np.array(
            [np.abs(sample_times - t).argmin() for t in event_times], dtype=int
        )

    last = len(sample_times) - 1
    # First row whose time is >= the event; the row before it is the only
    # other candidate for "nearest".
    upper = np.searchsorted(sample_times, event_times, side="left")
    lower = np.clip(upper - 1, 0, last)
    upper = np.clip(upper, 0, last)
    pick_lower = np.abs(event_times - sample_times[lower]) <= np.abs(
        sample_times[upper] - event_times
    )
    return np.where(pick_lower, lower, upper)


def adjust_events_downsample(events, names, data, factor=4):
    """Mark the downsampled rows that lie closest to each event time.

    Each row is assigned the time ``index * factor``. For every event, the
    row whose time is nearest to the event time is flagged. The input frame
    is not modified; a copy with two extra columns is returned.

    Parameters
    ----------
    events : array-like of numbers
        Event times, in the same units as ``data.index * factor``.
    names : sequence
        One label per event, read by position.
    data : pandas.DataFrame
        Downsampled frame with a numeric index.
    factor : int, default 4
        Multiplier that maps a row's index value back to the event time base.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with an integer ``events`` column (1 on flagged rows,
        0 elsewhere) and an ``event_names`` column (the event label on flagged
        rows, ``""`` elsewhere). If several events map to the same row, the
        last one in ``events`` supplies the label.

    Raises
    ------
    ValueError
        If ``names`` has fewer entries than ``events``, or if ``data`` is
        empty while ``events`` is not.
    """
    event_times = np.asarray(events)
    labels = np.asarray(names, dtype=object)
    if len(labels) < len(event_times):
        raise ValueError("names must provide one entry per event")

    n_rows = len(data)
    if n_rows == 0 and len(event_times) > 0:
        raise ValueError("cannot place events on an empty DataFrame")

    flags = np.zeros(n_rows, dtype="int64")
    row_labels = np.full(n_rows, "", dtype=object)

    if len(event_times) > 0:
        sample_times = data.index.to_numpy() * factor
        ordered = data.index.is_monotonic_increasing and data.index.is_unique
        rows = _nearest_row_positions(sample_times, event_times, ordered)
        # Assign in event order with an explicit loop so that "last event
        # wins" is deterministic when several events land on the same row.
        for row, label in zip(rows, labels):
            flags[row] = 1
            row_labels[row] = label

    # Work on a copy so the caller's frame is left untouched.
    out = data.copy()
    out["events"] = flags
    out["event_names"] = row_labels
    return out

# Add the `events` / `event_names` columns to the downsampled frame.
data = adjust_events_downsample(stm_on, stim_names, data)

In [35]:
# NOTE(review): a bare expression is only rendered when it is the last
# statement of a cell, so this head() call displays nothing here.
data.head(100)
# Unique event names; includes "" for rows that carry no event.
event_names = data["event_names"].unique()

In [39]:
# Preview the first 3400 rows; the rendered table is truncated by pandas.
data.head(3400)

channel,Fp1,Fz,F3,F7,FT9,FC5,FC1,C3,T7,TP9,...,C2,FC4,FT8,F6,AF8,AF4,F2,FCz,events,event_names
0,-12010.650589,-5758.432110,-265.563829,-6362.142739,-8548.909197,-544.530971,-9914.155080,-2389.525144,-7943.184410,-2396.739496,...,-6624.654811,-3366.404526,-2062.779706,-721.752560,-12683.074561,-11513.641566,-3593.064567,-24.951159,0,
1,-12000.750692,-5751.718247,-266.784531,-6359.518228,-8546.187031,-542.870816,-9913.288381,-2393.455806,-7944.576010,-2472.044633,...,-6620.980497,-3363.706774,-2065.269939,-718.224730,-12677.740091,-11491.827612,-3586.704707,-23.327625,0,
2,-11991.424524,-5748.019518,-261.804065,-6358.175456,-8543.977559,-539.965544,-9911.701468,-2388.597410,-7936.263026,-2450.731167,...,-6625.924342,-3366.123765,-2063.853924,-719.335569,-12675.457377,-11491.571265,-3587.534784,-26.989732,0,
3,-11997.759970,-5752.633773,-267.162949,-6364.571937,-8554.316909,-546.630580,-9913.715627,-2391.246334,-7969.429513,-2342.222922,...,-6624.081081,-3363.987535,-2060.387129,-717.541137,-12674.895854,-11491.998511,-3587.742304,-24.560534,0,
4,-12006.573442,-5756.100569,-268.261581,-6363.827308,-8557.747083,-543.786343,-9914.338186,-2396.702874,-7985.115541,-2359.129652,...,-6623.409695,-3369.126693,-2068.883218,-719.189085,-12681.109230,-11497.931125,-3592.075798,-23.608386,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3395,-12056.317069,-5803.048787,-293.090670,-6312.081729,-8526.606963,-539.074431,-9974.225850,-2412.474351,-7925.154634,-2343.675558,...,-6615.731476,-3413.950889,-2130.748421,-789.965416,-12782.220018,-11532.867630,-3624.766211,-41.613748,0,
3396,-12055.267265,-5803.451618,-289.965672,-6316.061219,-8534.199732,-541.430387,-9973.981710,-2413.414292,-7939.912927,-2340.562766,...,-6616.891143,-3412.815635,-2133.263068,-791.064048,-12785.052048,-11532.037553,-3624.400000,-44.213845,0,
3397,-12049.505549,-5801.535116,-292.834323,-6316.403016,-8533.516139,-543.053921,-9973.407980,-2413.792709,-7929.097503,-2389.512937,...,-6614.681672,-3410.923547,-2133.372931,-791.466880,-12779.925097,-11532.562455,-3625.169042,-43.493630,0,
3398,-12053.118829,-5804.782184,-298.998870,-6313.863955,-8530.708523,-542.846402,-9974.921651,-2411.656480,-7926.131196,-2382.591554,...,-6617.001007,-3411.717003,-2133.482794,-793.542074,-12780.877245,-11549.798774,-3628.355076,-44.946266,0,


In [36]:
# Show the unique event names collected from the `event_names` column.
print(event_names)

['' 'carousel_11s.jpg' 'piano_05n.jpg' ... 'necklace_11s.jpg'
 'wrap_09s.jpg' 'spout_04s.jpg']


In [37]:
#get value counts for the events column
event_counts = data["events"].value_counts()
print(event_counts)

events
0    737012
1     21923
Name: count, dtype: int64
