In [1]:
import numpy as np
import pandas as pd
from mne.io import RawArray
from mne.channels import read_montage
from mne.epochs import concatenate_epochs
from mne import create_info, find_events, Epochs, concatenate_raws, pick_types
from mne.decoding import CSP

from sklearn.linear_model import LogisticRegression
from glob import glob

from scipy.signal import butter, lfilter, convolve, boxcar

def creat_mne_raw_object(fname,read_events=True,norm=False):
    """Create an MNE ``RawArray`` from one EEG data CSV file.

    Parameters
    ----------
    fname : str
        Path to the data CSV. The first column is skipped; every remaining
        column is treated as one EEG channel named after its header.
    read_events : bool
        When True, the companion CSV (``fname`` with ``'_data'`` replaced by
        ``'_events'``) is read and its columns (first column skipped) are
        appended below the EEG rows as ``'stim'`` channels.
    norm : bool
        When True, every EEG channel is standardised over the whole file
        (mean subtracted, divided by the standard deviation).

    Returns
    -------
    raw : RawArray
        Channels x samples recording with a fixed sampling rate of 500 Hz,
        the ``standard_1005`` montage and ``info['filename']`` set to ``fname``.
    """
    # Read EEG file
    data = pd.read_csv(fname)

    # channel names: every column except the first one
    ch_names = list(data.columns[1:])

    # read EEG standard montage from mne; this must happen before the event
    # names are appended to ch_names further down
    montage = read_montage('standard_1005',ch_names)

    ch_type = ['eeg']*len(ch_names)
    data = 1e-6*np.array(data[ch_names]).T # From microvolts to volts

    # this normalizes the data per series (one file), channel by channel.
    if norm:
        # Build a real 2-D array: on Python 3 ``map`` returns a lazy iterator,
        # which neither np.concatenate nor RawArray below can consume.
        data = np.array([(row - np.mean(row)) / np.std(row) for row in data])

    if read_events:
        # events file sits next to the data file
        ev_fname = fname.replace('_data','_events')
        events = pd.read_csv(ev_fname)
        events_names = list(events.columns[1:])
        events_data = np.array(events[events_names]).T

        # EEG channels come first, followed by one stim channel per event
        # column (count taken from the file rather than hard-coded)
        ch_type.extend(['stim']*len(events_names))
        ch_names.extend(events_names)
        # stack the event rows underneath the EEG rows
        data = np.concatenate((data,events_data))

    # create and populate MNE info structure
    info = create_info(ch_names,sfreq=500.0, ch_types=ch_type, montage=montage)
    info['filename'] = fname

    # create raw object
    raw = RawArray(data,info,verbose=False)

    return raw

subjects = range(1,13)
rawfilt_tot = []
ids_tot = []

# Directory holding the subj<N>_series<M>_data.csv / _events.csv files.
# Change this single line to point at a local copy of the training data.
train_dir = '/Users/eszti/Documents/__NYC_DSA/Dev/capstone_eeg/train'

# design a butterworth bandpass filter
# 250.0 is the Nyquist frequency for the 500 Hz sampling rate that
# creat_mne_raw_object assigns to every recording.
freqs = [7, 30]
b,a = butter(5,np.array(freqs)/250.0,btype='bandpass')

# convolution
# window for smoothing features
nwin = 250

################################################################################
###### CHANGE THIS FOR SMALLER AMOUNTS OF DATA. SUBSAMPLE = 1 IS ALL DATA.######
# training subsample
subsample = 1
################################################################################

cols = ['HandStart','FirstDigitTouch',
        'BothStartLoadPhase','LiftOff',
        'Replace','BothReleased']

for subject in subjects:

    ################ READ DATA ################################################
    pattern = '%s/subj%d_series*_data.csv' % (train_dir, subject)
    # glob returns matches in arbitrary order; sort so that the row order (and
    # the filter transients at series boundaries) are reproducible.
    fnames = sorted(glob(pattern))
    if not fnames:
        raise IOError('no data files match %r' % pattern)

    # read and concatenate all the files
    raw = concatenate_raws([creat_mne_raw_object(fname) for fname in fnames])

    # pick eeg signal
    picks = pick_types(raw.info,eeg=True)

    # Filter data for alpha frequency and beta band
    # Note that MNE implement a zero phase (filtfilt) filtering not compatible
    # with the rule of future data.
    # Here we use left filter compatible with this constraint.
    raw._data[picks] = lfilter(b,a,raw._data[picks])
    # keep every `subsample`-th sample, stored as samples x channels
    rawfilt_tot.append(pd.DataFrame(raw._data[:,::subsample]).T)

    # read ids (only that column is parsed) in the same file order as the
    # signal, and subsample them identically so rows and ids stay aligned
    ids = np.concatenate([np.array(pd.read_csv(fname, usecols=['id'])['id'])
                          for fname in fnames])
    ids_tot.append(ids[::subsample])

# create data frame and write to csv once, after every subject has been
# processed (doing this inside the loop rebuilt and rewrote the whole,
# ever-growing file on each iteration).
# NOTE(review): the signal was scaled to volts (x 1e-6) on load, so five
# decimals may keep very few significant digits - confirm this is intended.
rf = pd.DataFrame(np.concatenate(rawfilt_tot),np.concatenate(ids_tot),columns = raw.ch_names)
rf.to_csv('raw_filtered.csv', float_format='%.5f')

In [14]:
# Shape of the subset of rows whose columns 32-37 (the stim channels appended
# after the EEG channels) sum to more than 1.
event_total = rf.iloc[:, 32:38].sum(axis=1)
rf[event_total > 1].shape

(478939, 38)

In [15]:
# (rows, columns) of the complete filtered frame
rf.shape

(17985754, 38)

In [17]:
# Fraction of rows whose columns 32-37 sum to more than 1, computed from rf
# itself instead of hand-copied counts so it stays correct when the data or
# `subsample` changes.
float((rf.iloc[:, 32:38].sum(axis=1) > 1).mean())

0.026628797436015192

In [20]:
# Rows whose columns 32-37 (the stim channels) sum to less than 2
events_per_row = rf.iloc[:, 32:38].sum(axis=1)
rf_solo = rf[events_per_row < 2]

In [23]:
# Keyword was misspelled `float_foramt`; with the correct name the file is
# written with the same 5-decimal format as the other exports.
# NOTE(review): values are in volts, so 5 decimals may be coarse - confirm.
rf_solo.to_csv('raw_solo.csv', float_format='%.5f')

In [24]:
# Rows whose columns 32-37 (the stim channels) sum to more than 1
stim_sum = rf.iloc[:, 32:38].sum(axis=1)
rf_accompanied = rf[stim_sum > 1]

In [25]:
# Write the multi-event subset to disk with 5-decimal floats
rf_accompanied.to_csv(path_or_buf='rf_accompanied.csv', float_format='%.5f')