In [97]:
import numpy as np
import pandas as pd
from mne.io import RawArray
from mne.channels import read_montage
from mne.epochs import concatenate_epochs
from mne import create_info, find_events, Epochs, concatenate_raws, pick_types
from mne.decoding import CSP

from sklearn.linear_model import LogisticRegression
from glob import glob

from scipy.signal import butter, lfilter, convolve, boxcar

def creat_mne_raw_object(fname,read_events=True,norm=False):
    """Create an MNE ``RawArray`` from one EEG series CSV file.

    Parameters
    ----------
    fname : str
        Path to the EEG data CSV. The first column is skipped; every
        remaining column is used as one EEG channel, named after its header.
    read_events : bool
        When True, the companion events CSV (``fname`` with ``'_data'``
        replaced by ``'_events'``) is read and each of its columns except
        the first is appended as a ``'stim'`` channel.
    norm : bool
        When True, every EEG channel is standardised over the whole file
        (mean subtracted, divided by the standard deviation).

    Returns
    -------
    raw : RawArray
        Raw object with one row per channel, built with ``sfreq=500.0`` and
        the ``'standard_1005'`` montage; ``info['filename']`` is set to
        ``fname``.
    """
    # Read EEG file
    data = pd.read_csv(fname)

    # get channel names (everything but the first column)
    ch_names = list(data.columns[1:])

    # read EEG standard montage from mne
    montage = read_montage('standard_1005',ch_names)

    ch_type = ['eeg']*len(ch_names)
    # channels x samples, scaled by 1e-6 (from microvolts to volts)
    data = 1e-6*np.array(data[ch_names]).T

    # this normalizes the data per series, one channel (row) at a time.
    # Done with array arithmetic so the result is always an ndarray: the
    # previous ``map(f, data)`` yields a lazy iterator on Python 3, which the
    # later np.concatenate / RawArray calls cannot consume.
    if norm:
        row_mean = data.mean(axis=1, keepdims=True)
        row_std = data.std(axis=1, keepdims=True)
        data = (data - row_mean) / row_std

    if read_events:
        # events file
        ev_fname = fname.replace('_data','_events')
        # read event file
        events = pd.read_csv(ev_fname)
        events_names = list(events.columns[1:])
        events_data = np.array(events[events_names]).T

        # define channel type: the EEG channels come first, followed by one
        # stim channel per event column. The count is taken from the file so
        # ch_type and ch_names always stay the same length.
        ch_type.extend(['stim']*len(events_names))
        ch_names.extend(events_names)
        # concatenate event file and data
        data = np.concatenate((data,events_data))

    # create and populate MNE info structure
    info = create_info(ch_names,sfreq=500.0, ch_types=ch_type, montage=montage)
    info['filename'] = fname

    # create raw object
    raw = RawArray(data,info,verbose=False)

    return raw

# Subject numbers to process: 1 through 12.
subjects = range(1, 13)

# Per-subject accumulators for the filtered signals and the matching row ids.
rawfilt_tot, ids_tot = [], []

# 5th-order Butterworth band-pass over the 7-30 band.
# The band edges are divided by 250.0 before being handed to butter(); that
# is half of the sfreq=500.0 used when the raw objects are created.
freqs = [7, 30]
b, a = butter(5, np.asarray(freqs) / 250.0, btype='bandpass')

# Length of the window for smoothing features (convolution).
nwin = 250

################################################################################
# TRAINING SUBSAMPLE: only every `subsample`-th sample is kept.
# Raise it to work with less data; subsample = 1 keeps all the data.
subsample = 10
################################################################################

# Names of the six event columns.
cols = [
    'HandStart',
    'FirstDigitTouch',
    'BothStartLoadPhase',
    'LiftOff',
    'Replace',
    'BothReleased',
]

# For every subject: load all of its series, band-pass filter the EEG
# channels causally, and keep a subsampled copy of the signals and row ids.
for subject in subjects:

    ################ READ DATA ################################################
    # sorted() fixes the series order: glob returns files in an arbitrary,
    # OS-dependent order, which would otherwise change the row order of the
    # output (and the filter state carried across series) between machines.
    fnames = sorted(glob('/Users/eszti/Documents/__NYC_DSA/Dev/capstone_eeg/train/subj%d_series*_data.csv' % (subject)))

    # read and concatenate all the files
    raw = concatenate_raws([creat_mne_raw_object(fname) for fname in fnames])

    # pick eeg signal
    picks = pick_types(raw.info,eeg=True)

    # Filter data for alpha frequency and beta band
    # Note that MNE implement a zero phase (filtfilt) filtering not compatible
    # with the rule of future data.
    # Here we use left filter compatible with this constraint.
    raw._data[picks] = lfilter(b,a,raw._data[picks])

    # keep every `subsample`-th sample, stored as samples x channels
    rawfilt_tot.append(pd.DataFrame(raw._data[:,::subsample]).T)

    # read ids, subsampled with the same stride so they line up with the rows
    ids = np.concatenate([np.array(pd.read_csv(fname)['id']) for fname in fnames])
    ids_tot.append(ids[::subsample])

# create data frame and write to csv.
# Done once, after all subjects are collected: building the frame and
# rewriting the file inside the loop repeated the work for every subject and
# produced the same final file.
rf = pd.DataFrame(np.concatenate(rawfilt_tot),np.concatenate(ids_tot),columns = raw.ch_names)
# Scientific notation: the signals were scaled by 1e-6 on load, so a fixed
# five-decimal format ('%.5f') would round nearly all of the signal away.
rf.to_csv('raw_filtered.csv', float_format='%.6e')

In [99]:
# Display the filtered-signal frame as the cell output.
# NOTE(review): the rendered output below has an 'Unnamed: 0' column, which
# suggests `rf` was re-read from a CSV in a cell not shown here rather than
# being the frame built in the cell above -- TODO confirm.
rf

Unnamed: 0,Fp1,Fp2,F7,F3,Fz,F4,F8,FC5,FC1,FC2,...,O1,Oz,O2,PO10,HandStart,FirstDigitTouch,BothStartLoadPhase,LiftOff,Replace,BothReleased
subj1_series1_0,-1.265060e-09,1.481344e-08,8.610567e-09,4.937814e-09,8.610567e-09,6.121256e-10,2.925961e-08,1.138554e-08,1.428293e-09,6.447723e-09,...,1.873104e-08,7.059849e-09,4.897005e-09,2.872910e-08,0,0,0,0,0,0
subj1_series1_10,-2.838637e-05,4.948656e-05,1.961893e-05,1.950980e-05,3.688006e-05,6.031767e-05,9.691626e-05,6.039221e-05,5.736971e-06,3.118575e-05,...,8.128337e-05,2.892508e-05,1.487072e-05,1.215434e-04,0,0,0,0,0,0
subj1_series1_20,1.037941e-04,-4.974171e-05,-3.354253e-06,-1.055577e-05,-5.857862e-05,-8.417521e-05,-1.842902e-04,-1.119090e-04,-1.818688e-06,-6.207247e-05,...,-1.557538e-04,-6.044906e-05,-4.022023e-05,-2.433586e-04,0,0,0,0,0,0
subj1_series1_30,-8.659498e-05,-9.379269e-05,-6.753905e-05,-6.938635e-05,-1.870494e-05,-7.815695e-05,3.144889e-06,-1.037878e-05,-1.899459e-05,6.720463e-06,...,2.158946e-06,1.247388e-05,2.577026e-05,2.463140e-05,0,0,0,0,0,0
subj1_series1_40,-1.901873e-05,1.239944e-04,8.672682e-07,4.477969e-05,2.963357e-05,8.626145e-05,9.832759e-05,2.786242e-05,1.059038e-05,2.504109e-05,...,7.219534e-05,3.398650e-05,3.412625e-05,1.465030e-04,0,0,0,0,0,0
subj1_series1_50,5.822718e-06,1.209939e-05,6.890198e-05,3.963144e-05,3.099196e-05,7.891816e-05,7.693131e-05,9.535602e-05,4.801922e-06,2.513597e-05,...,7.687476e-05,-8.632405e-07,-4.344073e-05,4.557492e-05,0,0,0,0,0,0
subj1_series1_60,3.816463e-05,9.287356e-06,5.553475e-05,-3.725468e-06,2.456960e-05,7.409507e-06,-2.730549e-05,2.408902e-05,1.353809e-05,-4.569749e-06,...,-3.857925e-05,-2.213239e-05,-2.217242e-05,-3.946804e-05,0,0,0,0,0,0
subj1_series1_70,6.125445e-05,1.332427e-07,-1.819115e-05,1.375939e-05,-2.526797e-05,-3.188939e-05,-2.412046e-05,-5.309355e-05,-1.898365e-06,-1.659012e-05,...,1.190380e-05,4.098450e-05,6.178139e-05,-3.095760e-05,0,0,0,0,0,0
subj1_series1_80,-2.617454e-05,-6.716634e-05,-4.426233e-05,1.236237e-05,-1.907834e-05,-6.593153e-05,-9.178293e-05,-3.788373e-05,-2.859309e-06,-1.621872e-05,...,-7.308322e-05,-2.193844e-05,-5.342788e-07,-1.785041e-05,0,0,0,0,0,0
subj1_series1_90,-7.986742e-05,-8.730948e-06,-9.102282e-05,-1.047206e-04,-3.164583e-05,-7.702616e-05,1.052661e-05,-4.015438e-05,-2.790271e-05,-1.016673e-05,...,-5.836342e-05,-5.826839e-05,-6.157118e-05,-6.484488e-05,0,0,0,0,0,0
