In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input,
# in the order os.walk visits the directories.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import scipy.io
import numpy as np
from scipy import signal
from sklearn.decomposition import PCA
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE

In [None]:
class BasicDataProcess:
    """Stateless helpers for loading, windowing and labelling P300 EEG epochs.

    Every method is a ``@staticmethod``; the class is only used as a namespace.
    """

    @staticmethod
    def TransposeElements(inputs):
        """Return an array whose i-th element is ``inputs[i].transpose()``."""
        return np.array([element.transpose() for element in inputs])

    @staticmethod
    def LoadEEGFromFile(data_dir, is_train):
        """Load the EEG array stored in the session's ``.mat`` file.

        Args:
            data_dir: Session directory containing ``Train/`` and ``Test/``.
            is_train: If true, read ``Train/trainData.mat`` (key ``trainData``);
                otherwise read ``Test/testData.mat`` (key ``testData``).

        Returns:
            A list with one transposed array per entry along the first axis of
            the stored array. The windowing helpers below index each entry as
            ``entry[epoch][sample]``.
        """
        if is_train:
            split, key = "Train", "trainData"
        else:
            split, key = "Test", "testData"
        mat_contents = scipy.io.loadmat(data_dir + "/" + split + "/" + key + ".mat")
        return [np.array(entry).transpose() for entry in mat_contents[key]]

    @staticmethod
    def LoadDataFromFile(filename):
        """Read a text file holding one integer per line.

        Blank lines (for example a trailing empty line) are skipped instead of
        raising ``ValueError``. The file is closed even if parsing fails.

        Returns:
            A list of ``int``.
        """
        with open(filename) as handle:
            return [int(line) for line in handle if line.strip()]

    @staticmethod
    def LowPassFilter(fc, data, fs=250.0, order=9):
        """Zero-phase Butterworth low-pass filter.

        Args:
            fc: Cut-off frequency, in the same unit as ``fs``.
            data: Signal to filter (passed straight to ``signal.filtfilt``).
            fs: Sampling frequency used to normalise ``fc``. Defaults to 250.0.
            order: Butterworth filter order. Defaults to 9.

        Returns:
            The filtered signal as returned by ``signal.filtfilt``.
        """
        w = fc / (fs / 2.0)  # Normalize the cut-off to the Nyquist frequency
        b, a = signal.butter(order, w, 'low', analog=False)
        return signal.filtfilt(b, a, data)

    @staticmethod
    def _WindowBounds(width, center=125):
        """Return the ``(left, right)`` sample indices of a window of ``width``
        samples centred on sample ``center``, truncated to ``int``."""
        half_width = width / 2
        return int(center - half_width), int(center + half_width)

    @staticmethod
    def _EpochMatrix(channels, epoch_index, width, cutoff_hz):
        """Collect the windowed samples of one epoch from every channel.

        When ``cutoff_hz`` is not ``None`` each window is low-pass filtered and
        stored as a list; otherwise the raw slice is stored unchanged.
        """
        left, right = BasicDataProcess._WindowBounds(width)
        rows = []
        for channel in channels:
            segment = channel[epoch_index][left:right]
            if cutoff_hz is not None:
                segment = BasicDataProcess.LowPassFilter(cutoff_hz, segment).tolist()
            rows.append(segment)
        return rows

    @staticmethod
    def GetFeatureByPCA(channels, data_size, with_filter, pca_threshold, reshaped, width):
        """Build one PCA-transformed feature matrix per epoch.

        Args:
            channels: Iterable of per-channel data indexed ``[epoch][sample]``.
            data_size: Number of epochs to process.
            with_filter: If true, low-pass each window at 15 before the PCA.
            pca_threshold: Upper bound of the slice kept from the PCA output.
            reshaped: If true, flatten each kept matrix into a 1-D list.
            width: Window width in samples, centred on sample 125.

        Returns:
            A list with one (nested) list per epoch.
        """
        train_input = []
        for epoch_index in range(data_size):
            p300_matrix = BasicDataProcess._EpochMatrix(
                channels, epoch_index, width, 15 if with_filter else None)
            p300_pca = PCA().fit_transform(p300_matrix)
            # NOTE(review): this slices the first axis of the transformed
            # matrix (one row per channel), not the component axis - confirm
            # that this is the intended meaning of `pca_threshold`.
            kept = p300_pca[0:pca_threshold]
            if reshaped:
                train_input.append(kept.reshape(-1).tolist())
            else:
                train_input.append(kept.tolist())
        return train_input

    @staticmethod
    def GetP300Inputs(channels, with_filter, data_size, reshaped, width):
        """Build one windowed channel-by-sample matrix per epoch.

        Args:
            channels: Iterable of per-channel data indexed ``[epoch][sample]``.
            with_filter: If true, low-pass each window at 20.
            data_size: Number of epochs to process.
            reshaped: If true, flatten each matrix into a 1-D list; otherwise
                keep it as a 2-D ``numpy`` array.
            width: Window width in samples, centred on sample 125.

        Returns:
            A list with one entry per epoch.
        """
        result = []
        for epoch_index in range(data_size):
            p300_matrix = np.array(BasicDataProcess._EpochMatrix(
                channels, epoch_index, width, 20 if with_filter else None))
            result.append(p300_matrix.reshape(-1).tolist() if reshaped else p300_matrix)
        return result

    @staticmethod
    def GetTargetResults(proba_results, events_per_run=8):
        """Flag the most probable target inside every complete run of events.

        ``proba_results`` is split into consecutive groups of
        ``events_per_run`` rows. In each complete group exactly one row - the
        first one holding the largest value in column 1 - is labelled 1; all
        other rows stay 0. A trailing incomplete group is left unlabelled.

        The winner is tracked per group, so a group whose column-1 values are
        all zero still gets its own label rather than re-flagging the winner
        of the previous group.

        Args:
            proba_results: Sequence of rows whose element ``[1]`` is the
                score of the positive class.
            events_per_run: Group size. Defaults to 8.

        Returns:
            A list of 0/1 ints with the same length as ``proba_results``.

        Raises:
            ValueError: If ``events_per_run`` is not positive.
        """
        if events_per_run <= 0:
            raise ValueError("events_per_run must be a positive integer")
        label_results = np.zeros(len(proba_results), dtype=np.int8)
        last_start = len(proba_results) - events_per_run
        for start in range(0, last_start + 1, events_per_run):
            best = start
            for candidate in range(start + 1, start + events_per_run):
                # Strict comparison keeps the earliest row on ties.
                if proba_results[candidate][1] > proba_results[best][1]:
                    best = candidate
            label_results[best] = 1
        return label_results.tolist()

    @staticmethod
    def GetLabelResults(targets, events):
        """Return ``events[i]`` for every position ``i`` where ``targets[i] == 1``."""
        return [events[pos] for pos, target in enumerate(targets) if target == 1]

    @staticmethod
    def GetDifferences(a, b, size):
        """Count the positions ``i < size`` at which ``a[i] != b[i]``."""
        return sum(1 for i in range(size) if a[i] != b[i])

    @staticmethod
    def GetMostFrequent(List):
        """Return the most frequent element of ``List``.

        Ties are resolved in favour of the element that appears first. Elements
        must be hashable. Runs in a single counting pass instead of calling
        ``list.count`` once per element.

        Raises:
            IndexError: If ``List`` is empty.
        """
        if len(List) == 0:
            raise IndexError("GetMostFrequent() called with an empty list")
        counts = Counter(List)
        # dicts preserve insertion order and max() returns the first maximum,
        # so the earliest-seen element wins a tie.
        return max(counts, key=counts.get)

    @staticmethod
    def GetLabels(votes, block_size):
        """Majority-vote ``votes`` in consecutive blocks of ``block_size``.

        Only complete blocks are considered; leftover votes are ignored.

        Returns:
            A list with the most frequent vote of each block.
        """
        test_labels = []
        for block_index in range(len(votes) // block_size):
            block = np.array(votes[block_index * block_size:(block_index + 1) * block_size])
            test_labels.append(BasicDataProcess.GetMostFrequent(block.tolist()))
        return test_labels

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import VarianceThreshold, SelectFromModel
#from BasicDataProcess import BasicDataProcess
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import time
import numpy as np
import os
import traceback
from scipy.fftpack import rfft, irfft

#timestr = time.strftime("%Y%m%d-%H%M%S")
# Number of per-block label columns announced in the CSV header.
MAX_RESULT_COLUMNS = 50

result_file_name = "Output_All_Channels_Theta_Band" + ".csv"
# The handle stays open on purpose: the subject/session loop further down
# appends one row per session and closes the file when it is done.
result_file = open(result_file_name, "w+")
# Header: "SUBJECT,SESSION,1,2,...,50," - every column name, including the
# last one, is followed by a comma (kept byte-identical to the original header).
result_file.write(
    "SUBJECT,SESSION,"
    + "".join("%d," % column for column in range(1, MAX_RESULT_COLUMNS + 1))
    + "\n"
)

all_results = []  # one list of predicted labels per (subject, session)
successful = True  # gates the result comparison run after the main loop

In [None]:
def GetNonTargetsAverage(train_inputs, train_targets):
    """Average all training inputs whose target label is 0.

    Args:
        train_inputs: Sequence of equally shaped arrays (or nested lists),
            indexed in parallel with ``train_targets``.
        train_targets: Sequence of labels; entries equal to 0 mark non-targets.

    Returns:
        The element-wise mean of the non-target inputs.

    Raises:
        ValueError: If ``train_targets`` contains no 0 label, so there is
            nothing to average (previously an opaque ``TypeError`` from
            dividing ``None``).
    """
    print("--Get non-targets average matrix--")
    running_sum = None
    count = 0
    for i in range(len(train_targets)):
        if train_targets[i] != 0:
            continue
        count += 1
        if running_sum is None:
            # asarray so that a lone non-target stored as a list can still be divided
            running_sum = np.asarray(train_inputs[i])
        else:
            running_sum = np.add(running_sum, train_inputs[i])
    if count == 0:
        raise ValueError("no non-target (label 0) samples to average")
    average = running_sum / float(count)
    print("--Done--")
    return average

In [None]:
def ApplySpecialFilter(inputs, filter_feature, reshaped, num_channels=6):
    """Subtract a template from every epoch in the frequency domain.

    For each of the first ``num_channels`` channels the real FFT of the
    template row is subtracted from the real FFT of the epoch row, and the
    difference is transformed back with ``irfft``.

    Args:
        inputs: Iterable of epochs, each indexable as ``epoch[channel]``.
        filter_feature: Template indexable as ``filter_feature[channel]``.
        reshaped: If true, flatten every filtered epoch to 1-D.
        num_channels: How many leading channels to process. Defaults to 6,
            the value that used to be hard-coded; channels beyond that are
            dropped from the output. Pass ``None`` to process every channel
            of ``filter_feature``.

    Returns:
        ``numpy`` array stacking the filtered epochs.
    """
    print("--Apply special filter to the inputs--")
    if num_channels is None:
        num_channels = len(filter_feature)
    # The template spectra do not depend on the epoch: compute them once.
    filter_spectra = [rfft(filter_feature[j]) for j in range(num_channels)]
    result = []
    for single_input in inputs:
        epoch = np.array([
            irfft(rfft(single_input[j]) - filter_spectra[j])
            for j in range(num_channels)
        ])
        result.append(epoch.reshape(-1) if reshaped else epoch)
    print("--Done--")
    return np.array(result)

In [None]:
def PreprocessData(data_dir, filter_applied, pca_applied, pca_threshold, reshaped, data_size, input_data, width):
    """Turn raw channel data into per-epoch feature inputs, logging each choice.

    Delegates to ``BasicDataProcess.GetFeatureByPCA`` when ``pca_applied`` is
    true and to ``BasicDataProcess.GetP300Inputs`` otherwise. ``data_dir`` is
    only used in the log output.

    Returns:
        Whatever the selected ``BasicDataProcess`` helper returns.
    """
    print("--Load " + data_dir + " Successfully! Now start processing--")
    print("--Applying Low Pass Filter and reshape to input data(Train)...--")
    print("Low pass filter: YES" if filter_applied else "Low pass filter: NO")

    if not pca_applied:
        features = BasicDataProcess.GetP300Inputs(
            channels=input_data,
            with_filter=filter_applied,
            data_size=data_size,
            reshaped=reshaped,
            width=width,
        )
        pca_report = "PCA Applied: NO"
    else:
        features = BasicDataProcess.GetFeatureByPCA(
            channels=input_data,
            data_size=data_size,
            with_filter=filter_applied,
            pca_threshold=pca_threshold,
            reshaped=reshaped,
            width=width,
        )
        pca_report = "PCA Applied with threshold: " + str(pca_threshold)
    # Reported only after the features were computed, as before.
    print(pca_report)

    if reshaped:
        print("Data set has been reshaped to 1D.")
    print("--Input data preprocessed!--")
    return features

In [None]:
from sklearn.neural_network import MLPClassifier
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
import pywt


In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
def _WithCwtFeatures(inputs, scales, wavelet='mexh'):
    """Return ``inputs`` with each row's flattened CWT coefficients appended.

    Every row of the 2-D array ``inputs`` is passed through ``pywt.cwt`` with
    the given ``scales`` and ``wavelet``; the coefficient matrix is flattened
    and concatenated to the right of the original row.
    """
    cwt_rows = []
    for row in inputs:
        coefficients, _ = pywt.cwt(row, scales, wavelet)
        cwt_rows.append(np.array(coefficients).flatten())
    return np.concatenate((inputs, cwt_rows), axis=1)


sample_width = 150
scale = [45, 50, 60, 70]

try:
    for sbj_no in range(1, 16):
        print("===================SBJ%02d===================" % sbj_no)
        sbj_folder = "../input/bciaut-p300/data" + "/SBJ%02d" % sbj_no
        for session_no in range(1, 8):
            data_dir = sbj_folder + "/S0" + str(session_no)

            # ---- Training data: window, subtract the non-target template ----
            train_data = np.array(BasicDataProcess.LoadEEGFromFile(data_dir, True))
            train_targets = BasicDataProcess.LoadDataFromFile(data_dir + "/Train/trainTargets.txt")
            train_inputs = PreprocessData(data_dir, filter_applied = False, pca_applied = False, pca_threshold = 20, reshaped = False, data_size = len(train_targets), input_data = train_data, width = sample_width)
            filter_feature = GetNonTargetsAverage(train_inputs, train_targets)
            train_inputs = ApplySpecialFilter(train_inputs, filter_feature, reshaped = True)
            train_targets = np.array(train_targets)

            # ---- Feature selection (L1-penalised linear SVM) + CWT features ----
            lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(train_inputs, train_targets)
            sel_model = SelectFromModel(lsvc, prefit=True)
            train_inputs = sel_model.transform(train_inputs)
            train_inp = _WithCwtFeatures(train_inputs, scale)

            # ---- Oversample with SMOTE, then fit the soft-voting ensemble ----
            sm = SMOTE(random_state=42)
            train_fea, train_targets = sm.fit_resample(train_inp, train_targets)
            estimators = [
                ('lda', LinearDiscriminantAnalysis()),
                # Seeded so that a re-run reproduces the same predictions.
                ('mlp', MLPClassifier(random_state=1)),
                # NOTE(review): `degree` is only used by the 'poly' kernel; with
                # the default kernel it has no effect - confirm the intent.
                ('svc', svm.SVC(gamma="scale", probability=True, random_state=1, degree=12)),
                ('rand', RandomForestClassifier(random_state=1)),
            ]
            model = VotingClassifier(estimators, voting='soft', weights=[11, 3, 4, 7])
            model.fit(train_fea, train_targets)

            # ---- Test data: same template, same selected features ----
            test_events = BasicDataProcess.LoadDataFromFile(data_dir + "/Test/testEvents.txt")
            test_data = BasicDataProcess.LoadEEGFromFile(data_dir, False)
            test_inputs = PreprocessData(data_dir, filter_applied = False, pca_applied = False, pca_threshold = 20, reshaped = False, data_size = len(test_events), input_data = test_data, width = sample_width)
            test_inputs = ApplySpecialFilter(test_inputs, filter_feature, reshaped = True)
            test_inputs = sel_model.transform(test_inputs)
            test_fea = _WithCwtFeatures(test_inputs, scale)

            # ---- Predict, pick one event per run, majority-vote per block ----
            raw_results = model.predict_proba(test_fea)
            test_targets = BasicDataProcess.GetTargetResults(raw_results)
            test_votes = BasicDataProcess.GetLabelResults(test_targets, test_events)
            test_runs_per_block = BasicDataProcess.LoadDataFromFile(data_dir + "/Test/runs_per_block.txt")[0]
            test_labels = BasicDataProcess.GetLabels(test_votes, test_runs_per_block)
            all_results.append(test_labels)

            # One CSV row per (subject, session). The newline is written once,
            # after the last label, so the whole row stays on a single line.
            row_fields = [str(sbj_no), str(session_no)] + [str(label) for label in test_labels]
            result_file.write(",".join(row_fields) + "\n")
finally:
    # Always release the handle, even when a session fails half-way through.
    result_file.close()

if successful:
    exit_status = os.system("python3 result_compare.py " + result_file_name)
    if exit_status != 0:
        print("result_compare.py exited with status " + str(exit_status))

In [None]:
import numpy as np
# Quick inspection: shape of the SMOTE-resampled training matrix produced by
# the last (subject, session) iteration of the loop in the previous cell.
# NOTE(review): relies on `train_fea` leaking out of that loop, so this cell
# only works after the previous cell has run.
np.shape(train_fea)
#len(cwtmatr)