In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.decomposition import PCA

import pickle

In [2]:
data1 = loadmat('./Dataset/TrainData1.mat')
data1_test = loadmat('./Dataset/TestData1.mat')
data2 = loadmat('./Dataset/TrainData2.mat')
data2_test = loadmat('./Dataset/TestData2.mat')
data3 = loadmat('./Dataset/TrainData3.mat')
data3_test = loadmat('./Dataset/TestData3.mat')
data4 = loadmat('./Dataset/TrainData4.mat')
data4_test = loadmat('./Dataset/TestData4.mat')
data5 = loadmat('./Dataset/TrainData5.mat')
data5_test = loadmat('./Dataset/TestData5.mat')
data6 = loadmat('./Dataset/TrainData6.mat')
data6_test = loadmat('./Dataset/TestData6.mat')
data7 = loadmat('./Dataset/TrainData7.mat')
data7_test = loadmat('./Dataset/TestData7.mat')
data8 = loadmat('./Dataset/TrainData8.mat')
data8_test = loadmat('./Dataset/TestData8.mat')
data9 = loadmat('./Dataset/TrainData9.mat')
data9_test = loadmat('./Dataset/TestData9.mat')

In [3]:
class Person:
    def __init__(self, data_mat):
        self.data_name = list(data_mat.keys())[-1]
        self.data = data_mat[self.data_name]
        self.t_sampling = self.data[0,1] - self.data[0,0]
    
    def find_t_stimulated(self, data, test_data = False):
        '''Finds the times when there was a stimulation and stores these times in a vector'''
        
        mask = data[9,:] != 0
        t_stimulated = data[0,mask]
        if (test_data == False):
            t_stimulated = t_stimulated[0::4]
        return t_stimulated
    
    def epoching(self, data, t_stimulated, test_data = False):
        '''This function finds the 128 samples of each channel around the times that a stimulation happend
        and also creates the label vector for each of these times and returns them as a tuple of (t_features, y)'''
        
        t_backward = 0.1
        t_forward = 0.4
        t_features = np.empty([len(t_stimulated), int((t_backward+t_forward)/self.t_sampling)*8])
        y = np.empty(len(t_stimulated))
        for t in t_stimulated:
            index = np.where(t_stimulated == t)
            t_min = t - t_backward
            t_max = t + t_forward
            mask = np.logical_and((data[0,:]<t_max),(data[0,:]>t_min))
            data_masked = data[:, mask]
            time_signal = data_masked[1:9,:]
            features = np.array([time_signal[0,:]])
            for j in range(1,8):
                features = np.append(features, time_signal[j,:])
            if(test_data == False):
                y[index] = data[10, data[0,:] == t]
            t_features[index,:] = features
        return (t_features, y)
    
    def create_all_features(self, t_features):
        '''This function calculates all possible features and returns them.'''
        ## Time Features
        # channel time signals
        ch1 = t_features[:, 0:128]
        ch2 = t_features[:, 128:256]
        ch3 = t_features[:, 256:384]
        ch4 = t_features[:, 384:512]
        ch5 = t_features[:, 512:640]
        ch6 = t_features[:, 640:768]
        ch7 = t_features[:, 768:896]
        ch8 = t_features[:, 896:1024]
        ch = (ch1+ch2+ch3+ch4+ch5+ch6+ch7+ch8)/8
        
        length = ch.shape[0]
        
        ch_hist = self.create_hist_feature(ch)
        ch1_hist = self.create_hist_feature(ch1)
        ch2_hist = self.create_hist_feature(ch2)
        ch3_hist = self.create_hist_feature(ch3)
        ch4_hist = self.create_hist_feature(ch4)
        ch5_hist = self.create_hist_feature(ch5)
        ch6_hist = self.create_hist_feature(ch6)
        ch7_hist = self.create_hist_feature(ch7)
        ch8_hist = self.create_hist_feature(ch8)
        
        ch1_mean = np.mean(ch1, axis=1).reshape((length, 1))
        ch2_mean = np.mean(ch2, axis=1).reshape((length, 1))
        ch3_mean = np.mean(ch3, axis=1).reshape((length, 1))
        ch4_mean = np.mean(ch4, axis=1).reshape((length, 1))
        ch5_mean = np.mean(ch5, axis=1).reshape((length, 1))
        ch6_mean = np.mean(ch6, axis=1).reshape((length, 1))
        ch7_mean = np.mean(ch7, axis=1).reshape((length, 1))
        ch8_mean = np.mean(ch8, axis=1).reshape((length, 1))
        
        ch1_var = np.var(ch1, axis=1).reshape((length, 1))
        ch2_var = np.var(ch2, axis=1).reshape((length, 1))
        ch3_var = np.var(ch3, axis=1).reshape((length, 1))
        ch4_var = np.var(ch4, axis=1).reshape((length, 1))
        ch5_var = np.var(ch5, axis=1).reshape((length, 1))
        ch6_var = np.var(ch6, axis=1).reshape((length, 1))
        ch7_var = np.var(ch7, axis=1).reshape((length, 1))
        ch8_var = np.var(ch8, axis=1).reshape((length, 1))
        
        ch_corr = self.create_correlation_feature(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8)
        ch_freq = self.create_frequency_features(ch)
        
        feature_tuple = (ch, ch_freq , ch1_var, ch2_var, ch3_var, ch4_var,
                         ch5_var, ch6_var, ch7_var, ch8_var, ch_hist, ch_corr)
        all_features = np.concatenate(feature_tuple, axis=1)
        #std = np.std(all_features, axis=0)
        all_features = all_features
        
        return all_features
    
    def create_hist_feature(self, channel):
        '''This function gets the channel time series and returns the histogram feature for it.'''
        length = channel.shape[0]
        bins = 12
        channel_hist = np.empty((length, bins))
        for i in range(length):
            channel_sample = channel[i,:]
            hists = np.histogram(channel_sample, bins=bins, range=(-60,60))[0]
            channel_hist[i,:] = hists
        return channel_hist
    
    def create_correlation_feature(self, ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8):
        '''This function gets the channel time series and returns the correlation feature for it.'''
        length = ch1.shape[0]
        ch_corr_list = np.empty((length, 28))
        for i in range(length):
            ch1_sample = ch1[i,:]
            ch2_sample = ch2[i,:]
            ch3_sample = ch3[i,:]
            ch4_sample = ch4[i,:]
            ch5_sample = ch5[i,:]
            ch6_sample = ch6[i,:]
            ch7_sample = ch7[i,:]
            ch8_sample = ch8[i,:]
            corr = np.corrcoef([ch1_sample,ch2_sample,ch3_sample,ch4_sample,ch5_sample,ch6_sample
                                      ,ch7_sample, ch8_sample])
            ch_corr = np.array([corr[0,1], corr[0,2], corr[0,3], corr[0,4], corr[0,5], corr[0,6], corr[0,7],
                                corr[1,2], corr[1,3], corr[1,4], corr[1,5], corr[1,6], corr[1,7],
                                corr[2,3], corr[2,4], corr[2,5], corr[2,6], corr[2,7],
                                corr[3,4], corr[3,5], corr[3,6], corr[3,7],
                                corr[4,5], corr[4,6], corr[4,7],
                                corr[5,6], corr[5,7],
                                corr[6,7]])
            ch_corr_list[i,:] = ch_corr
        return ch_corr_list
    
    def create_frequency_features(self, ch):
        '''Gets a channel time data and finds frequency features related to it.'''
        length = ch.shape[0]
        N = 4096
        ch_fft = abs(np.fft.rfft(ch, N))
        ch_fft_no_dc = np.delete(ch_fft,0,1)
        ch_psd = ch_fft**2
        f = np.arange(0, N/2+1) * 128/(N/2)
        # 0.5Hz <= f < 4Hz
        mask1 = np.logical_and(f>=0.4, f<4)
        ch_psd1 = ch_psd[:,mask1]
        ch_energy1 = np.sum(ch_psd1, axis=1)
        # 4Hz <= f < 8Hz
        mask2 = np.logical_and(f>=4, f<8)
        ch_psd2 = ch_psd[:,mask2]
        ch_energy2 = np.sum(ch_psd2, axis=1)
        # 8Hz <= f < 13Hz
        mask3 = np.logical_and(f>=8, f<13)
        ch_psd3 = ch_psd[:,mask3]
        ch_energy3 = np.sum(ch_psd3, axis=1)
        # 13Hz <= f < 30Hz
        mask4 = np.logical_and(f>=13, f<30)
        ch_psd4 = ch_psd[:,mask4]
        ch_energy4 = np.sum(ch_psd4, axis=1)
        # 30Hz <= f
        mask5 = f>=30
        ch_psd5 = ch_psd[:,mask5]
        ch_energy5 = np.sum(ch_psd5, axis=1)
        
        
        # mean frequency
        f_array = np.array([f]*length)
        f_mean = np.sum(f_array * ch_fft, axis=1)/np.sum(ch_fft, axis=1)
        
        # 5 max frequencies
        ind = np.argsort(-ch_fft[:,1:], axis=1)
        f_sorted = np.take_along_axis(f_array[:,1:], ind, axis=1)
        f_5_max = f_sorted[:,0:5]
        
        # appending features together
        temp1 = np.append(ch_energy1.reshape((length, 1)), ch_energy2.reshape((length, 1)), axis=1)
        temp2 = np.append(ch_energy3.reshape((length, 1)), ch_energy4.reshape((length, 1)), axis=1)
        f_features = np.append(temp1, temp2, axis=1)
        f_features = np.append(f_features, ch_energy5.reshape((length, 1)), axis=1)
        f_features = np.append(f_features, f_mean.reshape(length, 1), axis=1)
        f_features = np.append(f_features, f_5_max, axis=1)
        
        return f_features
    
    def return_features(self, data, test_data=False):
        '''Gets a dataset as its input and creates all possible features using this dataset in order to be used later
        and returns these features as its output.'''
        
        print("Creating all possible features...\n")
        t_stimulated = self.find_t_stimulated(data, test_data)
        t_features, y = self.epoching(data, t_stimulated, test_data)
        all_features = self.create_all_features(t_features)
        if (test_data):
            out = all_features
        else:
            out = (all_features, y)
        return out
    
    def fisher_score_index(self, X_train, y_train):
        '''Finds the index of fisher score (column indexes) using X_train and y_train which are the features and their
        labels.'''
        mask1 = (y_train==0)
        mask2 = (y_train==1)

        X1_train = X_train[mask1,:]
        X2_train = X_train[mask2,:]
        n1 = X1_train.shape[0]
        n2 = X2_train.shape[0]

        u0 = np.mean(X_train, axis=0)
        u1 = np.mean(X1_train, axis=0)
        u2 = np.mean(X2_train, axis=0)
        var1 = np.var(X1_train, axis=0)
        var2 = np.var(X2_train, axis=0)
    
        fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))

        ind = np.argsort(-fisher_score)
        return ind
    
    def cross_validation(self, X, y, model, feature_num, n_splits, metric="roc_auc"):
        kf = KFold(n_splits = n_splits)
        score_list = np.array([])
        X = X[:, 0:feature_num]
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            score = self.scorer(model, X_train, y_train, X_test, y_test, metric)
            score_list = np.append(score_list, score)
        score = np.mean(score_list)
        return score, score_list
    
    def scorer(self, model, X_train, y_train, X_test, y_test, metric, return_conf_matrix=False):
        '''Finds either roc auc or accuracy of model on a given train and test dataset and returns the calculated
        score.'''
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        if metric == "roc_auc":
            score = roc_auc_score(y_test, y_pred)
        elif metric == "accuracy":
            score = balanced_accuracy_score(y_test, y_pred)
        if(return_conf_matrix):
            return (score, confusion_matrix(y_test, y_pred))
        else:
            return score
        
    def find_best_model(self, X, y):
        '''Finds the best model for a given dataset using cross-validation and fisher-score, then returns the 
        shuffled, splited and sorted dataset, best model, best number of features and finally the priority of 
        the features based on their fisher-score.'''
        X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        fisher_ind = self.fisher_score_index(X_train_all, y_train)
        X_train = X_train_all[:, fisher_ind]
        X_test = X_test_all[:, fisher_ind]
        models = self.model_generator()
        feature_nums = [10, 20, 40, 70, 100]
        scores_array = np.empty((len(models), len(feature_nums)))
        
        for i in range(len(models)):
            model = models[i]
            for j in range(len(feature_nums)):
                progress = i/len(models)*100 + j/len(feature_nums) *100/len(models)
                progress_str = "{:.2f}".format(progress)
                print("Cross-Validation in progress:", progress_str,"%")
                feature_num = feature_nums[j]
                score, score_list = self.cross_validation(X_train, y_train, model, feature_num, 6)
                scores_array[i,j] = score
        print("Cross-Validation is completed!\n")
        
        indices = np.unravel_index(np.argmax(scores_array, axis=None), scores_array.shape) 
        best_model = models[indices[0]]
        best_feature_num = feature_nums[indices[1]]
        return X_train, X_test, y_train, y_test, best_model, best_feature_num, fisher_ind
        
    def fit(self):
        '''Simply creates the best model for a person and saves the model, the number of features which should be
        used in the model and finally the indexing for sorting features based on their fisher score.'''
        X, y = self.return_features(self.data)
        print("\nFinding the best model is started...")
        
        X_train, X_test, y_train, y_test, model, feature_num, fisher_ind = self.find_best_model(X, y)
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        roc_auc = roc_auc_score(y_test, y_pred)
        acc = accuracy_score(y_test, y_pred)
        conf_matrix = confusion_matrix(y_test, y_pred)
        
        print("The best model is:", model," with", feature_num,"features")
        print("The accuracy of the best model is:", "{:.2f}%".format(acc*100))
        print("The roc_auc of the best model is:", "{:.2f}%".format(roc_auc*100))
        print("The confusion matrix of the best model is:\n", conf_matrix)
        
        X_all = (X[:, fisher_ind])[:, 0:feature_num]
        model.fit(X_all, y)
        self.model = model
        self.X_train = X_all
        self.y_train = y
        self.feature_num = feature_num
        self.fisher_ind = fisher_ind
        filename = './models/'+ self.data_name + '_Model.sav'
        pickle.dump(model, open(filename, 'wb'))
        
        print("Test confusion matrix:\n", confusion_matrix(y, model.predict(X_all)))
        
    def load_test_data(self, data_mat):
        '''Gets a dataset as its input and findes the features(sorted and selected based on the fisher score 
        calculated in create_best_model function) for this dataset and also the stimulation times for this dataset.'''
        print("\nLoading the test data is started...")
        data_name = list(data_mat.keys())[-1]
        data = data_mat[data_name]
        t_stimulated = self.find_t_stimulated(data, test_data=True)
        chars_array = []
        for t in t_stimulated:
            index = np.where(t_stimulated == t)
            char = data[9,np.where(data[0,:] == t)][0,0]
            chars_array.append(char)
        X = self.return_features(data, test_data = True)
        X_sorted = X[:, self.fisher_ind]
        X_test = X_sorted[:, 0:self.feature_num]
        self.X_test = X_test
        self.t_stimulated_test = t_stimulated
        self.chars_array = np.array(chars_array)
        
    def find_test_word(self):
        model = self.model
        y_pred = model.predict(self.X_test)
        mask = y_pred == 1
        chars_array = self.chars_array
        chars = chars_array[mask]
        print("Ones founded:", chars.shape[0])
        
        max_char = max(chars)
        
        if(max_char > 12):
            word = self.display_type1(chars)
            print("Single Character Display.")
        else:
            word = self.display_type2(chars_array, mask)
            print("Row-Column Display.")
            
        print("The final word is:\n", word)
        
    def display_type1(self, chars):
        '''Finds the intended word in the Single Character display type.'''
        char_dict = {1:'A', 2:'B', 3:'C', 4:'D', 5:'E', 6:'F', 7:'G', 8:'H', 9:'I', 10:'J', 11:'K', 12:'L', 13:'M',
                     14:'N', 15:'O', 16:'P', 17:'Q', 18:'R', 19:'S', 20:'T', 21:'U', 22:'V', 23:'W', 24:'X', 25:'Y',
                     26:'Z', 27:'0', 28:'1', 29:'2', 30:'3', 31:'4', 32:'5', 33:'6', 34:'7', 35:'8', 36:'9'}
        
        step = int(chars.shape[0]/5)
        char1 = chars[0:step].astype(int)
        char2 = chars[step:2*step].astype(int)
        char3 = chars[2*step:3*step].astype(int)
        char4 = chars[3*step:4*step].astype(int)
        char5 = chars[4*step:].astype(int)
        
        i1 = np.bincount(char1).argmax()
        i2 = np.bincount(char2).argmax()
        i3 = np.bincount(char3).argmax()
        i4 = np.bincount(char4).argmax()
        i5 = np.bincount(char5).argmax()
        
        word = char_dict[i1] + char_dict[i2] + char_dict[i3] + char_dict[i4] + char_dict[i5]
        return word
    
    def display_type2(self, chars_array, mask):
        '''Finds the intended word in the Row-Column display type.'''
        chars_array = chars_array.astype('int')
        char_dict = [['A', 'B', 'C', 'D', 'E', 'F'],
                     ['G', 'H', 'I', 'J', 'K', 'L'],
                     ['M', 'N', 'O', 'P', 'Q', 'R'],
                     ['S', 'T', 'U', 'V', 'W', 'X'],
                     ['Y', 'Z', '0', '1', '2', '3'],
                     ['4', '5', '6', '7', '8', '9']]

        chars = chars_array[mask].astype(int)
        step = int(chars.shape[0]/5)
        chars1 = chars[0:step]
        chars2 = chars[step:2*step]
        chars3 = chars[2*step:3*step]
        chars4 = chars[3*step:4*step]
        chars5 = chars[4*step:]
        
        chars1_row = chars1[chars1<=6]
        chars2_row = chars2[chars2<=6]
        chars3_row = chars3[chars3<=6]
        chars4_row = chars4[chars4<=6]
        chars5_row = chars5[chars5<=6]
        
        chars1_col = chars1[chars1>6]
        chars2_col = chars2[chars2>6]
        chars3_col = chars3[chars3>6]
        chars4_col = chars4[chars4>6]
        chars5_col = chars5[chars5>6]
        
        row1 = self.most_frequent(chars1_row)-1
        row2 = self.most_frequent(chars2_row)-1
        row3 = self.most_frequent(chars3_row)-1
        row4 = self.most_frequent(chars4_row)-1
        row5 = self.most_frequent(chars5_row)-1
        
        col1 = self.most_frequent(chars1_col)-7
        col2 = self.most_frequent(chars2_col)-7
        col3 = self.most_frequent(chars3_col)-7
        col4 = self.most_frequent(chars4_col)-7
        col5 = self.most_frequent(chars5_col)-7
        
        char1 = char_dict[row1][col1]
        char2 = char_dict[row2][col2]
        char3 = char_dict[row3][col3]
        char4 = char_dict[row4][col4]
        char5 = char_dict[row5][col5]
        
        word = char1+char2+char3+char4+char5
        return word
        
    def model_generator(self):
        '''This method just creates an array of different models and it was written like this just for the sake of 
        the order and the beauty of the code.'''
        return [LogisticRegression(class_weight={0:1, 1:35}, max_iter=10000),
                LinearDiscriminantAnalysis(),
                LinearDiscriminantAnalysis(priors=[0.5, 0.5]),
                RandomForestClassifier(n_estimators = 5, max_depth = 4, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 7, max_depth = 4, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 9, max_depth = 4, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 5, max_depth = 6, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 7, max_depth = 6, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 9, max_depth = 6, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 5, max_depth = 8, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 7, max_depth = 8, class_weight='balanced_subsample'),
                RandomForestClassifier(n_estimators = 9, max_depth = 8, class_weight='balanced_subsample'),
                #QuadraticDiscriminantAnalysis(priors = [0.5, 0.5])
                #LogisticRegression(class_weight={0:1, 1:35}, max_iter=10000)
                #LogisticRegression(class_weight={0:1, 1:100}, max_iter=10000)
                ]
    
    def most_frequent(self, List):
        
        return max(set(List), key = list(List).count)

## Person 1

In [4]:
p1 = Person(data1)
p1.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %


  fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))


Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation

In [5]:
p1.load_test_data(data1)


Loading the test data is started...
Creating all possible features...



In [6]:
p1.find_test_word()

Ones founded: 4234
Single Character Display.
The final word is:
 0WBEW


In [7]:
p1.data_name + '_Model'

'TrainData1_Model'

## Person 2

In [8]:
p2 = Person(data2)
p2.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %
Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in p

In [9]:
p2.load_test_data(data2_test)


Loading the test data is started...
Creating all possible features...



In [10]:
p2.find_test_word()

Ones founded: 3307
Single Character Display.
The final word is:
 1UKAS


In [11]:
p2.fisher_ind.shape

(187,)

## Person 3

In [12]:
p3 = Person(data3)
p3.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %
Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in p

In [13]:
p3.load_test_data(data3)


Loading the test data is started...
Creating all possible features...



In [14]:
p3.find_test_word()

Ones founded: 1374
Row-Column Display.
The final word is:
 7O2AD


## Person 4

In [15]:
p4 = Person(data4)
p4.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %
Cross-Validation in progress: 3.33 %


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stab

Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation in progress: 46.67 %
Cross-Validation in progress: 48.33 %
Cross-Validation in progress: 50.00 %
Cross-Validation in progress: 51.67 %
Cross-Validation in progress: 53.33 %
Cross-Validation in progress: 55.00 %
Cross-Validation in progress: 56.67 %
Cross-Valida

In [16]:
p4.load_test_data(data4_test)


Loading the test data is started...
Creating all possible features...



In [17]:
p4.find_test_word()

Ones founded: 946
Row-Column Display.
The final word is:
 52ZCD


## Person 5

In [18]:
p5 = Person(data5)
p5.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %
Cross-Validation in progress: 3.33 %


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stab

Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation in progress: 46.67 %
Cross-Validatio

In [19]:
p5.load_test_data(data5_test)


Loading the test data is started...
Creating all possible features...



In [20]:
p5.find_test_word()

Ones founded: 1956
Row-Column Display.
The final word is:
 XAPZT


## Person 6

In [21]:
p6 = Person(data6)
p6.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %


  fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))


Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation

In [22]:
p6.load_test_data(data6_test)


Loading the test data is started...
Creating all possible features...



In [23]:
p6.find_test_word()

Ones founded: 1784
Row-Column Display.
The final word is:
 MJD4R


## Person 7

In [24]:
p7 = Person(data7)
p7.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %


  fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))


Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation

In [25]:
p7.load_test_data(data7_test)


Loading the test data is started...
Creating all possible features...



In [26]:
p7.find_test_word()

Ones founded: 984
Row-Column Display.
The final word is:
 PXLMU


## Person 8

In [27]:
p8 = Person(data8)
p8.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %


  fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))


Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation

In [28]:
p8.load_test_data(data8)


Loading the test data is started...
Creating all possible features...



In [29]:
p8.find_test_word()

Ones founded: 1560
Row-Column Display.
The final word is:
 3RV4D


## Person 9

In [30]:
p9 = Person(data9)
p9.fit()

Creating all possible features...


Finding the best model is started...
Cross-Validation in progress: 0.00 %
Cross-Validation in progress: 1.67 %


  fisher_score = np.divide((n1 * (u1 - u0)**2 + n2 * (u2 - u0)**2),(n1*var1 + n2*var2))


Cross-Validation in progress: 3.33 %
Cross-Validation in progress: 5.00 %
Cross-Validation in progress: 6.67 %
Cross-Validation in progress: 8.33 %
Cross-Validation in progress: 10.00 %
Cross-Validation in progress: 11.67 %
Cross-Validation in progress: 13.33 %
Cross-Validation in progress: 15.00 %
Cross-Validation in progress: 16.67 %
Cross-Validation in progress: 18.33 %
Cross-Validation in progress: 20.00 %
Cross-Validation in progress: 21.67 %
Cross-Validation in progress: 23.33 %
Cross-Validation in progress: 25.00 %
Cross-Validation in progress: 26.67 %
Cross-Validation in progress: 28.33 %
Cross-Validation in progress: 30.00 %
Cross-Validation in progress: 31.67 %
Cross-Validation in progress: 33.33 %
Cross-Validation in progress: 35.00 %
Cross-Validation in progress: 36.67 %
Cross-Validation in progress: 38.33 %
Cross-Validation in progress: 40.00 %
Cross-Validation in progress: 41.67 %
Cross-Validation in progress: 43.33 %
Cross-Validation in progress: 45.00 %
Cross-Validation

In [31]:
p9.load_test_data(data9)


Loading the test data is started...
Creating all possible features...



In [32]:
p9.find_test_word()

Ones founded: 1435
Row-Column Display.
The final word is:
 5PZAN
