In [14]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.feature_selection import SelectKBest, chi2
from trainer import train_evaluate

from feature_selection import get_k_best
import json

In [2]:
def get_data(files):
    """Load one tab-separated integer matrix per file and stack them into a labelled data set.

    Each file is read with ``np.loadtxt`` and transposed, so the columns of a
    file become rows of the result. Every row that comes from ``files[i]`` is
    given the label ``i``; the order of ``files`` therefore decides the labels.

    Parameters
    ----------
    files : sequence of str
        Paths of the files to load. Must not be empty.

    Returns
    -------
    X : numpy.ndarray
        All transposed matrices concatenated row-wise.
    Y : numpy.ndarray
        For each row of ``X``, the position in ``files`` of the file it came from.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    if len(files) == 0:
        raise ValueError("get_data needs at least one file to load")

    labelled_blocks = []
    for label, path in enumerate(files):
        # ndmin=2 keeps a single-row or single-column file two-dimensional, so
        # the transpose still turns the file's columns into rows.
        block = np.loadtxt(path, dtype='i', delimiter='\t', ndmin=2).T
        label_column = np.full(len(block), label)
        labelled_blocks.append(np.column_stack((block, label_column)))

    # Concatenate once instead of re-copying the growing matrix for every file.
    data_matrix = np.concatenate(labelled_blocks, axis=0)

    X = data_matrix[:, :-1]
    Y = data_matrix[:, -1]
    return X, Y

In [3]:
# glob returns paths in an order that depends on the file system; sorting fixes
# the order so get_data assigns the same label to the same file on every run.
files = sorted(glob.glob('dataset/*.txt'))
X, Y = get_data(files)

get_k_best(X,Y,10)

array([[  5,  20, 135, ...,   1,   1,   0],
       [  2,  18, 127, ...,   1,   1,   1],
       [  1,  12, 164, ...,   0,   1,   0],
       ...,
       [  3,  15, 191, ...,   0,   0,   1],
       [  6,  19, 208, ...,   0,   1,   0],
       [  1,   1, 191, ...,   1,   1,   0]])

In [4]:
# Score every feature against the labels with the chi-squared statistic.
fvalue_selector = SelectKBest(score_func=chi2, k=10).fit(X, Y)
rank = fvalue_selector.scores_

# Positions of the ten highest scores, best first.
indexes = np.argsort(rank)[::-1][:10]

# The scores found at those positions, in the same order.
top_rank = [rank[index] for index in indexes]

In [16]:
# Hyper-parameter grid explored by the search.
layer_sizes = [100, 200, 350]
momentum_values = [0, 0.9]

# Value the search's patience counter starts from and is reset to.
max_patience = 100

# CSV file the results table is written to and read back from.
filename = "resultaty.csv"

# Stratified 2-fold split repeated 5 times with a fixed random_state.
rkf = RepeatedStratifiedKFold(
    n_repeats=5,
    n_splits=2,
    random_state=3,
)

In [17]:
# Grid search: for every cross-validation split, hidden-layer size and momentum
# value, grow the number of selected features from 1 up to all of them, record
# every score and remember the best-scoring run overall.
#
# NOTE(review): get_k_best receives the full X and Y, not only the training
# rows of the current split -- confirm that this does not leak test data.
result_columns = ["fold", "layer_size", "momentum_value", "feature_number", "score"]
records = []  # one dict per train_evaluate call
best_score = 0
best_matrix = []
best_params = {}
try:
    for fold, (train, test) in enumerate(rkf.split(X, Y)):
        for layer_size in layer_sizes:
            for momentum_value in momentum_values:
                # Early stopping over the feature count: give up after
                # max_patience consecutive runs without a new best score.
                patience = max_patience
                temp_best_score = 0
                temp_best_matrix = []
                temp_best_params = {}
                for feature_number in range(1, np.shape(X)[1] + 1):

                    score, matrix = train_evaluate(X=get_k_best(X, Y, feature_number),
                                                   Y=Y,
                                                   momentum_value=momentum_value,
                                                   layer_size=layer_size,
                                                   train=train,
                                                   test=test)
                    params = {"fold": fold,
                              "layer_size": layer_size,
                              "momentum_value": momentum_value,
                              "feature_number": feature_number,
                              "score": score}
                    records.append(params)
                    print(f"{layer_size}\t{momentum_value}\t{feature_number}\t{score}")
                    if score > temp_best_score:
                        temp_best_matrix = matrix
                        temp_best_score = score
                        patience = max_patience
                        temp_best_params = params
                    else:
                        patience -= 1
                        if patience == 0:
                            break

                if temp_best_score > best_score:
                    # Update the running maximum too; without this every
                    # configuration scoring above 0 would replace the "best" one.
                    best_score = temp_best_score
                    best_matrix = temp_best_matrix
                    best_params = temp_best_params
finally:
    # Build the frame once instead of appending row by row (DataFrame.append
    # was removed in pandas 2.0). Doing it in `finally` keeps the partial
    # results available when the search is interrupted or fails.
    results = pd.DataFrame(records, columns=result_columns)

# Mode 'w' so that re-running the cell does not fail with FileExistsError
# after the whole search has already been computed.
with open("best_params.json", 'w') as f:
    json.dump(best_params, f)
np.savetxt("best_matrix.txt", best_matrix)

100	0	1	0.2926829268292683
100	0	2	0.2926829268292683
100	0	3	0.36807095343680707
100	0	4	0.21951219512195122
100	0	5	0.21729490022172948
100	0	6	0.4878048780487805
100	0	7	0.376940133037694
100	0	8	0.4279379157427938
100	0	9	0.31263858093126384
100	0	10	0.188470066518847
100	0	11	0.2328159645232816
100	0	12	0.36363636363636365
100	0	13	0.37250554323725055
100	0	14	0.29490022172949004
100	0	15	0.37028824833702884
100	0	16	0.28159645232815966
100	0	17	0.3902439024390244
100	0	18	0.458980044345898
100	0	19	0.3902439024390244
100	0	20	0.35033259423503327
100	0	21	0.3082039911308204
100	0	22	0.3902439024390244
100	0	23	0.34146341463414637
100	0	24	0.3303769401330377
100	0	25	0.35476718403547675
100	0	26	0.31042128603104213
100	0	27	0.37472283813747226
100	0	28	0.43015521064301554
100	0	29	0.37472283813747226
100	0	30	0.3370288248337029
100	0	31	0.4079822616407982
100	0	32	0.48337028824833705
100	0	33	0.43015521064301554
100	0	34	0.22394678492239467
100	0	35	0.37915742793791574
100	0	36	0.3



PermissionError: [Errno 13] Permission denied: 'resultaty.csv'

In [18]:
# Persist the results table; the DataFrame index is written as the first CSV column.
results.to_csv(path_or_buf=filename)

In [10]:
# Load the results table back from the CSV file.
resultaty = pd.read_csv(filepath_or_buffer=filename)