In [1]:
import pandas as pd
import numpy as np
import glob
from sklearn.feature_selection import SelectKBest, chi2
from trainer import train_evaluate
from feature_selection import get_k_best
import json

In [2]:
def get_data(files):
    """Load one tab-delimited integer matrix per file and stack them into a labelled dataset.

    Every file is read with ``np.loadtxt(..., dtype='i', delimiter='\\t')`` and
    transposed, so each *column* of a file becomes one *row* of the result.
    All rows that come from ``files[i]`` receive the integer label ``i``
    (the position of the file in ``files``), so the order of ``files``
    determines the label assignment.

    Parameters
    ----------
    files : sequence of str
        Paths of the files to load. Must contain at least one path.

    Returns
    -------
    X : numpy.ndarray
        The transposed matrices of all files stacked vertically.
    Y : numpy.ndarray
        For every row of ``X``, the index of the file it was read from.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    if len(files) == 0:
        raise ValueError("get_data() needs at least one file path")

    labelled_parts = []
    for label, path in enumerate(files):
        # Transpose so that each column of the file becomes one row.
        samples = np.loadtxt(path, dtype='i', delimiter='\t').T
        labels = np.full(len(samples), label)
        labelled_parts.append(np.column_stack((samples, labels)))

    # One concatenation at the end instead of re-copying the growing matrix
    # once per file.
    data_matrix = np.concatenate(labelled_parts, axis=0)

    X = data_matrix[:, :-1]
    Y = data_matrix[:, -1]
    return X, Y

In [3]:
# glob.glob returns matches in arbitrary, filesystem-dependent order, and
# get_data labels each file's rows with the file's position in the list.
# Sorting keeps the file -> label mapping stable between runs and machines.
files = sorted(glob.glob('dataset/*.txt'))
X, Y = get_data(files)

# Last expression of the cell, so its value is displayed as the cell output
# (get_k_best comes from the local feature_selection module).
get_k_best(X,Y,10)

array([[  5,  20, 135, ...,   1,   1,   0],
       [  2,  18, 127, ...,   1,   1,   1],
       [  1,  12, 164, ...,   0,   1,   0],
       ...,
       [  3,  15, 191, ...,   0,   0,   1],
       [  6,  19, 208, ...,   0,   1,   0],
       [  1,   1, 191, ...,   1,   1,   0]])

In [4]:
# Fit a chi2-based SelectKBest on the full data set to obtain one score per feature.
fvalue_selector = SelectKBest(chi2, k=10)
fvalue_selector.fit(X, Y)

# Per-feature scores, in the original column order of X.
rank = fvalue_selector.scores_

# Column indices of the (up to) ten largest scores, highest first.
indexes = rank.argsort()[::-1][:10]

# The scores that belong to those columns, in the same order.
top_rank = [rank[position] for position in indexes]

In [5]:
# Search space and bookkeeping for the hyper-parameter sweep below.
layer_sizes = [100, 200, 350]   # candidate values passed as layer_size
momentum_values = [0, 0.9]      # candidate values passed as momentum_value
max_patience = 10               # non-improving feature counts tolerated before stopping
filename = "resultaty.csv"      # CSV file that receives the per-run results table

In [None]:
# Grid search over layer size and momentum. For every combination the number
# of selected features grows from 1 up to the number of columns of X; the
# inner loop stops early once `max_patience` consecutive feature counts fail
# to beat the best score seen so far for that combination.
result_columns = ["layer_size", "momentum_value", "feature_number", "score"]

# One dict per evaluated configuration. Rows are collected in a plain list and
# turned into a DataFrame once at the end: DataFrame.append was removed in
# pandas 2.0 and re-copied the whole frame on every call. The list also keeps
# the partial results available if the cell is interrupted.
records = []

best_score = 0
best_matrix = []
best_params = {}
for layer_size in layer_sizes:
    for momentum_value in momentum_values:
        patience = max_patience
        temp_best_score = 0
        temp_best_matrix = []
        temp_best_params = {}
        for feature_number in range(1, np.shape(X)[1] + 1):

            score, matrix = train_evaluate(X=get_k_best(X, Y, feature_number),
                                           Y=Y,
                                           momentum_value=momentum_value,
                                           layer_size=layer_size)
            params = {"layer_size": layer_size,
                      "momentum_value": momentum_value,
                      "feature_number": feature_number,
                      "score": score}
            records.append(params)
            print(f"{layer_size}\t{momentum_value}\t{feature_number}\t{score}")
            if score > temp_best_score:
                # New best for this (layer_size, momentum_value): remember it
                # and reset the early-stopping counter.
                temp_best_matrix = matrix
                temp_best_score = score
                patience = max_patience
                temp_best_params = params
            else:
                patience -= 1
                if patience == 0:
                    break

        if temp_best_score > best_score:
            # Record the new global best score as well; without this update
            # any later combination with a positive score would overwrite
            # the stored best matrix/params.
            best_score = temp_best_score
            best_matrix = temp_best_matrix
            best_params = temp_best_params

results = pd.DataFrame(records, columns=result_columns)

# The file must be opened for writing; the default mode is read-only.
with open("best_params.json", "w") as f:
    json.dump(best_params, f)
# np.savetxt needs the array to write as its second argument.
# NOTE(review): assumes the matrix returned by train_evaluate is a 1-D or 2-D
# numeric array-like, as np.savetxt requires — confirm against trainer.py.
np.savetxt("best_matrix.txt", best_matrix)
results.to_csv(filename)

100	0	1	0.2521586597684159
100	0	2	0.328989898989899
100	0	3	0.3298881497905888
100	0	4	0.32852475979305246
100	0	5	0.32361320522296133
100	0	6	0.32582113821138214
100	0	7	0.2923557526484356
100	0	8	0.32871741808327176
100	0	9	0.31608474993840846
100	0	10	0.39943532889874356
100	0	11	0.2931919191919191
100	0	12	0.29544961813254494
100	0	13	0.3378354274451836
100	0	14	0.30437595466863765
100	0	15	0.3371515151515151
100	0	16	0.36718403547671846
100	0	17	0.350709534368071
100	0	18	0.3877560975609756
100	0	19	0.3848780487804878
100	0	20	0.3740044345898005
100	0.9	1	0.29189701897018966
100	0.9	2	0.4157580684897758
100	0.9	3	0.47232914510963286
100	0.9	4	0.5065479182064548
100	0.9	5	0.4814372998275437
100	0.9	6	0.4495476718403547
100	0.9	7	0.48591327913279125
100	0.9	8	0.5030238975117024
100	0.9	9	0.4601685144124169
100	0.9	10	0.4798684405025869
100	0.9	11	0.5038625277161863
100	0.9	12	0.4817038679477704
100	0.9	13	0.42618378911061844
100	0.9	14	0.4317368810051737
200	0	1	0.2493013057403301
