#### 1. Importing all the libraries

In [1]:
import pandas as pd
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import seaborn as sns
import sys
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn import tree
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

#### 2. Define the Functions

In [2]:
# Build the data set used by a training episode or by a policy evaluation
def get_data(episode_size,policy,mode):
    """Select the rows to work on and publish them as the global ``dataset``.

    Parameters
    ----------
    episode_size : number of rows drawn when a training episode is sampled.
    policy : 0 draws a random sample; any other value uses the full data.
    mode : 'train' works on the global ``data`` frame; every other value
        reads the '<file>_test_int.csv' file found under ``location``.

    Returns
    -------
    The selected DataFrame (the same object that is stored in ``dataset``).
    """
    global dataset
    if mode != 'train':
        # evaluation data comes from the separate test file on disk
        dataset = pd.read_csv(location + '/' + file +'_test_int.csv', index_col=0)
    elif policy == 0:
        # episode data: random rows of the training frame
        dataset = data.sample(n=episode_size)
    else:
        # policy evaluation on the training data: the frame itself, not a copy
        dataset = data
    return dataset

In [3]:
# Split a data set into its feature columns and its label column
def data_separate (dataset):
    """Return ``(X, y)`` where ``y`` is the last column and ``X`` is every column before it.

    Both results are also stored in the module-level globals ``X`` and ``y``.
    """
    global X
    global y
    label_position = dataset.shape[1] - 1
    # features: all rows, every column up to (not including) the label
    X = dataset.iloc[:, :label_position]
    # label: all rows, last column only
    y = dataset.iloc[:, -1]
    return X, y

In [4]:
# Function that splits the episode data into train and test
def data_split(X,y,test_size=0.2,random_state=4):
    """Split features and labels into a train part and a test part.

    Parameters
    ----------
    X : feature table.
    y : labels aligned with ``X``.
    test_size : share of the rows held out for testing (default 0.2, the
        value that used to be hard-coded).
    random_state : seed handed to ``train_test_split`` (default 4, the value
        that used to be hard-coded), so the same input always yields the
        same split.

    Returns
    -------
    ``(X_train_main, X_test_main, y_train, y_test)``; the four results are
    also stored in the module-level globals of the same names.
    """
    global X_train_main
    global X_test_main
    global y_train
    global y_test
    # kept local: the notebook's import cell does not import train_test_split
    from sklearn.model_selection import train_test_split
    X_train_main, X_test_main, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    return X_train_main, X_test_main, y_train, y_test

In [5]:
# Decide whether the next step explores (random pick) or exploits (greedy pick)
def exploration_explotation(epsilon):
    """Draw one uniform random number and return 1 (explore) when it is below ``epsilon``, else 0.

    The flag is also stored in the module-level global ``exploration``.
    """
    global exploration
    exploration = 1 if np.random.rand() < epsilon else 0
    return exploration

In [6]:
# List the columns that can still be chosen from the given state
def available_actions(number_of_columns,columns,initial_state,current_state,trashold, exploration):
    """Return the candidate actions (column indices) for ``current_state``.

    Candidates are the indices 0..number_of_columns minus the columns already
    selected, ``initial_state`` and ``current_state``. When ``exploration`` is
    0, only candidates whose value in the global ``Q`` row of ``current_state``
    is strictly above ``trashold`` are kept.

    ``columns`` is not modified. The globals ``all_columns`` and ``exclude``
    are overwritten with the intermediate results.
    """
    global exclude
    global all_columns
    all_columns = np.arange(number_of_columns + 1)
    # already selected columns plus the two states that may not be revisited
    exclude = columns.copy()
    exclude.extend([initial_state, current_state])
    available_act = list(set(all_columns) - set(exclude))
    if exploration != 0:
        # exploring: every remaining column is a candidate
        return available_act
    # exploiting: keep only candidates whose Q value exceeds the threshold.
    # np.where(...)[1] needs a 2-D comparison result, i.e. Q must be an np.matrix
    # (which is how the experiment cell creates it).
    kept_positions = np.where(Q[current_state, available_act] > trashold)[1]
    return [available_act[position] for position in kept_positions.tolist()]

In [7]:
# Function that chooses which action to perform according to the exploration - exploitation flag
def sample_next_action(current_state, Q, available_act, exploration):
    """Pick the next action (column index) out of ``available_act``.

    Parameters
    ----------
    current_state : row of ``Q`` that is consulted.
    Q : Q table (np.matrix or 2-D ndarray) indexed as ``Q[state, action]``.
    available_act : non-empty list of candidate actions.
    exploration : 1 picks uniformly at random; any other value picks the
        candidate with the highest Q value, breaking ties at random.

    Returns
    -------
    The chosen action. The Q values of the candidates are also stored in the
    global ``available_act_q_value``.
    """
    global available_act_q_value
    # flatten so the same code works whether the indexing result is 1-D or 2-D
    available_act_q_value=np.array(Q[current_state,available_act]).reshape(-1,).tolist()
    if exploration==1:
        # random selection; drawing without a size returns a scalar, which avoids the
        # deprecated conversion of a 1-element array to int
        return int(np.random.choice(available_act))
    # greedy selection according to the max Q value
    best_q = max(available_act_q_value)
    if available_act_q_value.count(best_q) > 1:
        # several candidates share the best value: pick one of them at random
        tied = [i for i, q in enumerate(available_act_q_value) if q == best_q]
        chosen = np.random.choice(tied)
    else:
        chosen = available_act_q_value.index(best_q)
    return available_act[chosen]

In [8]:
# Record the newly selected column in the list of columns chosen so far
def update_columns(action, columns):
    """Append ``action`` to ``columns`` in place and return that same list object."""
    columns.append(action)
    return columns

In [9]:
# Restrict the train and test feature tables to the columns selected so far
def update_X_train_X_test(columns,X_train_main, X_test_main):
    """Return ``(X_train, X_test)`` holding only the positional ``columns``, in the given order.

    Each result is wrapped in a new DataFrame object.
    """
    X_train, X_test = (
        pd.DataFrame(full_table.iloc[:, columns])
        for full_table in (X_train_main, X_test_main)
    )
    return X_train, X_test

In [10]:
# Function that runs the learner and gets the error for the current episode columns list
def Learner(X_train, X_test,y_train, y_test):
    """Fit the classifier named by the global ``learner_model`` and return its test error.

    Supported codes: 'DT' (decision tree), 'KNN' (5 neighbours, hamming
    metric), 'SVM', 'NB' (multinomial naive Bayes), 'AB' (AdaBoost) and
    'GB' (gradient boosting).

    Parameters
    ----------
    X_train, y_train : data the classifier is fitted on.
    X_test, y_test : data the classifier is scored on.

    Returns
    -------
    ``1 - accuracy`` on the test data.

    Raises
    ------
    ValueError
        If ``learner_model`` is not one of the supported codes. Previously an
        unknown code skipped every branch and scored whatever ``y_pred`` was
        left over from an earlier call (or failed with a NameError).

    The fitted estimator and its predictions are also stored in the globals
    ``learner`` and ``y_pred``.
    """
    global learner
    global y_pred
    # lambdas defer the construction so only the requested estimator is built
    learner_factories = {
        'DT': lambda: tree.DecisionTreeClassifier(),
        'KNN': lambda: KNeighborsClassifier(metric='hamming',n_neighbors=5),
        'SVM': lambda: SVC(),
        'NB': lambda: MultinomialNB(),
        'AB': lambda: AdaBoostClassifier(),
        'GB': lambda: GradientBoostingClassifier(),
    }
    if learner_model not in learner_factories:
        raise ValueError('unknown learner_model: ' + repr(learner_model)
                         + ' (expected one of ' + ', '.join(sorted(learner_factories)) + ')')
    learner = learner_factories[learner_model]()
    learner = learner.fit(X_train, y_train)
    y_pred = learner.predict(X_test)
    accuracy=metrics.accuracy_score(y_test, y_pred)
    error=1-accuracy
    return error

In [11]:
# Function that updates the Q matrix for the transition current_state -> action
def q_update(current_state ,action,learning_rate, reward):
    """Update the global ``Q`` entry ``Q[current_state, action]`` in place.

    The chosen action becomes the next state, so the bootstrap term is the
    largest value in row ``action`` of ``Q`` (read before the entry is
    changed).

    * An entry that still holds its initial value 1 is treated as unvisited
      and is set to ``learning_rate * reward``.
    * Any other entry follows the Q-learning rule
      ``Q += learning_rate * (reward + discount_factor * max_next - Q)``,
      with ``discount_factor`` read from the module-level global.

    The previous version located the index of the row maximum (breaking ties
    with a random draw) only to read the maximum back from that index. Every
    tied index holds the same value, so the maximum is now taken directly.
    This also drops the deprecated ``int()`` conversion of 1-element arrays
    and no longer requires ``Q`` to be an ``np.matrix``.

    Returns nothing.
    """
    # next_state = current action
    max_value = np.max(Q[action,])
    # we start with 1 for all Q values and update with the reward only the 1st time
    if Q[current_state, action]==1:
        Q[current_state, action] = learning_rate*reward
    else:
        Q[current_state, action] = Q[current_state, action]+ learning_rate*(reward + (discount_factor * max_value)-Q[current_state, action])

### Experiment management

#### 3. Define the parameters 

In [12]:
#Experiment: 
experiment='new_No_show_DT_2'      # name of the output folder under Experiments/
number_of_experiment=5             # how many independent runs are executed
# Dataset parameters #
# NOTE(review): machine-specific absolute paths - adjust them before running elsewhere
location = 'C:/Users/mmoran1/Documents/TAU/Thesis/Datasets'
outputlocation='C:/Users/mmoran1/Documents/TAU/Thesis'
file='No_show' #adult #diabetic_data #Census_Income_KDD
#np.random.seed(3)

# Q learning parameter # 
# learning_rate and epsilon are only starting values: the experiment loop
# overwrites both with a per-phase schedule at the start of every episode
learning_rate=0.005
discount_factor=0.01
epsilon = 0.1

# Learner and episode parameters #
learner_model = 'DT' #DT #KNN #SVM
episode_size=100                   # rows sampled per training episode
internal_trashold=0                # Q threshold used while learning (exploitation steps)
external_trashold=0                # Q threshold used when the policy is extracted
filename= file +'_int.csv'

#Experiments folder management: 
# Every output lives under the relative 'Experiments/' folder. The previous check for
# '/Experiments' (leading slash) created an unrelated folder at the filesystem root;
# os.makedirs below already creates the missing parent folder.
experiment_dir = 'Experiments/'+ str(experiment)
if os.path.exists(experiment_dir):
    shutil.rmtree(experiment_dir)          #removes all the subdirectories!
os.makedirs(experiment_dir)
writer = pd.ExcelWriter(experiment_dir + '/df.xlsx') 

# Keep a record of the settings next to the results; the with-block closes the
# file even if one of the writes fails
with open(experiment_dir +'/parameters.txt', "w") as text_file:
    text_file.write('experiment: ' + str(experiment)+ '\n')
    text_file.write('number of experiments: ' + str(number_of_experiment)+ '\n')
    text_file.write('file: ' + str(file)+ '\n')
    text_file.write('learner model: ' + str(learner_model)+ '\n')
    text_file.write('episode size: ' + str(episode_size)+ '\n')
    text_file.write('internal trashold: ' + str(internal_trashold)+ '\n')
    text_file.write('external trashold: ' + str(external_trashold)+ '\n')

#### 4. Run all experiments

In [None]:
# Run every experiment: learn a Q table over feature columns, extract the greedy
# policy after each episode, score it on the train and test data, and save the
# per-episode results (Excel sheet + plot) for each run.
for e in range (number_of_experiment):
    # Every run writes into its own, freshly created folder
    experiment_run_dir = 'Experiments/'+ str(experiment)+ '/'+ str(e)
    if os.path.exists(experiment_run_dir):
        shutil.rmtree(experiment_run_dir)          #removes all the subdirectories!
    os.makedirs(experiment_run_dir)
    print ('Experiments ' + str(e) + ' start')
##########################Experiment setup##########################
    # Read the data
    data = pd.read_csv(location + '/' + filename, index_col=0)
    #Set the number of iterations:
    interations=10*len(data.index)/episode_size
    # Set the number of columns exclude the class column
    number_of_columns=data.shape[1]-1 
    print ("number of columns: "+ str(number_of_columns) +" (exclude class column)" ) 
    # Set the number of episodes 
    episodes_number=interations
    print ("Number of episodes: "+ str(episodes_number) ) 
    # Initialize matrix Q as a 1 values matrix:
    Q = np.matrix(np.ones([number_of_columns+1,number_of_columns+1])) # we will use the last dummy columns as initial state s
    # Set initial_state to be the last dummy column we have created
    initial_state=number_of_columns
    # Rows of the per-episode results table; the DataFrame is built once after the
    # episode loop (DataFrame.append was removed in pandas 2.0 and copied the whole
    # table on every call)
    episode_records=[]
    print ("initial state number: "+ str(initial_state) + " (the last dummy column we have created)") 
    # The data used to score a policy is the same in every episode (full training
    # data / test file, split by data_split with a fixed random_state), so both
    # splits are prepared once per experiment instead of once per episode.
    policy_data=get_data(episode_size,policy=1,mode='train')
    X_policy,y_policy=data_separate(policy_data)
    policy_split_train = data_split(X_policy,y_policy)
    policy_data=get_data(episode_size,policy=1,mode='test')
    X_policy,y_policy=data_separate(policy_data)
    policy_split_test = data_split(X_policy,y_policy)
    ##########################  episode  ##########################  
    for i in range (int(episodes_number)):
    ########## Begining of episode  ############
        # Initiate lists for available_act, episode_columns and and the policy mode & episode_error
        episode_available_act=list(np.arange(number_of_columns))
        episode_columns=[]
        policy=0
        episode_error=0
        # Initiate the error to 0.5
        episode_last_error=0.5
        # Initiate current_state to be initial_state
        episode_current_state=initial_state
        # Create the episode data 
        episode= get_data(episode_size, policy=0, mode='train')
        # Separate the episode data into features and label
        X_episode,y_episode=data_separate(episode)
        # Split the data into train and test 
        X_train_main_episode, X_test_main_episode, y_train_episode, y_test_episode = data_split(X_episode,y_episode)
        # Exploration rate and learning rate shrink in four equal phases of the run
        if i<episodes_number*0.25:
            epsilon=0.9
            learning_rate=0.09
        elif i<episodes_number*0.5:
            epsilon=0.5
            learning_rate=0.05
        elif i<episodes_number*0.75:
            epsilon=0.3
            learning_rate=0.01
        else:
            epsilon=0.1
            learning_rate=0.005
        ########## Q learning start ############

        while len(episode_available_act)>0:
            # Get exploration or explotation flag 
            exploration=exploration_explotation(epsilon)
            # Get available actions in the current state
            episode_available_act = available_actions(number_of_columns,episode_columns,initial_state,episode_current_state,internal_trashold,exploration)
            if len(episode_available_act)>0:
                # Sample next action to be performed
                episode_action = sample_next_action(episode_current_state, Q, episode_available_act, exploration)
                # Update the episode_columns
                episode_columns=update_columns(episode_action,episode_columns)
                # Update the dataset to include all episode columns + current selected action (column)
                X_train_episode, X_test_episode =update_X_train_X_test(episode_columns,X_train_main_episode, X_test_main_episode)
                # Update the accuracy of the current columns
                episode_error= Learner(X_train_episode, X_test_episode, y_train_episode, y_test_episode)
                # Update reward: how much the error dropped by adding this column
                episode_reward=episode_last_error-episode_error
                # Update Q matrix
                q_update(episode_current_state,episode_action,learning_rate, episode_reward)
                # Update parameters for next round 
                episode_current_state=episode_action
                episode_last_error=episode_error
                 
        ########## Q learning End ############

        #Save Q matrix: 
        if (i%100 ==0):
            Q_save=pd.DataFrame(Q)
            Q_save.to_csv(experiment_run_dir + '/Q.'+ str(i+1) + '.csv') 

        # Calculate policy: follow the best Q value greedily from the initial state
        policy_available_actions=list(np.arange(number_of_columns))
        policy_columns=[]
        policy_current_state=initial_state
        while len(policy_available_actions)>0:
            # Get available actions in the current state
            policy_available_actions = available_actions(number_of_columns,policy_columns,initial_state,policy_current_state, external_trashold, exploration=0)
            # Sample next action to be performed
            if len(policy_available_actions)>0:
                policy_select_action = sample_next_action(policy_current_state, Q, policy_available_actions, exploration=0)
                # Update the policy_columns
                policy_columns=update_columns(policy_select_action,policy_columns)
                policy_current_state=policy_select_action
        # Calculate policy_accuracy    
        if len(policy_columns)>0:
            ##for training dataset##
            X_train_main_policy, X_test_main_policy, y_train_policy, y_test_policy = policy_split_train
            X_train_policy, X_test_policy =update_X_train_X_test(policy_columns, X_train_main_policy, X_test_main_policy)
            policy_error=Learner(X_train_policy, X_test_policy,y_train_policy, y_test_policy)
            policy_accuracy_train=1-policy_error
            ##for testing dataset##
            X_train_main_policy, X_test_main_policy, y_train_policy, y_test_policy = policy_split_test
            X_train_policy, X_test_policy =update_X_train_X_test(policy_columns, X_train_main_policy, X_test_main_policy)
            policy_error=Learner(X_train_policy, X_test_policy,y_train_policy, y_test_policy)
            policy_accuracy_test=1-policy_error 
        else:
            # no column passed the threshold: nothing to train on
            policy_accuracy_train=0 
            policy_accuracy_test=0
        episode_records.append({'episode':str(i+1), 'episode_columns':str(episode_columns),'policy_columns':str(policy_columns),'policy_accuracy_train':policy_accuracy_train,'policy_accuracy_test':policy_accuracy_test})
        #Prints
        print ("episode "+ str(i+1) +" start") 
        print ("episode columns: "+ str(episode_columns) + " epsilon: " + str(epsilon) + " learning rate: " + str(learning_rate) + " error: " +str(episode_error))
        print ("episode policy:" + str(policy_columns) + " train accuracy: " + str(policy_accuracy_train)  + " test accuracy: " +str(policy_accuracy_test)) 
        print ("episode "+ str(i+1) +" end") 
    ########## End of episode  ############
    # data frame with the episode policies results of this experiment
    df = pd.DataFrame(episode_records, columns=['episode','episode_columns','policy_columns','policy_accuracy_train','policy_accuracy_test'])
    df.to_excel(writer, sheet_name='Experiment' + str(e))
    df_plot=df[['episode','policy_accuracy_train','policy_accuracy_test']]
    plot=df_plot.plot()
    fig = plot.get_figure()
    fig.savefig('Experiments/'+ str(experiment) + '/plot_experiment_' + str(e) +'.png')
# close() writes the workbook to disk; ExcelWriter.save() was removed in pandas 2.0
writer.close()

Experiments 0 start
number of columns: 12 (exclude class column)
Number of episodes: 27000.0
initial state number: 12 (the last dummy column we have created)
episode 1 start
episode columns: [2, 5, 3, 4, 11, 6, 9, 8, 0, 7, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[11, 10, 0, 4, 3, 5, 7, 6, 1, 8, 2, 9] train accuracy: 0.62012962963 test accuracy: 0.584166666667
episode 1 end
episode 2 start
episode columns: [4, 8, 6, 7, 2, 0, 9, 3, 5, 10, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[0, 3, 1, 6, 8, 10, 5, 2, 9, 7, 4] train accuracy: 0.677314814815 test accuracy: 0.6165
episode 2 end
episode 3 start
episode columns: [9, 6, 0, 5, 4, 7, 3, 8, 10, 1, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 6, 2, 4, 1, 11, 0, 3, 10, 9, 7, 8] train accuracy: 0.619740740741 test accuracy: 0.587333333333
episode 3 end
episode 4 start
episode columns: [7, 4, 8, 10, 6, 2, 5, 3, 0, 1, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
ep

episode 35 start
episode columns: [6, 4, 0, 10, 5, 2, 8, 9, 3, 11, 1, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 10, 3, 7, 1, 0, 2, 5, 8, 9, 11, 4] train accuracy: 0.619666666667 test accuracy: 0.584333333333
episode 35 end
episode 36 start
episode columns: [4, 11, 10, 7, 3, 1, 6, 0, 8, 5, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 10, 3, 7, 1, 0, 2, 5, 8, 9, 11, 4] train accuracy: 0.619333333333 test accuracy: 0.583
episode 36 end
episode 37 start
episode columns: [6, 9, 7, 3, 0, 4, 10, 1, 5, 2, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 10, 1, 0, 2, 5, 8, 9, 11, 4, 7] train accuracy: 0.621814814815 test accuracy: 0.5835
episode 37 end
episode 38 start
episode columns: [6, 10, 1, 11, 8, 2, 9, 4, 5, 7, 3, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 10, 3, 7, 1, 0, 2, 5, 8, 9, 11, 4] train accuracy: 0.620296296296 test accuracy: 0.5855
episode 38 end
episode 39 start
episode columns: [11,

episode 70 start
episode columns: [3, 11, 0, 1, 5, 2, 8, 9, 7, 10, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[2, 10, 7, 8, 0, 1, 5, 4, 9, 11, 3, 6] train accuracy: 0.619722222222 test accuracy: 0.585166666667
episode 70 end
episode 71 start
episode columns: [8, 2, 10, 6, 3, 5, 4, 9, 1, 0, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[2, 10, 7, 8, 0, 1, 5, 4, 9, 11, 3, 6] train accuracy: 0.6195 test accuracy: 0.585666666667
episode 71 end
episode 72 start
episode columns: [9, 2, 10, 1, 7, 6, 5, 11, 3, 4, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[2, 10, 7, 8, 0, 1, 5, 4, 9, 11, 3, 6] train accuracy: 0.619740740741 test accuracy: 0.585833333333
episode 72 end
episode 73 start
episode columns: [11, 10, 1, 2, 5, 4, 6, 8, 3, 9, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[2, 10, 7, 8, 0, 1, 5, 4, 9, 11, 3, 6] train accuracy: 0.620203703704 test accuracy: 0.586166666667
episode 73 end
episode 74 start
ep

episode 105 start
episode columns: [1, 8, 3, 6, 4, 9, 7, 2, 11, 5, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[2, 9, 0, 10, 7, 6, 4, 8, 1, 5] train accuracy: 0.681259259259 test accuracy: 0.621166666667
episode 105 end
episode 106 start
episode columns: [0, 8, 11, 9, 6, 5, 1, 4, 7, 3, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[2, 9, 0, 10, 7, 6, 4, 8, 1, 5] train accuracy: 0.681333333333 test accuracy: 0.621333333333
episode 106 end
episode 107 start
episode columns: [6, 4, 10, 9, 0, 3, 5, 8, 7, 1, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[2, 11, 10, 7, 6, 4, 0, 3, 1, 5] train accuracy: 0.623666666667 test accuracy: 0.5875
episode 107 end
episode 108 start
episode columns: [5, 7, 9, 3, 1, 4, 8, 11, 10, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[2, 11, 10, 7, 6, 4, 0, 3, 9, 8, 1, 5] train accuracy: 0.619740740741 test accuracy: 0.585
episode 108 end
episode 109 start
episode columns: [9, 6, 5

episode 139 start
episode columns: [3, 2, 4, 5, 11, 10, 7, 0, 9, 8, 6, 1] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[6, 4, 0, 2, 3, 7, 5, 1, 8, 9, 10] train accuracy: 0.677222222222 test accuracy: 0.617
episode 139 end
episode 140 start
episode columns: [5, 9, 8, 7, 1, 3, 11, 10, 2, 6, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[6, 4, 0, 2, 3, 7, 5, 1, 8, 9, 10] train accuracy: 0.677314814815 test accuracy: 0.616666666667
episode 140 end
episode 141 start
episode columns: [6, 4, 10, 0, 11, 5, 7, 2, 8, 1, 9, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[2, 6, 4, 0, 3, 7, 5, 1, 8, 9, 10] train accuracy: 0.677388888889 test accuracy: 0.616
episode 141 end
episode 142 start
episode columns: [2, 4, 8, 1, 10, 11, 3, 9, 7, 6, 0, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 6, 4, 0, 3, 7, 5, 1, 8, 9, 10] train accuracy: 0.677166666667 test accuracy: 0.617333333333
episode 142 end
episode 143 start
episode columns: 

episode 173 start
episode columns: [11, 8, 2, 9, 7, 6, 0, 1, 5, 4, 10, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 9, 10, 7, 5, 6, 0, 3, 4, 11, 8, 1] train accuracy: 0.619462962963 test accuracy: 0.587333333333
episode 173 end
episode 174 start
episode columns: [6, 1, 9, 7, 11, 0, 10, 5, 8, 3, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[2, 9, 10, 5, 6, 0, 3, 7, 4, 11, 8, 1] train accuracy: 0.619814814815 test accuracy: 0.590166666667
episode 174 end
episode 175 start
episode columns: [4, 5, 2, 3, 8, 11, 10, 1, 7, 6, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 7, 2, 9, 10, 5, 6, 0, 3] train accuracy: 0.686407407407 test accuracy: 0.656166666667
episode 175 end
episode 176 start
episode columns: [2, 9, 6, 8, 1, 10, 5, 0, 11, 3, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 7, 2, 9, 10, 5, 0, 3, 6, 1] train accuracy: 0.681592592593 test accuracy: 0.630833333333
episode 176 end
episode 177 start


episode 207 start
episode columns: [4, 10, 6, 5, 8, 11, 7, 0, 2, 3, 1, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 9, 7, 4, 0, 8, 2, 3, 6, 1, 5, 11] train accuracy: 0.619240740741 test accuracy: 0.583666666667
episode 207 end
episode 208 start
episode columns: [6, 5, 0, 9, 2, 11, 8, 3, 1, 4, 10, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 9, 7, 4, 0, 8, 2, 3, 6, 1, 5, 11] train accuracy: 0.620074074074 test accuracy: 0.586166666667
episode 208 end
episode 209 start
episode columns: [6, 9, 3, 5, 2, 0, 11, 1, 7, 10, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[10, 9, 7, 4, 0, 8, 2, 3, 6, 1, 5, 11] train accuracy: 0.619592592593 test accuracy: 0.586
episode 209 end
episode 210 start
episode columns: [10, 11, 3, 8, 2, 4, 7, 6, 0, 5, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[10, 9, 7, 4, 0, 8, 2, 3, 6, 1, 5, 11] train accuracy: 0.619722222222 test accuracy: 0.582333333333
episode 210 end
episode 21

episode 241 start
episode columns: [5, 10, 6, 1, 4, 3, 9, 8, 7, 0, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 7, 4, 2, 3, 5, 0, 8, 6, 1] train accuracy: 0.677296296296 test accuracy: 0.616666666667
episode 241 end
episode 242 start
episode columns: [8, 7, 0, 11, 10, 1, 5, 9, 3, 4, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 9, 7, 4, 2, 3, 5, 0, 11, 6, 1] train accuracy: 0.623574074074 test accuracy: 0.5865
episode 242 end
episode 243 start
episode columns: [1, 4, 5, 3, 8, 6, 9, 2, 11, 7, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 9, 7, 4, 2, 3, 5, 0, 11, 6, 1] train accuracy: 0.623703703704 test accuracy: 0.585333333333
episode 243 end
episode 244 start
episode columns: [6, 11, 1, 4, 9, 3, 5, 7, 0, 8, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 9, 7, 4, 2, 3, 5, 0, 11, 6, 1] train accuracy: 0.623666666667 test accuracy: 0.583833333333
episode 244 end
episode 245 start
episode 

episode 275 start
episode columns: [0, 2, 8, 10, 5, 3, 1, 11, 4, 6, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 9, 3, 5, 0, 11, 4, 2, 8, 7, 6] train accuracy: 0.634148148148 test accuracy: 0.596
episode 275 end
episode 276 start
episode columns: [8, 6, 4, 3, 10, 9, 11, 0, 7, 1, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 9, 3, 5, 0, 11, 4, 2, 8, 7, 6] train accuracy: 0.634333333333 test accuracy: 0.595333333333
episode 276 end
episode 277 start
episode columns: [3, 5, 9, 8, 0, 11, 10, 1, 2, 6, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 3, 5, 0, 11, 4, 2, 8, 7, 6] train accuracy: 0.633666666667 test accuracy: 0.594166666667
episode 277 end
episode 278 start
episode columns: [11, 0, 5, 1, 9, 4, 2, 7, 3, 10, 6, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 3, 5, 0, 11, 4, 2, 8, 7, 6] train accuracy: 0.63387037037 test accuracy: 0.594666666667
episode 278 end
episode 279 start
episode

episode 309 start
episode columns: [8, 0, 2, 6, 3, 1, 10, 5, 4, 9, 11, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 9, 3, 6, 5, 0, 11, 4, 8, 7] train accuracy: 0.659388888889 test accuracy: 0.610666666667
episode 309 end
episode 310 start
episode columns: [6, 5, 0, 10, 9, 7, 8, 4, 3, 1, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 9, 3, 6, 5, 0, 11, 4, 8, 7] train accuracy: 0.659277777778 test accuracy: 0.610166666667
episode 310 end
episode 311 start
episode columns: [10, 2, 6, 7, 9, 5, 0, 8, 4, 3, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 3, 6, 7, 4, 8, 5, 0, 11, 1] train accuracy: 0.647611111111 test accuracy: 0.5975
episode 311 end
episode 312 start
episode columns: [0, 2, 8, 4, 7, 11, 9, 6, 3, 5, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 3, 6, 7, 4, 8, 5, 0, 11, 1] train accuracy: 0.647703703704 test accuracy: 0.596666666667
episode 312 end
episode 313 start
episode co

episode 343 start
episode columns: [0, 1, 6, 5, 7, 2, 3, 9, 10, 8, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 8, 7, 10, 6, 3, 9, 4] train accuracy: 0.69612962963 test accuracy: 0.687833333333
episode 343 end
episode 344 start
episode columns: [7, 10, 6, 4, 9, 5, 8, 2, 1, 11, 3, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 8, 7, 10, 6, 3, 9, 4] train accuracy: 0.69612962963 test accuracy: 0.687666666667
episode 344 end
episode 345 start
episode columns: [8, 0, 6, 1, 5, 7, 2, 3, 10, 9, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 3, 9, 11, 7, 10, 6, 5, 8, 4] train accuracy: 0.687666666667 test accuracy: 0.6645
episode 345 end
episode 346 start
episode columns: [2, 3, 9, 6, 1, 7, 11, 8, 5, 4, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[2, 3, 9, 11, 7, 10, 6, 5, 8, 4] train accuracy: 0.687722222222 test accuracy: 0.663666666667
episode 346 end
episode 347 start
episode columns: [1, 5, 7, 10, 

episode 378 start
episode columns: [0, 7, 8, 10, 1, 9, 4, 11, 2, 5, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 8, 4, 11, 0, 3, 7, 10, 6, 9, 1] train accuracy: 0.647462962963 test accuracy: 0.598
episode 378 end
episode 379 start
episode columns: [2, 8, 7, 11, 0, 4, 10, 6, 3, 9, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 8, 4, 11, 0, 3, 9, 1, 7, 10, 6] train accuracy: 0.647814814815 test accuracy: 0.598666666667
episode 379 end
episode 380 start
episode columns: [6, 7, 2, 1, 0, 8, 4, 9, 11, 5, 3, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 8, 3, 9, 1, 7, 10, 6, 4, 11, 0, 2] train accuracy: 0.62037037037 test accuracy: 0.585666666667
episode 380 end
episode 381 start
episode columns: [2, 0, 8, 5, 11, 3, 1, 4, 10, 9, 6, 7] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 8, 3, 9, 1, 7, 10, 6, 4, 11, 0, 2] train accuracy: 0.619814814815 test accuracy: 0.587166666667
episode 381 end
episode 382 start
e

episode 412 start
episode columns: [0, 8, 4, 6, 9, 11, 1, 5, 3, 2, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 3, 10, 6, 7, 4, 11, 0, 8, 9, 1] train accuracy: 0.6475 test accuracy: 0.597666666667
episode 412 end
episode 413 start
episode columns: [8, 11, 0, 3, 5, 9, 10, 2, 7, 4, 6, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 3, 10, 6, 7, 4, 11, 0, 8, 9, 1] train accuracy: 0.647592592593 test accuracy: 0.596
episode 413 end
episode 414 start
episode columns: [6, 3, 9, 1, 4, 7, 11, 2, 8, 10, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[5, 3, 10, 6, 7, 4, 11, 0, 8, 9, 1] train accuracy: 0.64762962963 test accuracy: 0.5965
episode 414 end
episode 415 start
episode columns: [5, 0, 10, 2, 11, 7, 4, 1, 6, 3, 9, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 3, 10, 6, 7, 4, 11, 0, 8, 9, 1] train accuracy: 0.647777777778 test accuracy: 0.596833333333
episode 415 end
episode 416 start
episode columns: [5, 8,

episode 446 start
episode columns: [5, 4, 0, 8, 6, 10, 11, 3, 2, 9, 1, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 5, 3, 10, 9, 4, 2, 8, 7] train accuracy: 0.695888888889 test accuracy: 0.683
episode 446 end
episode 447 start
episode columns: [10, 7, 3, 8, 11, 0, 2, 5, 6, 1, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 3, 10, 9, 4, 2, 8, 7] train accuracy: 0.695888888889 test accuracy: 0.683166666667
episode 447 end
episode 448 start
episode columns: [7, 2, 8, 11, 10, 9, 3, 4, 0, 1, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 5, 3, 10, 9, 4, 2, 8, 7] train accuracy: 0.695888888889 test accuracy: 0.683
episode 448 end
episode 449 start
episode columns: [1, 5, 6, 3, 8, 11, 2, 0, 4, 10, 9, 7] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 5, 3, 10, 6, 9, 4, 2, 8, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 449 end
episode 450 start
episode columns: [9, 5, 10, 4, 2, 3, 7

episode 481 start
episode columns: [6, 7, 8, 10, 4, 1, 5, 9, 11, 0, 3, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 3, 10, 9, 7, 4, 8, 11, 0, 2, 6] train accuracy: 0.619148148148 test accuracy: 0.584166666667
episode 481 end
episode 482 start
episode columns: [5, 3, 6, 0, 2, 9, 4, 10, 8, 11, 1, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 3, 10, 9, 7, 4, 8, 11, 0, 6] train accuracy: 0.647444444444 test accuracy: 0.595666666667
episode 482 end
episode 483 start
episode columns: [8, 2, 0, 5, 3, 10, 9, 6, 7, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[1, 5, 3, 7, 10, 9, 8, 11, 0, 6] train accuracy: 0.64812962963 test accuracy: 0.596333333333
episode 483 end
episode 484 start
episode columns: [7, 3, 0, 11, 6, 1, 10, 9, 8, 5, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[1, 5, 3, 7, 10, 9, 8, 11, 0, 6] train accuracy: 0.648148148148 test accuracy: 0.598666666667
episode 484 end
episode 485 start
epis

episode 515 start
episode columns: [3, 0, 4, 5, 1, 7, 8, 10, 11, 9, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 7, 8, 10, 9, 3, 6, 1, 5, 11, 2] train accuracy: 0.684148148148 test accuracy: 0.6545
episode 515 end
episode 516 start
episode columns: [6, 3, 7, 10, 4, 1, 8, 11, 5, 9, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 7, 8, 10, 9, 3, 6, 1, 5, 11, 2] train accuracy: 0.684018518519 test accuracy: 0.654666666667
episode 516 end
episode 517 start
episode columns: [8, 4, 7, 2, 9, 3, 5, 6, 1, 11, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 7, 8, 10, 9, 3, 6, 1, 5, 11, 2] train accuracy: 0.684333333333 test accuracy: 0.6545
episode 517 end
episode 518 start
episode columns: [6, 1, 5, 11, 10, 2, 3, 7, 9, 8, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 7, 8, 10, 9, 3, 6, 1, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 518 end
episode 519 start
episode columns: [11,

episode 549 start
episode columns: [6, 10, 2, 9, 4, 0, 11, 8, 7, 1, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 8, 10, 9, 4, 7, 3, 6, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 549 end
episode 550 start
episode columns: [6, 9, 8, 11, 4, 2, 7, 0, 10, 3, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[1, 5, 8, 10, 9, 4, 7, 3, 6, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 550 end
episode 551 start
episode columns: [7, 5, 0, 8, 6, 4, 9, 2, 1, 10, 3, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 5, 8, 10, 9, 4, 7, 3, 6, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 551 end
episode 552 start
episode columns: [11, 7, 4, 0, 2, 6, 8, 3, 1, 5, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[1, 5, 8, 10, 9, 4, 7, 3, 6, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 552 end
episode 553 start
episode colu

episode 584 start
episode columns: [0, 2, 4, 8, 6, 11, 3, 10, 5, 9, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 7, 3, 8, 2, 9, 6, 10, 0, 5, 1] train accuracy: 0.677240740741 test accuracy: 0.618333333333
episode 584 end
episode 585 start
episode columns: [3, 7, 5, 2, 10, 9, 8, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 7, 3, 8, 2, 9, 6, 10, 0, 5, 1] train accuracy: 0.677148148148 test accuracy: 0.6165
episode 585 end
episode 586 start
episode columns: [7, 1, 0, 11, 5, 10, 6, 3, 8, 2, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 7, 3, 6, 2, 9, 8, 10, 0, 5, 1] train accuracy: 0.677388888889 test accuracy: 0.617166666667
episode 586 end
episode 587 start
episode columns: [8, 4, 3, 11, 1, 0, 5, 7, 10, 9, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 2, 4, 7, 3, 6, 9, 1, 5, 10, 0] train accuracy: 0.677314814815 test accuracy: 0.616666666667
episode 587 end
episode 588 start
episode columns: [

episode 619 start
episode columns: [5, 0, 9, 10, 4, 3, 7, 8, 11, 1, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 8, 6, 10, 4, 9, 3, 7] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 619 end
episode 620 start
episode columns: [0, 1, 9, 8, 7, 5, 10, 4, 2, 3, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 8, 6, 10, 4, 9, 3, 7] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 620 end
episode 621 start
episode columns: [5, 8, 1, 10, 9, 11, 3, 6, 0, 7, 4, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 6, 10, 4, 9, 8, 7, 3, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 621 end
episode 622 start
episode columns: [0, 11, 7, 3, 9, 8, 5, 6, 2, 1, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 6, 10, 4, 9, 3, 8, 7] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 622 end
episode 623 start
episode columns: [1, 6, 4, 9, 8

episode 654 start
episode columns: [0, 7, 3, 8, 1, 5, 2, 9, 11, 10, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[1, 5, 8, 3, 10, 9, 6, 4, 11, 2, 7] train accuracy: 0.684185185185 test accuracy: 0.655166666667
episode 654 end
episode 655 start
episode columns: [1, 3, 0, 5, 11, 8, 9, 2, 6, 4, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 3, 10, 9, 2, 4, 1, 5, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 655 end
episode 656 start
episode columns: [5, 11, 4, 10, 8, 7, 6, 3, 0, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 10, 9, 2, 4, 1, 5, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 656 end
episode 657 start
episode columns: [8, 5, 3, 2, 0, 7, 11, 10, 1, 4, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 8, 3, 10, 9, 2, 4, 11, 0, 7, 6] train accuracy: 0.620203703704 test accuracy: 0.585833333333
episode 657 end
episode 658 start
episode

episode 689 start
episode columns: [7, 2, 4, 10, 9, 5, 8, 6, 11, 0, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.686666666667 test accuracy: 0.654166666667
episode 689 end
episode 690 start
episode columns: [3, 11, 4, 9, 6, 7, 8, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.686574074074 test accuracy: 0.654
episode 690 end
episode 691 start
episode columns: [1, 2, 9, 10, 6, 11, 3, 5, 4, 8, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 10, 9, 7, 6, 4, 0, 11, 2] train accuracy: 0.641592592593 test accuracy: 0.597
episode 691 end
episode 692 start
episode columns: [4, 2, 0, 8, 7, 10, 1, 11, 6, 3, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 3, 10, 9, 7, 6, 4, 0, 11, 2] train accuracy: 0.641555555556 test accuracy: 0.5965
episode 692 end
episode 693 start
episode columns: [8, 3, 0, 9, 11, 4, 1, 6, 7, 2, 5,

episode 724 start
episode columns: [0, 3, 8, 4, 6, 10, 11, 5, 2, 1, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.686611111111 test accuracy: 0.654166666667
episode 724 end
episode 725 start
episode columns: [1, 0, 10, 6, 5, 11, 3, 7, 2, 9, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.686648148148 test accuracy: 0.653833333333
episode 725 end
episode 726 start
episode columns: [0, 7, 11, 9, 10, 6, 4, 2, 3, 1, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.68662962963 test accuracy: 0.653666666667
episode 726 end
episode 727 start
episode columns: [1, 8, 9, 6, 3, 10, 4, 11, 7, 2, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 10, 6, 4, 0, 7, 9, 2] train accuracy: 0.686666666667 test accuracy: 0.653833333333
episode 727 end
episode 728 start
episode columns: [3, 0,

episode 759 start
episode columns: [3, 2, 6, 0, 7, 10, 9, 11, 4, 5, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 10, 6, 4, 9, 2, 7, 0, 1] train accuracy: 0.683018518519 test accuracy: 0.6245
episode 759 end
episode 760 start
episode columns: [8, 6, 4, 0, 7, 2, 9, 11, 10, 1, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 3, 10, 6, 4, 9, 2, 7, 0, 1] train accuracy: 0.683203703704 test accuracy: 0.624666666667
episode 760 end
episode 761 start
episode columns: [0, 5, 10, 6, 8, 7, 3, 9, 4, 2, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[8, 3, 10, 6, 4, 9, 2, 7, 0, 5, 1] train accuracy: 0.677166666667 test accuracy: 0.617
episode 761 end
episode 762 start
episode columns: [8, 11, 2, 6, 0, 7, 9, 5, 3, 1, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 10, 6, 4, 9, 2, 7, 0, 5, 1] train accuracy: 0.677314814815 test accuracy: 0.6175
episode 762 end
episode 763 start
episode columns: [8, 1, 2, 6, 

episode 793 start
episode columns: [2, 10, 0, 3, 4, 6, 1, 5, 8, 9, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 2, 6, 8, 3, 4, 7, 11, 10, 5, 1] train accuracy: 0.684314814815 test accuracy: 0.654
episode 793 end
episode 794 start
episode columns: [8, 3, 4, 7, 11, 2, 10, 5, 9, 0, 6, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 2, 6, 8, 3, 1, 7, 4, 5, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 794 end
episode 795 start
episode columns: [9, 2, 8, 3, 1, 7, 4, 11, 0, 10, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 4, 9, 2, 6, 8, 1] train accuracy: 0.695944444444 test accuracy: 0.688166666667
episode 795 end
episode 796 start
episode columns: [7, 4, 8, 0, 6, 2, 3, 1, 9, 5, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 9, 2, 6, 8, 1, 7, 11, 10, 5, 3] train accuracy: 0.684111111111 test accuracy: 0.653666666667
episode 796 end
episode 797 start
episode columns: [8, 2,

episode 828 start
episode columns: [7, 6, 9, 1, 2, 0, 3, 4, 10, 8, 5, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 10, 0, 1, 8, 9, 6, 7, 4, 2, 3] train accuracy: 0.677388888889 test accuracy: 0.6165
episode 828 end
episode 829 start
episode columns: [2, 7, 6, 10, 3, 1, 9, 0, 4, 11, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 8, 1, 7, 4, 9, 6, 3, 10, 0, 11] train accuracy: 0.6475 test accuracy: 0.596833333333
episode 829 end
episode 830 start
episode columns: [5, 4, 3, 8, 9, 11, 7, 6, 2, 10, 1, 0] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[5, 8, 1, 7, 4, 9, 6, 3, 10, 0, 11] train accuracy: 0.64762962963 test accuracy: 0.596166666667
episode 830 end
episode 831 start
episode columns: [5, 4, 3, 10, 2, 9, 0, 8, 7, 11, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 8, 1, 7, 4, 9, 6, 3, 10, 0, 11] train accuracy: 0.647481481481 test accuracy: 0.597333333333
episode 831 end
episode 832 start
episode colum

episode 862 start
episode columns: [3, 6, 9, 8, 10, 2, 7, 0, 1, 5, 4, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 4, 9, 6, 3, 8, 5, 10, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 862 end
episode 863 start
episode columns: [7, 5, 9, 0, 11, 1, 10, 6, 3, 4, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 4, 9, 6, 3, 8, 5, 10, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 863 end
episode 864 start
episode columns: [7, 2, 0, 5, 3, 11, 6, 10, 1, 4, 8, 9] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 4, 8, 5, 10, 1, 6, 3, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 864 end
episode 865 start
episode columns: [8, 5, 2, 7, 3, 4, 0, 11, 1, 6, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 4, 8, 5, 10, 1, 9, 6, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 865 end
episode 866 start
episode columns: [0, 1,

episode 896 start
episode columns: [10, 1, 2, 8, 9, 4, 3, 5, 11, 7, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[7, 10, 0, 2, 6, 1, 8, 3, 5, 9, 4] train accuracy: 0.677314814815 test accuracy: 0.616333333333
episode 896 end
episode 897 start
episode columns: [11, 0, 2, 10, 4, 9, 7, 6, 5, 3, 1, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 10, 0, 2, 6, 1, 8, 3, 5, 9, 4] train accuracy: 0.677185185185 test accuracy: 0.617333333333
episode 897 end
episode 898 start
episode columns: [6, 8, 4, 5, 9, 7, 11, 2, 0, 10, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 10, 0, 2, 6, 1, 8, 3, 5, 9, 4] train accuracy: 0.677259259259 test accuracy: 0.617166666667
episode 898 end
episode 899 start
episode columns: [2, 7, 10, 1, 0, 6, 8, 11, 4, 5, 9, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 10, 0, 2, 6, 1, 8, 3, 5, 9, 4] train accuracy: 0.677296296296 test accuracy: 0.617666666667
episode 899 end
episode 900 start
ep

episode 931 start
episode columns: [8, 2, 11, 4, 3, 10, 1, 5, 7, 6, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 4, 10, 6, 2, 9, 8, 3, 5] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 931 end
episode 932 start
episode columns: [9, 11, 1, 3, 0, 10, 6, 2, 4, 5, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[7, 4, 10, 6, 2, 9, 8, 3, 5] train accuracy: 0.695888888889 test accuracy: 0.687
episode 932 end
episode 933 start
episode columns: [0, 9, 7, 10, 6, 5, 11, 1, 3, 4, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 4, 10, 6, 2, 9, 8, 3, 5] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 933 end
episode 934 start
episode columns: [7, 1, 6, 0, 4, 9, 3, 11, 5, 8, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 4, 10, 6, 2, 9, 8, 3, 5] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 934 end
episode 935 start
episode columns: [1, 10, 5, 11, 7

episode 965 start
episode columns: [10, 7, 4, 3, 5, 2, 1, 9, 11, 6, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 6, 2, 3, 10, 4, 7, 5, 11, 8] train accuracy: 0.687740740741 test accuracy: 0.663833333333
episode 965 end
episode 966 start
episode columns: [8, 6, 2, 10, 9, 3, 11, 7, 1, 4, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 6, 2, 3, 10, 4, 5, 11, 8, 7] train accuracy: 0.687777777778 test accuracy: 0.664166666667
episode 966 end
episode 967 start
episode columns: [9, 4, 1, 2, 6, 8, 5, 11, 10, 0, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 6, 2, 3, 10, 4, 5, 11, 8, 7] train accuracy: 0.687666666667 test accuracy: 0.663666666667
episode 967 end
episode 968 start
episode columns: [5, 7, 6, 9, 11, 2, 3, 0, 4, 8, 1, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 6, 2, 3, 10, 4, 5, 11, 8, 7] train accuracy: 0.687685185185 test accuracy: 0.6635
episode 968 end
episode 969 start
episode columns

episode 999 start
episode columns: [2, 8, 3, 10, 0, 6, 9, 5, 11, 7, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 1, 3, 8, 10, 5, 11, 4, 7, 6, 2] train accuracy: 0.684074074074 test accuracy: 0.655
episode 999 end
episode 1000 start
episode columns: [5, 10, 3, 4, 2, 6, 9, 11, 0, 7, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 1, 3, 8, 10, 5, 11, 4, 7, 6, 2] train accuracy: 0.684111111111 test accuracy: 0.654333333333
episode 1000 end
episode 1001 start
episode columns: [0, 4, 11, 2, 8, 9, 5, 7, 10, 6, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 1, 3, 8, 10, 5, 11, 4, 7, 6, 2] train accuracy: 0.684166666667 test accuracy: 0.653666666667
episode 1001 end
episode 1002 start
episode columns: [10, 4, 11, 3, 9, 8, 7, 0, 1, 5, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 1, 3, 8, 10, 5, 11, 4, 7, 6, 2] train accuracy: 0.68412962963 test accuracy: 0.654166666667
episode 1002 end
episode 1003 star

episode 1034 start
episode columns: [2, 6, 7, 8, 0, 10, 11, 3, 4, 5, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 7, 8, 3, 6, 4, 10, 2] train accuracy: 0.695611111111 test accuracy: 0.684333333333
episode 1034 end
episode 1035 start
episode columns: [1, 11, 10, 3, 0, 5, 4, 8, 6, 9, 2, 7] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[1, 7, 8, 9, 6, 4, 10, 2, 3, 0, 5, 11] train accuracy: 0.619759259259 test accuracy: 0.587666666667
episode 1035 end
episode 1036 start
episode columns: [8, 9, 4, 2, 3, 7, 5, 6, 1, 11, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.7
episode policy:[1, 7, 8, 3, 0, 6, 9, 4, 10, 2] train accuracy: 0.682981481481 test accuracy: 0.624166666667
episode 1036 end
episode 1037 start
episode columns: [1, 10, 0, 5, 9, 11, 7, 8, 3, 2, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 1, 7, 6, 4, 10, 2, 3, 0, 5, 11, 8] train accuracy: 0.619203703704 test accuracy: 0.585666666667
episode 1037 end
episode 103

episode 1068 start
episode columns: [1, 10, 11, 0, 2, 6, 5, 3, 9, 8, 4, 7] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[1, 7, 4, 10, 2, 8, 9, 6, 3, 0, 11] train accuracy: 0.627055555556 test accuracy: 0.587166666667
episode 1068 end
episode 1069 start
episode columns: [3, 7, 4, 9, 2, 8, 1, 10, 5, 11, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 10, 2, 8, 9, 7, 4, 0, 6, 3, 5, 11] train accuracy: 0.619481481481 test accuracy: 0.5865
episode 1069 end
episode 1070 start
episode columns: [11, 6, 4, 10, 2, 5, 1, 9, 7, 0, 8, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 10, 2, 8, 9, 4, 0, 6, 7, 3, 5, 11] train accuracy: 0.619962962963 test accuracy: 0.584166666667
episode 1070 end
episode 1071 start
episode columns: [1, 11, 9, 8, 5, 2, 10, 3, 7, 0, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 1, 10, 2, 8, 3, 0, 6, 7, 4, 5, 11] train accuracy: 0.619240740741 test accuracy: 0.5845
episode 1071 end
episode 1072

episode 1102 start
episode columns: [4, 2, 11, 5, 3, 10, 1, 7, 6, 8, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 8, 10, 4, 2, 1, 7, 3, 5, 6] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1102 end
episode 1103 start
episode columns: [5, 0, 11, 2, 1, 3, 7, 4, 9, 8, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 8, 6, 1, 10, 4, 2, 3, 5] train accuracy: 0.695537037037 test accuracy: 0.684666666667
episode 1103 end
episode 1104 start
episode columns: [9, 5, 4, 0, 10, 11, 8, 3, 1, 7, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 8, 9, 6, 1, 10, 4, 2, 3, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1104 end
episode 1105 start
episode columns: [4, 0, 11, 7, 3, 5, 8, 1, 2, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 3, 1, 10, 4, 2, 6, 9, 8] train accuracy: 0.695537037037 test accuracy: 0.684833333333
episode 1105 end
episode 1106 start
episode colu

episode 1136 start
episode columns: [4, 0, 3, 5, 9, 2, 11, 8, 7, 1, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 10, 11, 7, 4, 0, 6, 9, 8] train accuracy: 0.661166666667 test accuracy: 0.610333333333
episode 1136 end
episode 1137 start
episode columns: [1, 5, 3, 10, 0, 2, 9, 8, 11, 7, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 10, 11, 2, 3, 9, 8, 6, 5, 7, 4, 0] train accuracy: 0.619888888889 test accuracy: 0.587
episode 1137 end
episode 1138 start
episode columns: [11, 7, 10, 4, 5, 9, 8, 3, 0, 1, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 10, 11, 2, 3, 9, 8, 6, 5, 7, 4, 0] train accuracy: 0.620296296296 test accuracy: 0.584333333333
episode 1138 end
episode 1139 start
episode columns: [0, 8, 5, 2, 7, 1, 4, 9, 10, 11, 3, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 10, 11, 2, 3, 9, 8, 6, 5, 7, 4, 0] train accuracy: 0.619666666667 test accuracy: 0.583833333333
episode 1139 end
episode 1140 

episode 1170 start
episode columns: [1, 2, 6, 11, 7, 9, 8, 10, 3, 0, 5, 4] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 10, 11, 2, 8, 9, 6, 3, 0, 5, 7, 4] train accuracy: 0.619037037037 test accuracy: 0.588333333333
episode 1170 end
episode 1171 start
episode columns: [9, 1, 7, 11, 10, 4, 0, 2, 8, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[1, 10, 11, 2, 8, 9, 6, 3, 0, 5, 7, 4] train accuracy: 0.619611111111 test accuracy: 0.586666666667
episode 1171 end
episode 1172 start
episode columns: [1, 9, 10, 7, 8, 2, 0, 3, 5, 6, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 1, 10, 11, 2, 8, 9, 6, 4, 7] train accuracy: 0.689277777778 test accuracy: 0.663666666667
episode 1172 end
episode 1173 start
episode columns: [3, 10, 1, 9, 6, 0, 7, 4, 5, 8, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 1, 10, 11, 2, 8, 9, 6, 4, 7] train accuracy: 0.689259259259 test accuracy: 0.664666666667
episode 1173 end
episode 

episode 1204 start
episode columns: [3, 0, 5, 4, 2, 7, 9, 8, 1, 11, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 8, 9, 10, 11, 7, 4, 0, 5, 2, 6] train accuracy: 0.633944444444 test accuracy: 0.596333333333
episode 1204 end
episode 1205 start
episode columns: [11, 3, 6, 0, 9, 5, 1, 10, 2, 8, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 8, 9, 10, 11, 7, 4, 0, 5, 1, 2, 6] train accuracy: 0.620055555556 test accuracy: 0.586
episode 1205 end
episode 1206 start
episode columns: [5, 0, 7, 11, 9, 1, 10, 4, 8, 2, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 8, 9, 10, 11, 7, 4, 0, 5, 1, 2] train accuracy: 0.619944444444 test accuracy: 0.58
episode 1206 end
episode 1207 start
episode columns: [6, 4, 3, 7, 1, 2, 9, 5, 11, 10, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 8, 9, 10, 11, 7, 4, 0, 5, 1, 2] train accuracy: 0.620203703704 test accuracy: 0.582166666667
episode 1207 end
episode 1208 start
e

episode 1238 start
episode columns: [6, 1, 10, 2, 5, 0, 7, 8, 3, 4, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 8, 9, 6, 5, 1, 2, 3, 4, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1238 end
episode 1239 start
episode columns: [7, 9, 5, 4, 2, 0, 10, 8, 1, 3, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 4, 9, 6, 5, 1, 3, 8, 2] train accuracy: 0.695555555556 test accuracy: 0.684833333333
episode 1239 end
episode 1240 start
episode columns: [10, 0, 11, 1, 3, 8, 9, 7, 2, 4, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 10, 4, 9, 6, 5, 3, 8, 2] train accuracy: 0.695555555556 test accuracy: 0.684666666667
episode 1240 end
episode 1241 start
episode columns: [9, 1, 6, 7, 0, 11, 3, 4, 2, 10, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.15
episode policy:[1, 10, 4, 9, 6, 5, 3, 8, 2] train accuracy: 0.695555555556 test accuracy: 0.684833333333
episode 1241 end
episode 1242 start
episode colu

episode 1272 start
episode columns: [10, 8, 4, 11, 7, 1, 9, 5, 6, 3, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 8, 2, 9, 6, 5, 3, 1, 7, 0, 11, 4] train accuracy: 0.619240740741 test accuracy: 0.581833333333
episode 1272 end
episode 1273 start
episode columns: [11, 4, 0, 5, 6, 7, 9, 2, 1, 10, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 8, 2, 9, 6, 5, 3, 1, 7, 0, 11, 4] train accuracy: 0.620111111111 test accuracy: 0.585833333333
episode 1273 end
episode 1274 start
episode columns: [6, 5, 3, 1, 9, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 8, 2, 9, 6, 5, 3, 4, 7, 0, 11, 1] train accuracy: 0.619759259259 test accuracy: 0.584333333333
episode 1274 end
episode 1275 start
episode columns: [0, 5, 10, 4, 2, 9, 7, 8, 1, 11, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 8, 2, 9, 6, 5, 3, 4, 7, 0, 11, 1] train accuracy: 0.619481481481 test accuracy: 0.588166666667
episode 1275 end
episode 1276 s

episode 1306 start
episode columns: [0, 3, 9, 7, 10, 4, 1, 5, 2, 8, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 7, 9, 6, 4, 5, 3, 8, 11, 2, 1] train accuracy: 0.684092592593 test accuracy: 0.654166666667
episode 1306 end
episode 1307 start
episode columns: [2, 3, 1, 10, 7, 0, 11, 8, 5, 9, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 1, 3, 4, 9, 6, 2, 0, 11, 8] train accuracy: 0.629537037037 test accuracy: 0.592333333333
episode 1307 end
episode 1308 start
episode columns: [7, 2, 5, 11, 9, 10, 8, 6, 4, 0, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 1, 3, 4, 9, 6, 2, 0, 11, 8] train accuracy: 0.629333333333 test accuracy: 0.594
episode 1308 end
episode 1309 start
episode columns: [5, 10, 3, 11, 2, 1, 8, 0, 6, 9, 4, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 1, 3, 4, 9, 6, 2, 0, 11, 8] train accuracy: 0.628592592593 test accuracy: 0.593166666667
episode 1309 end
episode 1310 start
epis

episode 1340 start
episode columns: [10, 9, 5, 1, 8, 3, 2, 4, 6, 7, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 7, 5, 3, 6, 2, 8, 4, 9, 0, 1] train accuracy: 0.677333333333 test accuracy: 0.616666666667
episode 1340 end
episode 1341 start
episode columns: [8, 10, 6, 2, 4, 3, 11, 7, 9, 5, 0, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 7, 5, 3, 6, 2, 8, 4, 9, 0, 1] train accuracy: 0.677277777778 test accuracy: 0.616833333333
episode 1341 end
episode 1342 start
episode columns: [3, 5, 0, 1, 8, 2, 9, 7, 4, 11, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[10, 7, 5, 3, 6, 2, 8, 4, 9, 0, 1] train accuracy: 0.677148148148 test accuracy: 0.617333333333
episode 1342 end
episode 1343 start
episode columns: [10, 5, 3, 9, 8, 11, 0, 6, 2, 1, 4, 7] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 7, 5, 8, 6, 2, 9, 0, 1, 3, 4] train accuracy: 0.677111111111 test accuracy: 0.617
episode 1343 end
episode 1344 star

episode 1374 start
episode columns: [9, 11, 2, 5, 6, 0, 7, 1, 4, 8, 10, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 7, 5, 3, 6, 2, 9, 4, 8, 11, 0, 1] train accuracy: 0.619074074074 test accuracy: 0.5845
episode 1374 end
episode 1375 start
episode columns: [8, 1, 3, 6, 0, 4, 7, 11, 10, 9, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 7, 5, 3, 2, 6, 4, 8, 9, 0, 1] train accuracy: 0.677351851852 test accuracy: 0.617333333333
episode 1375 end
episode 1376 start
episode columns: [9, 3, 11, 0, 6, 1, 2, 7, 10, 8, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 7, 5, 3, 2, 6, 4, 8, 9, 0, 1] train accuracy: 0.677296296296 test accuracy: 0.617333333333
episode 1376 end
episode 1377 start
episode columns: [7, 5, 4, 6, 9, 0, 8, 1, 2, 11, 10, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 7, 0, 1, 3, 2, 6, 4, 5, 9] train accuracy: 0.681537037037 test accuracy: 0.6305
episode 1377 end
episode 1378 start
episode

episode 1408 start
episode columns: [7, 9, 4, 10, 3, 11, 6, 5, 0, 1, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 7, 4, 3, 2, 8, 11, 0, 1, 5, 9, 6] train accuracy: 0.620333333333 test accuracy: 0.585333333333
episode 1408 end
episode 1409 start
episode columns: [1, 3, 9, 10, 7, 4, 8, 11, 0, 2, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 7, 4, 3, 2, 8, 9, 6, 5, 1] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1409 end
episode 1410 start
episode columns: [0, 9, 8, 7, 11, 2, 1, 3, 10, 5, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[10, 7, 4, 3, 2, 8, 9, 6, 5, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1410 end
episode 1411 start
episode columns: [9, 5, 6, 1, 0, 4, 11, 3, 8, 7, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 7, 4, 3, 2, 8, 9, 6, 5, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1411 end
episode 1412 sta

episode 1443 start
episode columns: [10, 9, 8, 0, 1, 7, 3, 6, 5, 11, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 7, 4, 3, 6, 2, 9] train accuracy: 0.69612962963 test accuracy: 0.688833333333
episode 1443 end
episode 1444 start
episode columns: [6, 7, 10, 3, 9, 2, 1, 4, 8, 11, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 7, 4, 3, 6, 2, 9] train accuracy: 0.69612962963 test accuracy: 0.688833333333
episode 1444 end
episode 1445 start
episode columns: [5, 7, 4, 3, 6, 9, 0, 10, 11, 1, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 7, 4, 3, 6, 2, 9] train accuracy: 0.69612962963 test accuracy: 0.688833333333
episode 1445 end
episode 1446 start
episode columns: [2, 10, 0, 1, 6, 7, 5, 8, 4, 3, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[10, 7, 4, 3, 6, 2, 9] train accuracy: 0.69612962963 test accuracy: 0.688833333333
episode 1446 end
episode 1447 start
episode columns: [7, 0, 8, 5, 2, 1, 10, 11, 

episode 1477 start
episode columns: [10, 8, 9, 7, 1, 3, 4, 5, 2, 6, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[1, 4, 3, 6, 2, 10, 7, 9] train accuracy: 0.69612962963 test accuracy: 0.6865
episode 1477 end
episode 1478 start
episode columns: [4, 3, 8, 6, 9, 7, 5, 2, 10, 1, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 4, 3, 6, 2, 10, 7, 9] train accuracy: 0.69612962963 test accuracy: 0.6865
episode 1478 end
episode 1479 start
episode columns: [9, 11, 10, 4, 0, 8, 2, 1, 6, 7, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 4, 3, 6, 2, 10, 7, 5, 9] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1479 end
episode 1480 start
episode columns: [7, 6, 1, 11, 8, 0, 10, 2, 9, 3, 5, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[1, 4, 3, 6, 2, 10, 7, 5, 9] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1480 end
episode 1481 start
episode columns: [10, 8, 9, 5, 2, 0, 3,

episode 1512 start
episode columns: [6, 0, 10, 3, 5, 4, 9, 2, 7, 8, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 5, 8, 10, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.689833333333
episode 1512 end
episode 1513 start
episode columns: [4, 3, 11, 8, 0, 1, 10, 9, 2, 6, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 5, 8, 10, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.689833333333
episode 1513 end
episode 1514 start
episode columns: [7, 10, 3, 9, 5, 11, 6, 1, 0, 4, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 5, 8, 10, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.689833333333
episode 1514 end
episode 1515 start
episode columns: [3, 1, 11, 6, 5, 7, 10, 2, 9, 8, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 5, 8, 10, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.689833333333
episode 1515 end
episode 1516 start
episode columns: [0, 10, 6, 1, 5, 7, 4, 11, 3, 2, 8

episode 1547 start
episode columns: [6, 10, 11, 9, 2, 4, 3, 7, 1, 8, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 9, 8, 10, 7, 1, 4, 3, 2, 5] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1547 end
episode 1548 start
episode columns: [7, 5, 9, 10, 11, 1, 4, 0, 6, 3, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 9, 8, 10, 7, 1, 4, 3, 2, 5] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1548 end
episode 1549 start
episode columns: [9, 5, 8, 4, 0, 2, 6, 10, 7, 1, 11, 3] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[6, 9, 8, 10, 7, 5, 4, 3, 1, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1549 end
episode 1550 start
episode columns: [11, 7, 4, 1, 0, 10, 6, 9, 5, 2, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[6, 9, 8, 10, 7, 5, 4, 3, 1, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1550 end
episode 1551 start
epi

episode 1580 start
episode columns: [10, 4, 1, 0, 8, 6, 5, 2, 7, 11, 9, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 0, 2, 8, 10, 9, 6, 1, 4, 7, 5] train accuracy: 0.677185185185 test accuracy: 0.617166666667
episode 1580 end
episode 1581 start
episode columns: [1, 8, 7, 0, 3, 10, 2, 6, 11, 9, 5, 4] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[3, 0, 2, 8, 10, 9, 6, 1, 4, 7, 5] train accuracy: 0.677092592593 test accuracy: 0.617
episode 1581 end
episode 1582 start
episode columns: [10, 6, 1, 7, 3, 0, 9, 8, 5, 4, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 1, 4, 2, 8, 10, 9, 6, 5, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1582 end
episode 1583 start
episode columns: [11, 8, 4, 3, 7, 10, 9, 6, 2, 0, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 1, 4, 2, 8, 10, 9, 6, 5, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1583 end
episode 1584 start
episo

episode 1614 start
episode columns: [2, 1, 10, 0, 5, 3, 8, 11, 7, 9, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 9, 2, 8, 10, 6, 1, 4, 3, 0] train accuracy: 0.677203703704 test accuracy: 0.616666666667
episode 1614 end
episode 1615 start
episode columns: [8, 1, 5, 4, 7, 11, 10, 9, 2, 0, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 7, 9, 2, 8, 10, 6, 1, 4, 3, 0] train accuracy: 0.677277777778 test accuracy: 0.618
episode 1615 end
episode 1616 start
episode columns: [2, 5, 9, 8, 6, 4, 3, 1, 10, 11, 7] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 7, 9, 2, 8, 10, 6, 1, 4, 3, 0] train accuracy: 0.677240740741 test accuracy: 0.617166666667
episode 1616 end
episode 1617 start
episode columns: [9, 10, 0, 5, 4, 3, 7, 2, 8, 1, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 7, 9, 2, 8, 10, 6, 1, 4, 3, 0] train accuracy: 0.677351851852 test accuracy: 0.6175
episode 1617 end
episode 1618 start
episode co

episode 1648 start
episode columns: [1, 0, 8, 9, 5, 11, 4, 2, 3, 10, 6, 7] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[5, 6, 1, 9, 10, 3, 8, 2, 4, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1648 end
episode 1649 start
episode columns: [3, 1, 0, 8, 7, 4, 6, 5, 2, 9, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 6, 1, 9, 10, 3, 8, 2, 4, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1649 end
episode 1650 start
episode columns: [2, 7, 1, 4, 10, 0, 9, 11, 3, 5, 6, 8] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 6, 1, 9, 10, 3, 8, 2, 4, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1650 end
episode 1651 start
episode columns: [2, 5, 0, 9, 10, 3, 6, 4, 11, 8, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 6, 1, 9, 10, 8, 2, 4, 3, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1651 end
episode 1652 start
epis

episode 1682 start
episode columns: [6, 0, 9, 11, 5, 7, 10, 1, 2, 8, 3, 4] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[10, 9, 1, 5, 7, 6, 8, 2, 4, 3] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 1682 end
episode 1683 start
episode columns: [9, 4, 7, 10, 1, 5, 11, 0, 6, 3, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 9, 1, 5, 7, 6, 3, 8, 2, 4, 0, 11] train accuracy: 0.619388888889 test accuracy: 0.586
episode 1683 end
episode 1684 start
episode columns: [7, 5, 4, 2, 9, 11, 1, 8, 10, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 9, 1, 5, 7, 6, 3, 8, 2, 4, 0, 11] train accuracy: 0.62 test accuracy: 0.587333333333
episode 1684 end
episode 1685 start
episode columns: [5, 9, 4, 7, 10, 3, 8, 0, 11, 1, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 9, 1, 5, 7, 6, 3, 8, 2, 4, 0, 11] train accuracy: 0.619685185185 test accuracy: 0.5855
episode 1685 end
episode 1686 start
episode colum

episode 1716 start
episode columns: [5, 9, 6, 3, 0, 7, 8, 11, 4, 10, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 9, 10, 1, 3, 8, 11, 2, 7, 6, 4] train accuracy: 0.68412962963 test accuracy: 0.654
episode 1716 end
episode 1717 start
episode columns: [2, 7, 6, 1, 11, 5, 3, 8, 10, 0, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 9, 10, 1, 3, 8, 11, 2, 7, 6, 4] train accuracy: 0.68412962963 test accuracy: 0.6545
episode 1717 end
episode 1718 start
episode columns: [5, 2, 11, 8, 10, 3, 1, 7, 6, 4, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 9, 10, 1, 3, 8, 11, 2, 7, 6, 4] train accuracy: 0.684055555556 test accuracy: 0.6545
episode 1718 end
episode 1719 start
episode columns: [5, 4, 9, 3, 6, 2, 7, 11, 8, 0, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 9, 10, 1, 3, 8, 11, 2, 7, 6, 4] train accuracy: 0.684074074074 test accuracy: 0.653833333333
episode 1719 end
episode 1720 start
episode colum

episode 1750 start
episode columns: [1, 11, 4, 3, 5, 8, 10, 9, 0, 7, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 10, 9, 1, 3, 6, 2, 4] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1750 end
episode 1751 start
episode columns: [9, 0, 7, 10, 3, 1, 11, 2, 6, 4, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 10, 9, 1, 3, 6, 2, 4] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1751 end
episode 1752 start
episode columns: [6, 3, 9, 4, 1, 5, 2, 8, 0, 11, 10, 7] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 10, 9, 1, 3, 6, 2, 4] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1752 end
episode 1753 start
episode columns: [1, 6, 8, 2, 0, 4, 11, 5, 10, 3, 9, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 7, 10, 9, 1, 3, 6, 2, 4] train accuracy: 0.695796296296 test accuracy: 0.684166666667
episode 1753 end
episode 1754 start
episode columns: 

episode 1784 start
episode columns: [3, 4, 0, 7, 9, 1, 5, 8, 6, 10, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 10, 7, 4, 3, 5, 8, 6, 2] train accuracy: 0.695888888889 test accuracy: 0.687
episode 1784 end
episode 1785 start
episode columns: [5, 2, 9, 8, 3, 10, 4, 1, 0, 7, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 10, 7, 4, 3, 5, 8, 6, 2] train accuracy: 0.695888888889 test accuracy: 0.687
episode 1785 end
episode 1786 start
episode columns: [11, 5, 8, 7, 3, 1, 10, 0, 9, 6, 4, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 10, 7, 3, 5, 0, 11, 2, 4, 6, 1, 8] train accuracy: 0.619777777778 test accuracy: 0.589
episode 1786 end
episode 1787 start
episode columns: [9, 2, 6, 1, 4, 5, 0, 11, 8, 7, 10, 3] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 10, 7, 3, 5, 0, 11, 2, 4, 6, 1, 8] train accuracy: 0.620296296296 test accuracy: 0.584
episode 1787 end
episode 1788 start
episode columns: [2, 9, 10, 0, 

episode 1818 start
episode columns: [8, 4, 9, 1, 5, 3, 6, 10, 11, 7, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 10, 7, 3, 8, 6, 2, 4, 11, 1, 5, 0] train accuracy: 0.619796296296 test accuracy: 0.584
episode 1818 end
episode 1819 start
episode columns: [0, 11, 2, 3, 6, 4, 10, 5, 8, 7, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 10, 7, 3, 8, 6, 2, 4, 11, 1, 5, 0] train accuracy: 0.619777777778 test accuracy: 0.583333333333
episode 1819 end
episode 1820 start
episode columns: [10, 1, 5, 6, 2, 9, 0, 11, 4, 3, 8, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 10, 7, 3, 8, 6, 1, 5, 4, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 1820 end
episode 1821 start
episode columns: [3, 10, 6, 11, 2, 1, 8, 5, 9, 0, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 10, 7, 4, 3, 8, 6, 1, 5, 0, 2] train accuracy: 0.677185185185 test accuracy: 0.617333333333
episode 1821 end
episode 1822 s

episode 1852 start
episode columns: [4, 2, 8, 0, 11, 5, 6, 10, 9, 7, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 7, 4, 10, 5, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 1852 end
episode 1853 start
episode columns: [2, 0, 11, 1, 6, 10, 4, 9, 7, 3, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 10, 4, 3, 6, 7, 8, 5, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 1853 end
episode 1854 start
episode columns: [3, 2, 5, 0, 6, 11, 8, 10, 1, 4, 9, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 7, 4, 10, 5, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 1854 end
episode 1855 start
episode columns: [7, 9, 6, 10, 4, 8, 0, 1, 5, 3, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 7, 4, 10, 5, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 1855 end
episode 1856 start
episode columns: [3, 5, 10, 0, 1

episode 1888 start
episode columns: [8, 0, 7, 10, 9, 4, 11, 3, 2, 5, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 10, 5, 7, 8, 6, 3] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 1888 end
episode 1889 start
episode columns: [11, 9, 10, 4, 6, 8, 7, 1, 2, 5, 3, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 10, 5, 7, 8, 6, 3] train accuracy: 0.696277777778 test accuracy: 0.688833333333
episode 1889 end
episode 1890 start
episode columns: [9, 0, 10, 1, 7, 4, 8, 6, 11, 3, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 10, 5, 7, 8, 6, 3] train accuracy: 0.696277777778 test accuracy: 0.688833333333
episode 1890 end
episode 1891 start
episode columns: [10, 11, 8, 5, 3, 2, 1, 7, 9, 4, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[9, 10, 5, 7, 8, 6, 3] train accuracy: 0.696277777778 test accuracy: 0.688833333333
episode 1891 end
episode 1892 start
episode columns: [6, 9, 3, 1, 8, 4,

episode 1923 start
episode columns: [9, 1, 2, 0, 11, 6, 8, 5, 3, 4, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 1, 5, 7, 4, 3, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 1923 end
episode 1924 start
episode columns: [9, 5, 7, 1, 10, 8, 0, 11, 4, 6, 2, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 5, 7, 4, 3, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 1924 end
episode 1925 start
episode columns: [6, 8, 2, 7, 10, 1, 4, 5, 11, 3, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 1, 5, 7, 4, 3, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 1925 end
episode 1926 start
episode columns: [2, 0, 4, 10, 5, 11, 8, 6, 3, 1, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 1, 7, 4, 3, 10, 5, 6, 8] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 1926 end
episode 1927 start
episode columns

episode 1957 start
episode columns: [1, 3, 9, 4, 11, 8, 6, 2, 7, 5, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 10, 1, 7, 8, 4, 2, 11, 0, 6, 3] train accuracy: 0.626425925926 test accuracy: 0.5915
episode 1957 end
episode 1958 start
episode columns: [9, 10, 8, 7, 6, 3, 1, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 10, 1, 7, 8, 4, 2, 11, 0, 6, 3] train accuracy: 0.627018518519 test accuracy: 0.585666666667
episode 1958 end
episode 1959 start
episode columns: [5, 1, 8, 7, 10, 0, 6, 4, 3, 9, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 10, 1, 7, 8, 4, 2, 11, 0, 6, 3] train accuracy: 0.626833333333 test accuracy: 0.588666666667
episode 1959 end
episode 1960 start
episode columns: [3, 10, 5, 7, 11, 6, 2, 1, 4, 0, 9, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 10, 1, 7, 8, 4, 2, 11, 0, 6, 3] train accuracy: 0.626388888889 test accuracy: 0.587833333333
episode 1960 end
episode 1961 start
episo

episode 1991 start
episode columns: [10, 8, 1, 9, 7, 3, 6, 4, 2, 5, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[10, 7, 8, 9, 4, 1, 6, 3] train accuracy: 0.696333333333 test accuracy: 0.689
episode 1991 end
episode 1992 start
episode columns: [4, 7, 2, 3, 10, 6, 5, 11, 1, 9, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[10, 7, 8, 9, 4, 1, 6, 3] train accuracy: 0.696333333333 test accuracy: 0.689
episode 1992 end
episode 1993 start
episode columns: [0, 9, 11, 3, 7, 4, 10, 6, 5, 2, 1, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 7, 8, 9, 4, 1, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 1993 end
episode 1994 start
episode columns: [1, 5, 10, 3, 9, 8, 4, 11, 7, 2, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 7, 8, 9, 4, 1, 6, 3] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 1994 end
episode 1995 start
episode columns: [1, 5, 11, 6, 3, 8, 4, 2, 9

episode 2026 start
episode columns: [10, 4, 6, 0, 5, 8, 2, 11, 7, 9, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[9, 7, 4, 5, 3, 1, 6, 11, 0, 8] train accuracy: 0.657666666667 test accuracy: 0.600833333333
episode 2026 end
episode 2027 start
episode columns: [2, 10, 9, 3, 1, 5, 4, 6, 11, 0, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 7, 4, 5, 3, 1, 6, 11, 0, 8] train accuracy: 0.6575 test accuracy: 0.601666666667
episode 2027 end
episode 2028 start
episode columns: [3, 10, 4, 6, 11, 7, 2, 1, 5, 8, 0, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 7, 4, 5, 3, 1, 6, 11, 0, 8] train accuracy: 0.657240740741 test accuracy: 0.6
episode 2028 end
episode 2029 start
episode columns: [1, 8, 11, 9, 3, 10, 4, 5, 6, 7, 0, 2] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 7, 4, 5, 3, 1, 6, 11, 0, 8] train accuracy: 0.657277777778 test accuracy: 0.603
episode 2029 end
episode 2030 start
episode columns: [10, 7, 2, 6

episode 2061 start
episode columns: [3, 10, 5, 1, 7, 6, 2, 8, 9, 4, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 7, 4, 5, 8, 2, 1, 6, 10, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2061 end
episode 2062 start
episode columns: [3, 4, 11, 5, 8, 1, 9, 7, 0, 6, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 7, 4, 5, 8, 2, 1, 6, 10, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2062 end
episode 2063 start
episode columns: [3, 8, 1, 6, 9, 4, 2, 0, 5, 10, 11, 7] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 7, 4, 5, 8, 2, 1, 6, 10, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2063 end
episode 2064 start
episode columns: [9, 0, 2, 7, 11, 4, 1, 6, 5, 3, 10, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 7, 4, 5, 8, 2, 1, 6, 10, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2064 end
episode 2065 start
epis

episode 2096 start
episode columns: [3, 6, 0, 7, 11, 10, 1, 2, 5, 4, 8, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 10, 1, 6, 8, 2, 4, 7, 9] train accuracy: 0.695611111111 test accuracy: 0.6845
episode 2096 end
episode 2097 start
episode columns: [3, 8, 7, 10, 0, 5, 1, 9, 4, 2, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 1, 6, 8, 2, 4, 7, 9] train accuracy: 0.695574074074 test accuracy: 0.6845
episode 2097 end
episode 2098 start
episode columns: [2, 3, 4, 1, 6, 0, 7, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 10, 1, 6, 8, 2, 4, 7, 9] train accuracy: 0.695592592593 test accuracy: 0.6845
episode 2098 end
episode 2099 start
episode columns: [10, 3, 4, 7, 11, 8, 9, 0, 5, 6, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 10, 1, 6, 8, 2, 4, 7, 9] train accuracy: 0.695592592593 test accuracy: 0.6845
episode 2099 end
episode 2100 start
episode columns: [9, 5, 6, 2, 8, 3, 11, 4, 7, 10, 0, 1] 

episode 2131 start
episode columns: [11, 3, 2, 0, 7, 4, 9, 6, 10, 8, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 6, 8, 2, 4, 7, 9, 5, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2131 end
episode 2132 start
episode columns: [3, 1, 7, 0, 5, 10, 11, 6, 8, 2, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 1, 6, 8, 2, 4, 7, 9, 5] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2132 end
episode 2133 start
episode columns: [10, 5, 6, 11, 4, 0, 1, 9, 3, 8, 7, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 8, 2, 4, 1, 6, 9, 7, 5] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2133 end
episode 2134 start
episode columns: [3, 2, 6, 4, 7, 0, 9, 10, 8, 1, 5, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 8, 2, 4, 1, 6, 9, 7, 5] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2134 end
episode 2135 start
episode columns: [10, 4, 5,

episode 2165 start
episode columns: [9, 6, 8, 4, 11, 3, 1, 10, 7, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 3, 10, 5, 1, 11, 4, 8, 2, 9, 7] train accuracy: 0.68412962963 test accuracy: 0.654166666667
episode 2165 end
episode 2166 start
episode columns: [6, 2, 5, 8, 3, 7, 0, 9, 4, 1, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 6, 3, 10, 1, 11, 4, 8, 2, 9, 7] train accuracy: 0.684166666667 test accuracy: 0.654
episode 2166 end
episode 2167 start
episode columns: [5, 2, 0, 3, 1, 6, 10, 8, 11, 7, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 6, 10, 1, 11, 4, 8, 2, 9, 7, 3] train accuracy: 0.684203703704 test accuracy: 0.654333333333
episode 2167 end
episode 2168 start
episode columns: [4, 2, 8, 6, 1, 9, 0, 5, 11, 10, 3, 7] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 6, 10, 1, 11, 4, 8, 2, 9, 7, 3] train accuracy: 0.684259259259 test accuracy: 0.654
episode 2168 end
episode 2169 start
episode columns

episode 2199 start
episode columns: [7, 3, 5, 8, 9, 11, 10, 4, 0, 2, 6, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 1, 11, 6, 9, 7, 2, 8, 10, 5] train accuracy: 0.687648148148 test accuracy: 0.662
episode 2199 end
episode 2200 start
episode columns: [7, 4, 2, 11, 0, 1, 8, 3, 5, 6, 10, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 1, 11, 6, 9, 7, 2, 8, 10, 5] train accuracy: 0.687592592593 test accuracy: 0.6615
episode 2200 end
episode 2201 start
episode columns: [2, 8, 5, 4, 7, 0, 9, 11, 10, 1, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.7
episode policy:[4, 1, 11, 10, 5, 6, 9, 7, 2, 8, 3] train accuracy: 0.684092592593 test accuracy: 0.653666666667
episode 2201 end
episode 2202 start
episode columns: [3, 0, 1, 10, 9, 4, 11, 6, 7, 2, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 5, 1, 11, 6, 9, 7, 4, 2, 8] train accuracy: 0.684055555556 test accuracy: 0.654166666667
episode 2202 end
episode 2203 start
episode c

episode 2233 start
episode columns: [1, 8, 9, 5, 11, 3, 0, 2, 4, 7, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 0, 8, 2, 1, 10, 5, 6, 9, 11] train accuracy: 0.624037037037 test accuracy: 0.585666666667
episode 2233 end
episode 2234 start
episode columns: [4, 3, 10, 0, 5, 7, 2, 8, 1, 11, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 0, 8, 2, 1, 10, 5, 6, 9, 11] train accuracy: 0.623814814815 test accuracy: 0.5835
episode 2234 end
episode 2235 start
episode columns: [5, 3, 4, 9, 1, 7, 2, 6, 0, 8, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 0, 8, 2, 1, 10, 5, 6, 9, 11] train accuracy: 0.624444444444 test accuracy: 0.582666666667
episode 2235 end
episode 2236 start
episode columns: [10, 7, 4, 2, 0, 8, 1, 6, 5, 11, 3, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 0, 8, 2, 1, 10, 5, 6, 9, 11, 3, 7] train accuracy: 0.620018518519 test accuracy: 0.583666666667
episode 2236 end
episode 2237 start


episode 2268 start
episode columns: [8, 9, 6, 0, 7, 11, 4, 5, 2, 10, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 5, 6, 10, 9, 11, 8, 2] train accuracy: 0.691388888889 test accuracy: 0.671833333333
episode 2268 end
episode 2269 start
episode columns: [1, 6, 10, 3, 11, 7, 0, 8, 4, 9, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 5, 6, 10, 9, 11, 8, 2] train accuracy: 0.691388888889 test accuracy: 0.671833333333
episode 2269 end
episode 2270 start
episode columns: [9, 1, 8, 5, 7, 0, 3, 2, 4, 6, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 5, 6, 10, 9, 11, 8, 2] train accuracy: 0.691425925926 test accuracy: 0.6715
episode 2270 end
episode 2271 start
episode columns: [7, 8, 1, 2, 6, 10, 3, 4, 11, 0, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 5, 6, 10, 9, 11, 8, 2] train accuracy: 0.691351851852 test accuracy: 0.671166666667
episode 2271 end
episode 2272 start
episode columns: [0, 3, 6, 7, 8,

episode 2302 start
episode columns: [2, 11, 3, 7, 8, 4, 0, 6, 1, 9, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 8, 2, 0, 7, 5, 6, 10, 9, 4, 3] train accuracy: 0.677240740741 test accuracy: 0.617666666667
episode 2302 end
episode 2303 start
episode columns: [1, 8, 7, 0, 4, 9, 2, 6, 5, 11, 10, 3] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[1, 8, 2, 0, 7, 5, 6, 10, 9, 4, 3] train accuracy: 0.677259259259 test accuracy: 0.6175
episode 2303 end
episode 2304 start
episode columns: [1, 8, 9, 0, 4, 11, 6, 7, 5, 3, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 7, 0, 8, 2, 6, 10, 5, 9, 4, 3] train accuracy: 0.677037037037 test accuracy: 0.616166666667
episode 2304 end
episode 2305 start
episode columns: [1, 3, 9, 5, 8, 7, 0, 2, 11, 6, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 7, 5, 8, 2, 0, 10, 9, 4, 3, 6] train accuracy: 0.677074074074 test accuracy: 0.6165
episode 2305 end
episode 2306 start
episode

episode 2336 start
episode columns: [1, 5, 7, 8, 4, 9, 11, 10, 2, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[10, 5, 6, 9, 1, 7, 8, 4, 3, 2, 0] train accuracy: 0.677388888889 test accuracy: 0.616666666667
episode 2336 end
episode 2337 start
episode columns: [4, 10, 9, 2, 5, 1, 3, 0, 7, 6, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[4, 1, 9, 0, 8, 3, 7, 5, 6, 10] train accuracy: 0.689833333333 test accuracy: 0.668666666667
episode 2337 end
episode 2338 start
episode columns: [6, 10, 9, 3, 5, 11, 0, 2, 8, 4, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 1, 9, 0, 8, 3, 7, 5, 6, 10] train accuracy: 0.689833333333 test accuracy: 0.669166666667
episode 2338 end
episode 2339 start
episode columns: [1, 5, 2, 6, 8, 7, 9, 0, 3, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 1, 9, 0, 8, 3, 7, 5, 6, 10] train accuracy: 0.689759259259 test accuracy: 0.6695
episode 2339 end
episode 2340 start
episode columns: [

episode 2370 start
episode columns: [5, 1, 7, 8, 9, 4, 0, 2, 6, 3, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[1, 7, 5, 6, 9, 0, 8, 4, 3, 2] train accuracy: 0.684222222222 test accuracy: 0.6375
episode 2370 end
episode 2371 start
episode columns: [9, 2, 6, 7, 0, 4, 11, 5, 10, 1, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 7, 5, 6, 9, 0, 8, 4, 3, 2] train accuracy: 0.684185185185 test accuracy: 0.637333333333
episode 2371 end
episode 2372 start
episode columns: [0, 3, 4, 6, 1, 8, 9, 11, 10, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[1, 7, 5, 6, 9, 0, 8, 4, 3, 2] train accuracy: 0.684148148148 test accuracy: 0.637666666667
episode 2372 end
episode 2373 start
episode columns: [8, 11, 3, 5, 0, 10, 7, 4, 2, 6, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 7, 5, 6, 9, 0, 8, 4, 3, 2] train accuracy: 0.684259259259 test accuracy: 0.637166666667
episode 2373 end
episode 2374 start
episode columns: 

episode 2405 start
episode columns: [7, 1, 6, 3, 0, 9, 4, 5, 10, 2, 11, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 4, 8, 7, 5, 6, 3, 2, 1, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2405 end
episode 2406 start
episode columns: [8, 11, 10, 5, 6, 3, 9, 4, 0, 2, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 8, 7, 5, 6, 3, 2, 1, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2406 end
episode 2407 start
episode columns: [7, 5, 3, 0, 9, 10, 1, 8, 4, 2, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 4, 8, 7, 5, 6, 3, 2, 1, 10] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2407 end
episode 2408 start
episode columns: [4, 6, 2, 7, 1, 0, 8, 11, 5, 9, 3, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 4, 8, 7, 5, 6, 3, 2, 1, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2408 end
episode 2409 start
epis

episode 2439 start
episode columns: [2, 6, 8, 4, 0, 11, 1, 3, 5, 9, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 7, 8, 4, 1, 10, 5, 6, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2439 end
episode 2440 start
episode columns: [4, 1, 11, 8, 3, 0, 7, 5, 10, 2, 6, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 7, 8, 4, 1, 10, 5, 6, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2440 end
episode 2441 start
episode columns: [11, 0, 7, 8, 4, 1, 6, 3, 9, 10, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 7, 8, 3, 2, 1, 10, 5, 6] train accuracy: 0.695425925926 test accuracy: 0.683333333333
episode 2441 end
episode 2442 start
episode columns: [2, 5, 4, 3, 9, 1, 6, 0, 11, 10, 8, 7] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 7, 8, 3, 2, 1, 10, 5, 6, 0, 4] train accuracy: 0.677259259259 test accuracy: 0.618166666667
episode 2442 end
episode 2443 start
ep

episode 2474 start
episode columns: [5, 9, 1, 6, 7, 10, 3, 4, 8, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 7, 10, 5, 6, 9, 1, 4, 3, 11, 0] train accuracy: 0.647444444444 test accuracy: 0.594833333333
episode 2474 end
episode 2475 start
episode columns: [8, 10, 7, 3, 2, 11, 0, 6, 5, 1, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[9, 1, 7, 8, 3, 6, 0, 4, 5] train accuracy: 0.692907407407 test accuracy: 0.669333333333
episode 2475 end
episode 2476 start
episode columns: [7, 8, 3, 11, 9, 10, 5, 4, 1, 2, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 1, 7, 8, 4, 3, 6, 0, 5] train accuracy: 0.692907407407 test accuracy: 0.669333333333
episode 2476 end
episode 2477 start
episode columns: [2, 5, 0, 8, 3, 6, 4, 7, 9, 11, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 7, 8, 4, 3, 6, 0, 5] train accuracy: 0.692907407407 test accuracy: 0.669
episode 2477 end
episode 2478 start
episode columns: [5, 2,

episode 2508 start
episode columns: [5, 9, 4, 10, 3, 1, 7, 0, 8, 6, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 1, 7, 10, 5, 9, 2, 6, 11, 4] train accuracy: 0.684203703704 test accuracy: 0.654166666667
episode 2508 end
episode 2509 start
episode columns: [1, 5, 6, 3, 9, 8, 10, 2, 0, 4, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 3, 1, 7, 10, 5, 9, 2, 0, 4] train accuracy: 0.67937037037 test accuracy: 0.618
episode 2509 end
episode 2510 start
episode columns: [0, 9, 6, 11, 10, 5, 8, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 1, 7, 10, 5, 9, 2, 0, 4] train accuracy: 0.679314814815 test accuracy: 0.617666666667
episode 2510 end
episode 2511 start
episode columns: [11, 6, 5, 0, 10, 9, 8, 7, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 1, 7, 10, 5, 9, 2, 0, 4] train accuracy: 0.679333333333 test accuracy: 0.617666666667
episode 2511 end
episode 2512 start
episode columns: [7, 11,

episode 2543 start
episode columns: [0, 5, 8, 9, 10, 11, 6, 1, 7, 2, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 3, 6, 9, 1, 7, 5, 4, 2, 0] train accuracy: 0.684148148148 test accuracy: 0.637666666667
episode 2543 end
episode 2544 start
episode columns: [2, 7, 4, 0, 3, 1, 6, 9, 10, 5, 11, 8] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[8, 3, 6, 5, 7, 1, 2, 9, 4, 10] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 2544 end
episode 2545 start
episode columns: [0, 3, 11, 1, 9, 2, 4, 5, 6, 7, 8, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 3, 6, 5, 7, 1, 2, 9, 4, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2545 end
episode 2546 start
episode columns: [4, 7, 8, 10, 3, 2, 11, 9, 5, 6, 1, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 6, 5, 7, 1, 2, 9, 4, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 2546 end
episode 2547 start
episo

episode 2578 start
episode columns: [1, 7, 8, 0, 11, 3, 4, 10, 9, 2, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 8, 9, 2, 1, 6, 3, 4] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2578 end
episode 2579 start
episode columns: [8, 0, 10, 1, 6, 3, 11, 2, 4, 5, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 7, 8, 9, 2, 1, 6, 3, 4] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2579 end
episode 2580 start
episode columns: [0, 4, 7, 6, 11, 10, 3, 2, 9, 1, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 8, 9, 2, 1, 6, 3, 4] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2580 end
episode 2581 start
episode columns: [11, 5, 9, 7, 3, 0, 8, 1, 6, 4, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 7, 8, 9, 2, 1, 6, 3, 4] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 2581 end
episode 2582 start
episode columns: [9, 0, 10, 11, 1, 6, 3, 8, 5, 2, 7

episode 2613 start
episode columns: [7, 0, 2, 11, 5, 1, 9, 6, 8, 3, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 5, 4, 1, 6, 7, 8, 9] train accuracy: 0.696203703704 test accuracy: 0.686166666667
episode 2613 end
episode 2614 start
episode columns: [1, 3, 9, 7, 11, 6, 2, 0, 5, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 5, 4, 1, 6, 2, 0, 7, 8, 9] train accuracy: 0.684296296296 test accuracy: 0.637
episode 2614 end
episode 2615 start
episode columns: [0, 5, 11, 8, 7, 4, 10, 6, 1, 3, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 5, 4, 1, 6, 2, 0, 7, 8, 9] train accuracy: 0.684074074074 test accuracy: 0.637833333333
episode 2615 end
episode 2616 start
episode columns: [11, 7, 8, 9, 1, 5, 3, 10, 0, 4, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 5, 4, 1, 6, 2, 0, 7, 8, 9] train accuracy: 0.684055555556 test accuracy: 0.637
episode 2616 end
episode 2617 start
episode columns: [1, 9, 7, 4, 10, 8, 

episode 2648 start
episode columns: [9, 4, 2, 3, 5, 8, 6, 0, 1, 7, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 5, 4, 1, 11, 6, 7, 8, 9] train accuracy: 0.69162962963 test accuracy: 0.6745
episode 2648 end
episode 2649 start
episode columns: [4, 1, 8, 7, 2, 6, 10, 5, 3, 9, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.7
episode policy:[3, 5, 4, 1, 11, 6, 7, 8, 9] train accuracy: 0.69162962963 test accuracy: 0.674666666667
episode 2649 end
episode 2650 start
episode columns: [6, 11, 1, 0, 10, 5, 8, 4, 3, 7, 9, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 5, 4, 1, 11, 6, 7, 8, 9] train accuracy: 0.691592592593 test accuracy: 0.674333333333
episode 2650 end
episode 2651 start
episode columns: [5, 0, 7, 6, 10, 11, 9, 3, 8, 2, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 5, 4, 1, 11, 9] train accuracy: 0.694703703704 test accuracy: 0.682333333333
episode 2651 end
episode 2652 start
episode columns: [3, 9, 0, 6, 7, 1

episode 2682 start
episode columns: [11, 1, 2, 6, 7, 9, 3, 8, 4, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 5, 4, 6, 7, 8, 9, 2, 1, 11, 0, 10] train accuracy: 0.619981481481 test accuracy: 0.587666666667
episode 2682 end
episode 2683 start
episode columns: [11, 8, 9, 0, 7, 2, 4, 3, 1, 5, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[3, 5, 4, 6, 7, 8, 9, 2, 1, 11, 0, 10] train accuracy: 0.619722222222 test accuracy: 0.584666666667
episode 2683 end
episode 2684 start
episode columns: [0, 4, 2, 10, 7, 6, 8, 9, 5, 3, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 5, 4, 6, 7, 8, 9, 2, 10, 1, 11, 0] train accuracy: 0.619555555556 test accuracy: 0.583166666667
episode 2684 end
episode 2685 start
episode columns: [8, 9, 11, 1, 5, 7, 4, 0, 3, 2, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 5, 4, 6, 7, 8, 9, 2, 10, 1, 11, 0] train accuracy: 0.61962962963 test accuracy: 0.585333333333
episode 2685 end
e

episode 2716 start
episode columns: [1, 10, 9, 0, 8, 3, 4, 5, 2, 7, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 2, 5, 10, 6, 7, 8, 9, 4, 0, 11, 1] train accuracy: 0.619888888889 test accuracy: 0.587833333333
episode 2716 end
episode 2717 start
episode columns: [0, 5, 6, 11, 9, 7, 1, 4, 3, 10, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[3, 2, 5, 10, 6, 7, 8, 9, 4, 0, 11, 1] train accuracy: 0.620111111111 test accuracy: 0.581333333333
episode 2717 end
episode 2718 start
episode columns: [5, 2, 10, 0, 8, 7, 6, 9, 1, 4, 3, 11] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[3, 2, 5, 10, 6, 7, 8, 9, 1, 11, 0, 4] train accuracy: 0.620388888889 test accuracy: 0.588666666667
episode 2718 end
episode 2719 start
episode columns: [0, 8, 6, 11, 5, 10, 2, 9, 7, 1, 3, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 2, 5, 4, 6, 7, 8, 9, 1, 11, 0, 10] train accuracy: 0.619740740741 test accuracy: 0.582333333333
episode 2

episode 2750 start
episode columns: [6, 5, 10, 0, 4, 9, 3, 8, 1, 2, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[6, 7, 8, 9, 1, 11, 5, 10, 4, 3, 2] train accuracy: 0.684166666667 test accuracy: 0.653833333333
episode 2750 end
episode 2751 start
episode columns: [6, 11, 1, 2, 8, 3, 5, 7, 0, 4, 10, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 1, 11, 5, 10, 4, 3, 2, 7, 8, 6] train accuracy: 0.684148148148 test accuracy: 0.653833333333
episode 2751 end
episode 2752 start
episode columns: [5, 1, 4, 2, 10, 6, 7, 9, 3, 0, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[9, 1, 11, 5, 10, 4, 3, 2, 7, 8, 6] train accuracy: 0.684259259259 test accuracy: 0.654166666667
episode 2752 end
episode 2753 start
episode columns: [10, 7, 8, 3, 6, 5, 4, 9, 0, 11, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 11, 5, 10, 4, 3, 2, 7, 8, 6] train accuracy: 0.684185185185 test accuracy: 0.653666666667
episode 2753 end
episod

episode 2785 start
episode columns: [9, 4, 5, 6, 8, 11, 0, 2, 1, 3, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 8, 7, 11, 5, 3, 2, 10, 9, 1, 4] train accuracy: 0.684203703704 test accuracy: 0.6535
episode 2785 end
episode 2786 start
episode columns: [8, 0, 10, 5, 1, 4, 11, 9, 6, 3, 7, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 8, 7, 11, 5, 3, 2, 10, 9, 1, 4] train accuracy: 0.684092592593 test accuracy: 0.655166666667
episode 2786 end
episode 2787 start
episode columns: [6, 5, 9, 1, 0, 2, 3, 11, 10, 8, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.15
episode policy:[6, 8, 5, 3, 2, 7, 11, 1, 10, 9, 4] train accuracy: 0.684185185185 test accuracy: 0.654333333333
episode 2787 end
episode 2788 start
episode columns: [7, 8, 2, 0, 1, 6, 11, 3, 4, 5, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 8, 5, 3, 2, 7, 11, 1, 10, 9, 4] train accuracy: 0.68412962963 test accuracy: 0.654666666667
episode 2788 end
episode 2789 s

episode 2819 start
episode columns: [2, 10, 7, 4, 3, 6, 9, 1, 11, 8, 0, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 8, 0, 11, 5, 3, 1, 10, 7, 4, 9] train accuracy: 0.647592592593 test accuracy: 0.5955
episode 2819 end
episode 2820 start
episode columns: [3, 4, 9, 7, 1, 2, 8, 11, 5, 0, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 8, 0, 11, 5, 3, 1, 10, 7, 4, 9] train accuracy: 0.647574074074 test accuracy: 0.5975
episode 2820 end
episode 2821 start
episode columns: [7, 3, 5, 10, 8, 9, 1, 11, 2, 4, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 8, 0, 11, 5, 3, 1, 10, 7, 4, 9] train accuracy: 0.647444444444 test accuracy: 0.596
episode 2821 end
episode 2822 start
episode columns: [2, 0, 1, 10, 4, 8, 5, 6, 11, 9, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 8, 0, 11, 5, 3, 1, 7, 4, 9] train accuracy: 0.657388888889 test accuracy: 0.600666666667
episode 2822 end
episode 2823 start
episode columns

episode 2853 start
episode columns: [11, 5, 8, 6, 10, 2, 9, 3, 4, 1, 0, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 6, 10, 9, 8, 4, 3, 2, 7] train accuracy: 0.695888888889 test accuracy: 0.687
episode 2853 end
episode 2854 start
episode columns: [1, 2, 9, 6, 7, 3, 11, 10, 4, 5, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 6, 10, 9, 8, 0, 11, 7, 3, 4, 1] train accuracy: 0.647425925926 test accuracy: 0.597333333333
episode 2854 end
episode 2855 start
episode columns: [5, 7, 9, 6, 10, 3, 4, 11, 8, 0, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 10, 3, 4, 5, 9, 8, 0, 11, 7] train accuracy: 0.659425925926 test accuracy: 0.613166666667
episode 2855 end
episode 2856 start
episode columns: [0, 8, 1, 7, 2, 11, 4, 9, 10, 3, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 10, 3, 4, 5, 9, 8, 0, 11, 7, 2, 1] train accuracy: 0.620074074074 test accuracy: 0.587333333333
episode 2856 end
episode 2857 start
e

episode 2887 start
episode columns: [11, 3, 7, 4, 5, 8, 1, 2, 10, 9, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 2, 4, 5, 9, 6, 8, 11, 0, 7, 10] train accuracy: 0.633314814815 test accuracy: 0.593166666667
episode 2887 end
episode 2888 start
episode columns: [7, 4, 2, 1, 11, 3, 10, 9, 8, 6, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 2, 4, 5, 9, 6, 8, 11, 0, 7, 10] train accuracy: 0.634222222222 test accuracy: 0.5975
episode 2888 end
episode 2889 start
episode columns: [6, 8, 10, 4, 11, 1, 0, 7, 3, 9, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 2, 4, 5, 9, 6, 8, 11, 0, 7, 10] train accuracy: 0.633981481481 test accuracy: 0.595666666667
episode 2889 end
episode 2890 start
episode columns: [7, 1, 4, 11, 9, 2, 8, 10, 5, 0, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 2, 4, 5, 9, 6, 8, 10, 7] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 2890 end
episode 2891 start
ep

episode 2921 start
episode columns: [3, 5, 0, 4, 9, 11, 8, 6, 7, 2, 1, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 1, 5, 9, 6, 2, 4, 0, 11, 10, 7, 8] train accuracy: 0.619648148148 test accuracy: 0.586833333333
episode 2921 end
episode 2922 start
episode columns: [2, 0, 6, 9, 8, 5, 4, 11, 10, 7, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[3, 6, 2, 4, 5, 9, 8, 10, 7] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 2922 end
episode 2923 start
episode columns: [4, 9, 11, 5, 6, 7, 1, 2, 10, 0, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[3, 8, 10, 7, 2, 4, 5, 9, 6] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 2923 end
episode 2924 start
episode columns: [7, 8, 6, 1, 9, 5, 3, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 8, 10, 7, 2, 4, 5, 9, 6, 1, 0, 11] train accuracy: 0.619962962963 test accuracy: 0.587833333333
episode 2924 end
episode 2925 start
episode c

episode 2955 start
episode columns: [11, 0, 9, 5, 10, 7, 4, 6, 3, 8, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 5, 10, 7, 6, 2, 4, 0, 11, 9, 8, 3] train accuracy: 0.620055555556 test accuracy: 0.583166666667
episode 2955 end
episode 2956 start
episode columns: [1, 5, 11, 3, 2, 4, 10, 0, 7, 8, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[1, 5, 10, 7, 6, 2, 4, 0, 11, 9, 8, 3] train accuracy: 0.619666666667 test accuracy: 0.588
episode 2956 end
episode 2957 start
episode columns: [5, 4, 11, 6, 3, 0, 2, 1, 8, 10, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[1, 5, 10, 7, 6, 2, 4, 0, 11, 9, 8, 3] train accuracy: 0.61987037037 test accuracy: 0.585
episode 2957 end
episode 2958 start
episode columns: [1, 8, 6, 9, 10, 3, 11, 0, 7, 4, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 5, 10, 7, 6, 2, 4, 0, 11, 9, 8, 3] train accuracy: 0.62012962963 test accuracy: 0.583166666667
episode 2958 end
episode 2959 st

episode 2989 start
episode columns: [1, 0, 11, 6, 5, 8, 3, 7, 10, 2, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[9, 6, 1, 5, 11, 10, 7, 3, 4, 0, 8, 2] train accuracy: 0.619814814815 test accuracy: 0.582333333333
episode 2989 end
episode 2990 start
episode columns: [0, 3, 7, 6, 1, 5, 11, 9, 4, 10, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 6, 4, 5, 11, 10, 7, 3, 8, 2] train accuracy: 0.687759259259 test accuracy: 0.663
episode 2990 end
episode 2991 start
episode columns: [11, 9, 6, 3, 5, 0, 8, 1, 2, 10, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 6, 4, 5, 11, 10, 1, 8, 3, 7] train accuracy: 0.690740740741 test accuracy: 0.673833333333
episode 2991 end
episode 2992 start
episode columns: [2, 6, 4, 7, 1, 0, 8, 11, 3, 9, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 6, 4, 5, 11, 10, 1, 8, 3, 7] train accuracy: 0.690833333333 test accuracy: 0.6735
episode 2992 end
episode 2993 start
episode co

episode 3023 start
episode columns: [9, 8, 2, 1, 6, 5, 0, 11, 3, 4, 10, 7] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 4, 1, 6, 2, 3, 11, 5, 10] train accuracy: 0.68837037037 test accuracy: 0.662166666667
episode 3023 end
episode 3024 start
episode columns: [5, 7, 8, 6, 4, 1, 10, 3, 2, 11, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 10, 1, 6, 2, 4, 0, 11, 3, 7, 8, 9] train accuracy: 0.620148148148 test accuracy: 0.585166666667
episode 3024 end
episode 3025 start
episode columns: [5, 3, 7, 10, 1, 4, 9, 11, 2, 6, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 4, 1, 6, 2, 3, 11, 5, 10] train accuracy: 0.688407407407 test accuracy: 0.662333333333
episode 3025 end
episode 3026 start
episode columns: [1, 5, 8, 9, 2, 11, 3, 10, 6, 7, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 4, 1, 6, 2, 3, 11, 5, 10] train accuracy: 0.688388888889 test accuracy: 0.662333333333
episode 3026 end
episode 3027 start
epis

episode 3058 start
episode columns: [8, 1, 10, 6, 11, 0, 5, 2, 7, 4, 9, 3] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 10, 8, 6, 2, 4, 1, 9, 3, 0, 11, 7] train accuracy: 0.619574074074 test accuracy: 0.582666666667
episode 3058 end
episode 3059 start
episode columns: [3, 6, 9, 8, 1, 10, 7, 4, 11, 2, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 10, 8, 6, 2, 4, 1, 9, 3, 0, 11, 7] train accuracy: 0.620111111111 test accuracy: 0.584333333333
episode 3059 end
episode 3060 start
episode columns: [1, 2, 7, 0, 11, 5, 9, 3, 4, 6, 10, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 10, 8, 6, 2, 4, 1, 9, 3, 0, 11, 7] train accuracy: 0.619888888889 test accuracy: 0.584833333333
episode 3060 end
episode 3061 start
episode columns: [0, 11, 10, 9, 7, 5, 2, 8, 1, 4, 3, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 10, 8, 6, 2, 4, 1, 9, 3, 0, 11, 7] train accuracy: 0.619240740741 test accuracy: 0.584
episode 3061 end
epis

episode 3092 start
episode columns: [9, 10, 1, 0, 5, 7, 6, 3, 11, 4, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 11, 10, 5, 7, 8, 6, 4, 1, 9] train accuracy: 0.690722222222 test accuracy: 0.6735
episode 3092 end
episode 3093 start
episode columns: [4, 9, 2, 3, 6, 10, 0, 11, 1, 7, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 11, 10, 5, 7, 8, 6, 4, 1, 9] train accuracy: 0.690703703704 test accuracy: 0.673333333333
episode 3093 end
episode 3094 start
episode columns: [0, 5, 3, 11, 2, 8, 9, 1, 6, 7, 4, 10] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[3, 11, 10, 5, 7, 8, 6, 4, 1, 9] train accuracy: 0.690777777778 test accuracy: 0.673666666667
episode 3094 end
episode 3095 start
episode columns: [3, 5, 1, 4, 0, 2, 7, 8, 10, 6, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 11, 10, 5, 7, 1, 6, 4, 9, 8, 2] train accuracy: 0.684037037037 test accuracy: 0.654166666667
episode 3095 end
episode 3096 start
epis

episode 3126 start
episode columns: [3, 6, 7, 10, 5, 2, 0, 8, 11, 9, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 6, 2, 8, 10, 5, 7, 1, 9, 4, 0, 11] train accuracy: 0.620462962963 test accuracy: 0.585333333333
episode 3126 end
episode 3127 start
episode columns: [11, 5, 10, 1, 9, 2, 6, 8, 3, 4, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 6, 2, 8, 10, 5, 7, 1, 9, 4, 0, 11] train accuracy: 0.620277777778 test accuracy: 0.586333333333
episode 3127 end
episode 3128 start
episode columns: [1, 3, 4, 10, 8, 6, 7, 0, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 6, 2, 8, 10, 5, 7, 1, 9, 4, 0, 11] train accuracy: 0.619574074074 test accuracy: 0.586
episode 3128 end
episode 3129 start
episode columns: [0, 3, 5, 6, 8, 4, 2, 10, 9, 11, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 6, 2, 8, 10, 5, 7, 1, 9, 4, 0, 11] train accuracy: 0.619888888889 test accuracy: 0.584666666667
episode 3129 end
episode 31

episode 3160 start
episode columns: [8, 0, 11, 7, 10, 2, 1, 9, 4, 5, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 7, 5, 10, 6, 4, 2, 0, 11, 1, 8, 3] train accuracy: 0.620222222222 test accuracy: 0.585
episode 3160 end
episode 3161 start
episode columns: [9, 1, 2, 11, 4, 5, 10, 7, 0, 3, 8, 6] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[9, 7, 5, 2, 0, 11, 1, 10, 6, 4] train accuracy: 0.626592592593 test accuracy: 0.589
episode 3161 end
episode 3162 start
episode columns: [10, 5, 2, 7, 6, 8, 1, 9, 11, 4, 0, 3] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 7, 5, 10, 6, 4, 2, 0, 11, 1, 8, 3] train accuracy: 0.620333333333 test accuracy: 0.5845
episode 3162 end
episode 3163 start
episode columns: [6, 4, 3, 9, 7, 11, 0, 8, 5, 2, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 7, 5, 10, 6, 4, 2, 0, 11, 1, 8, 3] train accuracy: 0.619722222222 test accuracy: 0.583333333333
episode 3163 end
episode 3164 start
episode 

episode 3196 start
episode columns: [5, 10, 7, 2, 3, 8, 9, 0, 4, 1, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[3, 8, 10, 6, 4, 2] train accuracy: 0.696074074074 test accuracy: 0.688
episode 3196 end
episode 3197 start
episode columns: [3, 7, 1, 6, 10, 0, 8, 2, 4, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 8, 10, 6, 4, 2] train accuracy: 0.696074074074 test accuracy: 0.688333333333
episode 3197 end
episode 3198 start
episode columns: [3, 11, 10, 5, 2, 8, 9, 7, 6, 0, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 8, 10, 6, 4, 2] train accuracy: 0.696074074074 test accuracy: 0.688166666667
episode 3198 end
episode 3199 start
episode columns: [2, 8, 1, 7, 3, 9, 4, 10, 5, 11, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 8, 10, 6, 4, 2] train accuracy: 0.696074074074 test accuracy: 0.688166666667
episode 3199 end
episode 3200 start
episode columns: [1, 8, 10, 11, 3, 2, 5, 9, 6, 0, 4, 7] epsilon:

episode 3231 start
episode columns: [7, 5, 4, 11, 2, 6, 1, 0, 8, 3, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[10, 8, 1, 9, 7, 4, 2, 6, 5, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 3231 end
episode 3232 start
episode columns: [4, 3, 11, 7, 10, 6, 0, 2, 8, 9, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 8, 1, 9, 7, 4, 2, 6, 5, 3] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3232 end
episode 3233 start
episode columns: [10, 11, 7, 0, 2, 3, 6, 4, 1, 9, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 8, 1, 9, 7, 4, 2, 6, 5, 3] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 3233 end
episode 3234 start
episode columns: [3, 1, 9, 8, 5, 10, 7, 4, 6, 0, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 8, 1, 9, 7, 4, 2, 6, 5, 3] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3234 end
episode 3235 start
epis

episode 3265 start
episode columns: [5, 4, 2, 11, 0, 7, 6, 9, 3, 1, 10, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 10, 6, 4, 1, 3, 5, 7, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3265 end
episode 3266 start
episode columns: [5, 7, 11, 4, 0, 2, 8, 10, 1, 3, 6, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 10, 6, 4, 1, 3, 5, 7, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3266 end
episode 3267 start
episode columns: [11, 8, 3, 7, 1, 10, 9, 0, 2, 4, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 10, 6, 4, 1, 3, 5, 7, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3267 end
episode 3268 start
episode columns: [6, 10, 11, 4, 5, 1, 0, 2, 3, 9, 8, 7] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 10, 6, 4, 1, 0, 11, 2, 3, 5, 7, 9] train accuracy: 0.619444444444 test accuracy: 0.586666666667
episode 3268 end
episode 3269 start
episod

episode 3299 start
episode columns: [10, 5, 3, 7, 6, 2, 8, 4, 9, 0, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 10, 6, 4, 7, 1, 5, 3] train accuracy: 0.696222222222 test accuracy: 0.686
episode 3299 end
episode 3300 start
episode columns: [9, 8, 6, 4, 2, 1, 3, 7, 0, 10, 11, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 10, 6, 4, 7, 1, 5, 3] train accuracy: 0.696222222222 test accuracy: 0.686
episode 3300 end
episode 3301 start
episode columns: [7, 10, 1, 11, 0, 9, 4, 6, 5, 2, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 10, 6, 4, 7, 1, 5, 3] train accuracy: 0.696222222222 test accuracy: 0.686
episode 3301 end
episode 3302 start
episode columns: [3, 4, 9, 10, 0, 2, 7, 1, 5, 6, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 8, 10, 6, 4, 7, 1, 5, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3302 end
episode 3303 start
episode columns: [0, 3, 5, 7, 9, 2, 1, 6, 8, 4, 10,

episode 3334 start
episode columns: [5, 3, 0, 4, 9, 6, 1, 10, 8, 11, 2, 7] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 10, 3, 5, 7, 1, 6, 4, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3334 end
episode 3335 start
episode columns: [1, 9, 5, 7, 6, 4, 10, 3, 8, 0, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 10, 3, 5, 7, 1, 6, 4, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3335 end
episode 3336 start
episode columns: [11, 8, 7, 3, 0, 2, 4, 10, 6, 5, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 10, 3, 5, 7, 1, 6, 4, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3336 end
episode 3337 start
episode columns: [8, 3, 9, 10, 11, 0, 7, 1, 2, 5, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 10, 3, 5, 7, 4, 6, 2, 9, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 3337 end
episode 3338 start
episode column

episode 3368 start
episode columns: [9, 6, 10, 4, 7, 5, 8, 0, 2, 3, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 10, 1, 3, 6, 4, 7, 9, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3368 end
episode 3369 start
episode columns: [8, 7, 4, 2, 5, 6, 9, 11, 1, 3, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 10, 1, 0, 4, 7, 3, 6, 9, 5] train accuracy: 0.689833333333 test accuracy: 0.669
episode 3369 end
episode 3370 start
episode columns: [9, 1, 6, 5, 8, 2, 10, 0, 4, 3, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 10, 1, 0, 4, 7, 3, 6, 9, 5] train accuracy: 0.689888888889 test accuracy: 0.668666666667
episode 3370 end
episode 3371 start
episode columns: [8, 10, 1, 4, 9, 5, 6, 0, 3, 7, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[8, 10, 1, 0, 4, 7, 3, 6, 9] train accuracy: 0.691814814815 test accuracy: 0.672833333333
episode 3371 end
episode 3372 start
episode columns: 

episode 3403 start
episode columns: [6, 5, 8, 7, 4, 9, 0, 2, 11, 3, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 4, 9, 10, 1, 3, 6, 8] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3403 end
episode 3404 start
episode columns: [3, 11, 8, 10, 1, 2, 5, 7, 4, 6, 0, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 7, 0, 8, 10, 9, 4, 6, 2, 3, 1] train accuracy: 0.677314814815 test accuracy: 0.616333333333
episode 3404 end
episode 3405 start
episode columns: [7, 5, 6, 4, 8, 2, 3, 0, 10, 1, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[5, 7, 0, 8, 10, 9, 4, 6, 2, 3, 1] train accuracy: 0.677333333333 test accuracy: 0.615833333333
episode 3405 end
episode 3406 start
episode columns: [8, 5, 2, 11, 3, 7, 0, 1, 9, 6, 4, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 0, 8, 10, 9, 4, 6, 2, 11, 1, 3] train accuracy: 0.620203703704 test accuracy: 0.586166666667
episode 3406 end
episode 340

episode 3438 start
episode columns: [10, 5, 7, 2, 3, 8, 6, 0, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 8, 5, 7, 4, 9, 10, 1, 3, 11, 0, 2] train accuracy: 0.619444444444 test accuracy: 0.585333333333
episode 3438 end
episode 3439 start
episode columns: [3, 5, 7, 9, 10, 4, 11, 6, 0, 8, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 8, 5, 7, 4, 9, 10, 1, 3, 11, 0, 2] train accuracy: 0.61962962963 test accuracy: 0.582
episode 3439 end
episode 3440 start
episode columns: [6, 5, 3, 8, 1, 7, 4, 11, 10, 9, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[1, 3, 5, 7, 6, 8, 9, 10] train accuracy: 0.696185185185 test accuracy: 0.686333333333
episode 3440 end
episode 3441 start
episode columns: [1, 7, 6, 3, 2, 5, 10, 9, 0, 4, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 3, 5, 7, 6, 8, 9, 10] train accuracy: 0.696185185185 test accuracy: 0.686333333333
episode 3441 end
episode 3442 start
episode columns: [9

episode 3472 start
episode columns: [4, 10, 2, 3, 7, 11, 8, 9, 6, 0, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 6, 8, 9, 10, 1, 3, 2, 4, 11, 0] train accuracy: 0.620074074074 test accuracy: 0.5845
episode 3472 end
episode 3473 start
episode columns: [1, 10, 6, 11, 9, 4, 3, 7, 5, 8, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 7, 6, 8, 9, 10, 1, 3, 2, 4, 11, 0] train accuracy: 0.619814814815 test accuracy: 0.584833333333
episode 3473 end
episode 3474 start
episode columns: [6, 9, 3, 10, 8, 1, 7, 0, 4, 5, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 7, 6, 8, 9, 10, 1, 3, 2, 4, 11, 0] train accuracy: 0.619481481481 test accuracy: 0.5855
episode 3474 end
episode 3475 start
episode columns: [8, 5, 9, 1, 10, 6, 2, 3, 7, 0, 4, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 6, 8, 9, 10, 1, 3, 2, 4, 11, 0] train accuracy: 0.619240740741 test accuracy: 0.585166666667
episode 3475 end
episode 34

episode 3506 start
episode columns: [6, 0, 7, 9, 10, 8, 4, 11, 5, 3, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 8, 9, 10, 3, 2, 5, 7, 1, 0, 4, 11] train accuracy: 0.619518518519 test accuracy: 0.585166666667
episode 3506 end
episode 3507 start
episode columns: [10, 6, 2, 0, 7, 8, 5, 3, 4, 11, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 8, 9, 10, 3, 2, 5, 7, 1, 0, 4, 11] train accuracy: 0.619481481481 test accuracy: 0.585666666667
episode 3507 end
episode 3508 start
episode columns: [1, 7, 11, 2, 4, 5, 3, 10, 8, 9, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 8, 9, 10, 3, 2, 5, 7, 1, 0, 4, 11] train accuracy: 0.619814814815 test accuracy: 0.585833333333
episode 3508 end
episode 3509 start
episode columns: [2, 11, 6, 1, 7, 3, 8, 4, 10, 9, 0, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 8, 9, 10, 3, 2, 5, 7, 1, 0, 4, 11] train accuracy: 0.619685185185 test accuracy: 0.5875
episode 3509 end
ep

episode 3540 start
episode columns: [1, 8, 9, 10, 2, 0, 3, 4, 7, 5, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 7, 5, 0, 10, 3, 4, 2, 8, 9] train accuracy: 0.682537037037 test accuracy: 0.644833333333
episode 3540 end
episode 3541 start
episode columns: [6, 4, 11, 3, 2, 1, 7, 8, 0, 5, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 7, 5, 0, 10, 3, 4, 2, 1, 8, 9] train accuracy: 0.677203703704 test accuracy: 0.616666666667
episode 3541 end
episode 3542 start
episode columns: [3, 10, 1, 9, 7, 4, 11, 0, 2, 5, 6, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 7, 5, 0, 10, 3, 4, 2, 1, 8, 9] train accuracy: 0.677296296296 test accuracy: 0.616833333333
episode 3542 end
episode 3543 start
episode columns: [11, 8, 9, 10, 0, 3, 6, 4, 1, 5, 2, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 4, 7, 5, 0, 10, 3, 1, 8, 9] train accuracy: 0.689796296296 test accuracy: 0.669166666667
episode 3543 end
episode 3544 st

episode 3574 start
episode columns: [11, 4, 8, 6, 10, 5, 1, 7, 3, 9, 0, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 4, 7, 5, 3, 1, 8, 10, 9, 11, 0] train accuracy: 0.647333333333 test accuracy: 0.597166666667
episode 3574 end
episode 3575 start
episode columns: [1, 10, 4, 11, 9, 0, 6, 7, 8, 5, 2, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 4, 7, 5, 3, 1, 8, 10, 9, 11, 0] train accuracy: 0.647407407407 test accuracy: 0.597166666667
episode 3575 end
episode 3576 start
episode columns: [3, 7, 2, 5, 8, 6, 1, 4, 9, 11, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 4, 7, 5, 3, 1, 8, 10, 9, 11, 0] train accuracy: 0.647611111111 test accuracy: 0.596166666667
episode 3576 end
episode 3577 start
episode columns: [0, 1, 7, 5, 3, 6, 8, 11, 4, 9, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 4, 7, 5, 3, 1, 8, 10, 9, 11, 0] train accuracy: 0.647462962963 test accuracy: 0.596166666667
episode 3577 end
episode

episode 3608 start
episode columns: [4, 10, 2, 7, 6, 9, 3, 8, 5, 1, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[7, 5, 3, 10, 4, 9, 8, 1, 2] train accuracy: 0.695888888889 test accuracy: 0.683166666667
episode 3608 end
episode 3609 start
episode columns: [1, 2, 0, 10, 11, 9, 8, 7, 5, 6, 3, 4] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 5, 3, 10, 4, 9, 8, 1, 2] train accuracy: 0.695888888889 test accuracy: 0.683166666667
episode 3609 end
episode 3610 start
episode columns: [7, 10, 6, 2, 0, 9, 4, 1, 3, 11, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 5, 3, 10, 4, 9, 8, 1, 2] train accuracy: 0.695888888889 test accuracy: 0.683166666667
episode 3610 end
episode 3611 start
episode columns: [1, 9, 0, 6, 2, 11, 5, 10, 8, 4, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 5, 3, 10, 4, 9, 8, 1, 2] train accuracy: 0.695888888889 test accuracy: 0.683166666667
episode 3611 end
episode 3612 start
episode columns

episode 3642 start
episode columns: [7, 1, 5, 6, 2, 3, 8, 0, 4, 11, 10, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 1, 9, 10, 4, 2, 5, 8, 0, 6, 3] train accuracy: 0.677203703704 test accuracy: 0.6165
episode 3642 end
episode 3643 start
episode columns: [5, 10, 11, 7, 0, 8, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 1, 9, 10, 4, 2, 5, 8, 0, 6, 3] train accuracy: 0.677388888889 test accuracy: 0.617166666667
episode 3643 end
episode 3644 start
episode columns: [4, 11, 6, 7, 8, 0, 9, 2, 10, 3, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 1, 9, 10, 4, 2, 5, 8] train accuracy: 0.696074074074 test accuracy: 0.6845
episode 3644 end
episode 3645 start
episode columns: [7, 8, 1, 9, 10, 3, 6, 0, 4, 11, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 1, 9, 10, 4, 2, 5, 8] train accuracy: 0.696074074074 test accuracy: 0.6845
episode 3645 end
episode 3646 start
episode columns: [5, 7, 1, 6, 9, 3, 8, 0, 4,

episode 3677 start
episode columns: [4, 3, 10, 7, 1, 8, 5, 2, 0, 6, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 3, 9, 10, 8, 1, 6, 7, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3677 end
episode 3678 start
episode columns: [2, 4, 6, 5, 9, 10, 3, 7, 8, 0, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[4, 3, 9, 10, 8, 1, 6, 7, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3678 end
episode 3679 start
episode columns: [7, 5, 1, 2, 11, 9, 6, 8, 10, 0, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[4, 10, 8, 1, 3, 9, 7, 5, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3679 end
episode 3680 start
episode columns: [9, 1, 3, 8, 6, 7, 10, 4, 11, 0, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 10, 8, 1, 3, 9, 7, 5, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3680 end
episode 3681 start
episode columns

episode 3712 start
episode columns: [3, 11, 6, 0, 5, 7, 1, 9, 8, 2, 4, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 5, 7, 3, 1, 9, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3712 end
episode 3713 start
episode columns: [10, 9, 11, 3, 2, 7, 5, 0, 6, 4, 1, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 5, 7, 3, 1, 9, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3713 end
episode 3714 start
episode columns: [3, 9, 1, 7, 5, 2, 10, 6, 4, 8, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 5, 7, 3, 1, 9, 10, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3714 end
episode 3715 start
episode columns: [6, 1, 9, 8, 10, 2, 3, 5, 7, 0, 4, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 5, 9, 10, 8, 1, 3, 6, 7] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 3715 end
episode 3716 start
episode columns

episode 3747 start
episode columns: [3, 10, 7, 9, 5, 8, 4, 11, 1, 0, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 7, 5, 9, 10, 4, 2] train accuracy: 0.6965 test accuracy: 0.6865
episode 3747 end
episode 3748 start
episode columns: [4, 0, 5, 2, 6, 3, 7, 8, 1, 10, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 7, 5, 9, 10, 4, 2, 6, 8, 1] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3748 end
episode 3749 start
episode columns: [5, 9, 3, 8, 0, 1, 7, 6, 11, 4, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[3, 7, 5, 9, 10, 4, 2, 6, 8, 1] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3749 end
episode 3750 start
episode columns: [2, 1, 8, 10, 4, 3, 11, 0, 5, 6, 7, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 7, 5, 6, 8, 4, 10, 2, 9, 1] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3750 end
episode 3751 start
episode columns: [0, 3, 8, 4

episode 3782 start
episode columns: [5, 7, 6, 11, 10, 4, 3, 2, 9, 8, 1, 0] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 7, 5, 6, 8, 11, 0, 9, 10, 4, 2] train accuracy: 0.634018518519 test accuracy: 0.594666666667
episode 3782 end
episode 3783 start
episode columns: [10, 1, 5, 3, 6, 8, 11, 4, 2, 9, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 7, 5, 6, 8, 11, 0, 9, 10, 4, 2] train accuracy: 0.634148148148 test accuracy: 0.590666666667
episode 3783 end
episode 3784 start
episode columns: [4, 0, 7, 5, 11, 6, 10, 2, 3, 8, 9, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 7, 5, 6, 8, 11, 0, 9, 10, 4, 2] train accuracy: 0.633703703704 test accuracy: 0.594333333333
episode 3784 end
episode 3785 start
episode columns: [9, 10, 11, 4, 6, 7, 2, 0, 8, 1, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.75
episode policy:[3, 7, 5, 6, 8, 11, 0, 9, 10, 4, 2] train accuracy: 0.63437037037 test accuracy: 0.5965
episode 3785 end
episode 3786 st

episode 3816 start
episode columns: [8, 0, 10, 7, 6, 9, 2, 11, 4, 1, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 7, 9, 10, 2, 6, 8, 1, 5, 4, 0] train accuracy: 0.67712962963 test accuracy: 0.617166666667
episode 3816 end
episode 3817 start
episode columns: [2, 6, 11, 5, 10, 8, 1, 9, 7, 0, 3, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 7, 9, 10, 2, 6, 8, 1, 5, 4, 0] train accuracy: 0.677277777778 test accuracy: 0.617
episode 3817 end
episode 3818 start
episode columns: [8, 9, 1, 2, 6, 0, 5, 4, 10, 7, 11, 3] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[3, 7, 9, 10, 2, 6, 8, 1, 5, 4, 0] train accuracy: 0.67712962963 test accuracy: 0.615833333333
episode 3818 end
episode 3819 start
episode columns: [0, 1, 8, 11, 4, 5, 7, 10, 2, 3, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[3, 7, 9, 10, 2, 6, 8, 1, 5, 4, 0] train accuracy: 0.677388888889 test accuracy: 0.616666666667
episode 3819 end
episode 3820 start
epis

episode 3850 start
episode columns: [10, 5, 4, 8, 0, 9, 1, 11, 3, 7, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[10, 2, 6, 9, 7, 3, 8, 1, 5, 4, 0] train accuracy: 0.677259259259 test accuracy: 0.616833333333
episode 3850 end
episode 3851 start
episode columns: [3, 7, 1, 9, 8, 10, 11, 6, 5, 2, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[10, 2, 6, 9, 7, 3, 8, 1, 5, 4, 0] train accuracy: 0.677166666667 test accuracy: 0.618166666667
episode 3851 end
episode 3852 start
episode columns: [9, 4, 2, 1, 7, 8, 0, 5, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.7
episode policy:[10, 2, 6, 9, 7, 8, 1, 5, 4, 3, 0] train accuracy: 0.677351851852 test accuracy: 0.617166666667
episode 3852 end
episode 3853 start
episode columns: [5, 9, 4, 1, 2, 8, 3, 10, 11, 6, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[10, 2, 6, 9, 7, 8, 1, 5, 4, 3, 0] train accuracy: 0.677185185185 test accuracy: 0.617333333333
episode 3853 end
episode 3854 start

episode 3884 start
episode columns: [11, 7, 6, 0, 9, 10, 4, 8, 5, 1, 2, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 1, 3, 7, 9, 6, 11, 0, 2, 8, 5, 4] train accuracy: 0.620092592593 test accuracy: 0.584833333333
episode 3884 end
episode 3885 start
episode columns: [11, 0, 8, 5, 1, 7, 3, 6, 9, 4, 10, 2] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[10, 1, 3, 7, 9, 6, 11, 8, 5, 4, 0, 2] train accuracy: 0.620074074074 test accuracy: 0.583833333333
episode 3885 end
episode 3886 start
episode columns: [10, 2, 8, 11, 6, 0, 9, 7, 3, 4, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 7, 9, 1, 8, 10, 2, 5, 6, 11, 4, 0] train accuracy: 0.61962962963 test accuracy: 0.585333333333
episode 3886 end
episode 3887 start
episode columns: [7, 11, 3, 10, 1, 8, 0, 9, 6, 4, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 7, 9, 6, 11, 8, 10, 1, 4, 5, 2] train accuracy: 0.684148148148 test accuracy: 0.654
episode 3887 end
episode 

episode 3918 start
episode columns: [2, 5, 8, 7, 10, 9, 6, 11, 1, 3, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 6, 11, 0, 2, 8, 7, 3, 9, 1, 4, 10] train accuracy: 0.619407407407 test accuracy: 0.5855
episode 3918 end
episode 3919 start
episode columns: [9, 2, 5, 8, 3, 4, 6, 7, 11, 0, 1, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 5, 6, 11, 0, 2, 8, 7, 3, 1, 4, 10] train accuracy: 0.619537037037 test accuracy: 0.583166666667
episode 3919 end
episode 3920 start
episode columns: [10, 2, 9, 1, 8, 7, 11, 4, 6, 5, 3, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 5, 6, 11, 0, 2, 8, 7, 3, 1, 4, 10] train accuracy: 0.619777777778 test accuracy: 0.5875
episode 3920 end
episode 3921 start
episode columns: [7, 10, 3, 4, 2, 11, 6, 5, 0, 1, 9, 8] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 5, 6, 11, 0, 2, 8, 7, 3, 1, 4, 10] train accuracy: 0.619833333333 test accuracy: 0.583166666667
episode 3921 end
episode 39

episode 3952 start
episode columns: [8, 7, 5, 6, 2, 11, 3, 1, 4, 9, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 7, 1, 4, 0, 6, 9, 5, 10, 2, 8] train accuracy: 0.677333333333 test accuracy: 0.616833333333
episode 3952 end
episode 3953 start
episode columns: [9, 11, 1, 8, 3, 0, 4, 5, 7, 6, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[3, 7, 1, 4, 0, 6, 9, 5, 10, 2, 8] train accuracy: 0.677388888889 test accuracy: 0.616833333333
episode 3953 end
episode 3954 start
episode columns: [4, 10, 1, 8, 6, 0, 3, 5, 2, 7, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 7, 1, 4, 0, 6, 9, 5, 10, 2, 8] train accuracy: 0.677277777778 test accuracy: 0.617166666667
episode 3954 end
episode 3955 start
episode columns: [9, 11, 3, 7, 8, 0, 1, 4, 10, 2, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 7, 1, 4, 0, 6, 9, 5, 10, 2, 8] train accuracy: 0.677222222222 test accuracy: 0.617666666667
episode 3955 end
episode 3

episode 3986 start
episode columns: [9, 7, 2, 11, 5, 1, 6, 8, 4, 10, 3, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 6, 8, 7, 1, 4, 5, 11, 0, 9] train accuracy: 0.657259259259 test accuracy: 0.600333333333
episode 3986 end
episode 3987 start
episode columns: [11, 1, 5, 4, 9, 10, 0, 3, 7, 8, 2, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 6, 8, 7, 1, 4, 5, 11, 0, 9, 10, 2] train accuracy: 0.619462962963 test accuracy: 0.585
episode 3987 end
episode 3988 start
episode columns: [6, 8, 10, 9, 7, 0, 4, 3, 11, 2, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 6, 8, 7, 1, 5, 11, 4, 9, 10, 2] train accuracy: 0.684166666667 test accuracy: 0.655
episode 3988 end
episode 3989 start
episode columns: [8, 5, 11, 4, 3, 7, 0, 9, 2, 1, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 6, 8, 7, 1, 5, 4, 9, 10, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 3989 end
episode 3990 start
episode c

episode 4021 start
episode columns: [11, 10, 2, 8, 9, 7, 6, 5, 1, 0, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 1, 4, 5, 9, 7, 10, 6, 8] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4021 end
episode 4022 start
episode columns: [10, 5, 3, 7, 6, 0, 11, 1, 9, 4, 8, 2] epsilon: 0.9 learning rate: 0.09 error: 0.8
episode policy:[3, 1, 4, 5, 9, 7, 10, 6, 8] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4022 end
episode 4023 start
episode columns: [3, 6, 5, 4, 8, 10, 11, 7, 1, 0, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 4, 5, 9, 7, 10, 6, 8] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4023 end
episode 4024 start
episode columns: [3, 1, 6, 2, 7, 8, 10, 11, 4, 5, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 4, 5, 9, 7, 8, 6] train accuracy: 0.696203703704 test accuracy: 0.686166666667
episode 4024 end
episode 4025 start
episode columns: [6,

episode 4056 start
episode columns: [4, 7, 9, 11, 0, 1, 10, 2, 3, 6, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 5, 9, 6, 8, 7, 11, 0, 2, 4] train accuracy: 0.64537037037 test accuracy: 0.593166666667
episode 4056 end
episode 4057 start
episode columns: [0, 3, 10, 4, 7, 11, 8, 5, 6, 1, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 5, 9, 6, 8, 7, 11, 0, 2, 4] train accuracy: 0.645259259259 test accuracy: 0.594666666667
episode 4057 end
episode 4058 start
episode columns: [11, 3, 9, 5, 6, 2, 7, 1, 4, 8, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 5, 9, 6, 8, 7, 11, 0, 2, 4] train accuracy: 0.644777777778 test accuracy: 0.592666666667
episode 4058 end
episode 4059 start
episode columns: [3, 5, 1, 8, 11, 2, 9, 6, 4, 0, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[3, 5, 9, 6, 8, 7, 11, 0, 2, 4] train accuracy: 0.644962962963 test accuracy: 0.593833333333
episode 4059 end
episode 4060 start
episode 

episode 4090 start
episode columns: [3, 4, 1, 8, 2, 7, 9, 5, 10, 11, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[1, 0, 5, 3, 2, 8, 7, 10, 4, 6, 9] train accuracy: 0.677111111111 test accuracy: 0.6175
episode 4090 end
episode 4091 start
episode columns: [1, 6, 8, 7, 0, 11, 2, 10, 9, 4, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 0, 5, 3, 2, 8, 7, 10, 4, 6, 9] train accuracy: 0.677166666667 test accuracy: 0.616166666667
episode 4091 end
episode 4092 start
episode columns: [5, 3, 6, 11, 2, 4, 1, 10, 8, 9, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[1, 0, 5, 3, 2, 8, 7, 10, 4, 6, 9] train accuracy: 0.677277777778 test accuracy: 0.616333333333
episode 4092 end
episode 4093 start
episode columns: [0, 6, 11, 3, 1, 8, 5, 4, 9, 7, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 0, 5, 3, 2, 8, 7, 10, 4, 6, 9] train accuracy: 0.677203703704 test accuracy: 0.6165
episode 4093 end
episode 4094 start
episode

episode 4124 start
episode columns: [9, 10, 4, 7, 11, 5, 2, 3, 6, 1, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 5, 8, 3, 2, 9, 4, 7, 10, 11, 0] train accuracy: 0.633666666667 test accuracy: 0.5985
episode 4124 end
episode 4125 start
episode columns: [9, 2, 10, 0, 5, 6, 3, 1, 7, 4, 11, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 8, 6, 5, 4, 2, 3, 1, 10, 11, 0, 7] train accuracy: 0.619703703704 test accuracy: 0.584833333333
episode 4125 end
episode 4126 start
episode columns: [3, 2, 9, 8, 7, 5, 6, 1, 0, 11, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 2, 3, 1, 8, 6, 5] train accuracy: 0.695833333333 test accuracy: 0.686
episode 4126 end
episode 4127 start
episode columns: [0, 2, 9, 4, 8, 3, 1, 11, 6, 10, 5, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 2, 3, 5, 8, 6, 1, 10, 11, 0, 7] train accuracy: 0.619685185185 test accuracy: 0.586333333333
episode 4127 end
episode 4128 start
episode

episode 4158 start
episode columns: [1, 0, 4, 10, 6, 9, 5, 8, 2, 3, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 3, 10, 6, 1, 8, 4, 11, 2, 5, 7] train accuracy: 0.684166666667 test accuracy: 0.654166666667
episode 4158 end
episode 4159 start
episode columns: [11, 7, 5, 1, 3, 4, 9, 6, 10, 2, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 3, 10, 6, 1, 8, 4, 11, 2, 5, 7] train accuracy: 0.684055555556 test accuracy: 0.655
episode 4159 end
episode 4160 start
episode columns: [1, 0, 9, 3, 2, 4, 7, 5, 8, 10, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 3, 10, 6, 1, 8, 4, 11, 2, 5, 7] train accuracy: 0.684240740741 test accuracy: 0.654666666667
episode 4160 end
episode 4161 start
episode columns: [2, 9, 6, 1, 4, 8, 11, 3, 0, 10, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[2, 5, 8, 6, 3, 10, 9, 4, 11, 7, 1] train accuracy: 0.684166666667 test accuracy: 0.654666666667
episode 4161 end
episode 4162 star

episode 4192 start
episode columns: [6, 10, 9, 3, 1, 2, 7, 0, 8, 11, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 3, 10, 6, 7, 8, 4, 2, 5, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4192 end
episode 4193 start
episode columns: [8, 0, 9, 3, 5, 4, 10, 1, 6, 2, 11, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 3, 10, 6, 7, 8, 4, 2, 5, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4193 end
episode 4194 start
episode columns: [5, 0, 3, 6, 11, 7, 10, 8, 1, 9, 4, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 3, 10, 6, 7, 8, 4, 1] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 4194 end
episode 4195 start
episode columns: [9, 5, 2, 8, 11, 4, 1, 0, 10, 7, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 8, 3, 10, 6, 7, 4, 9] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 4195 end
episode 4196 start
episode column

episode 4226 start
episode columns: [5, 2, 1, 8, 10, 6, 3, 11, 0, 9, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[5, 6, 1, 7, 10, 8, 4, 9, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4226 end
episode 4227 start
episode columns: [0, 2, 6, 10, 8, 4, 7, 9, 1, 11, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.15
episode policy:[5, 6, 1, 7, 10, 8, 4, 9, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4227 end
episode 4228 start
episode columns: [6, 3, 8, 0, 11, 5, 9, 7, 10, 4, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 6, 1, 7, 10, 8, 4, 9, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4228 end
episode 4229 start
episode columns: [8, 7, 4, 9, 2, 5, 11, 6, 3, 0, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 6, 1, 7, 10, 8, 4, 9, 3, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4229 end
episode 4230 start
epi

episode 4260 start
episode columns: [4, 11, 10, 2, 8, 7, 1, 6, 9, 5, 0, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 4, 8, 3, 10, 6, 1, 7, 0, 11] train accuracy: 0.647296296296 test accuracy: 0.598
episode 4260 end
episode 4261 start
episode columns: [7, 9, 5, 2, 3, 8, 4, 0, 1, 6, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[5, 4, 8, 3, 10, 6, 1, 7, 0, 11] train accuracy: 0.647574074074 test accuracy: 0.596333333333
episode 4261 end
episode 4262 start
episode columns: [5, 10, 2, 8, 0, 3, 1, 7, 6, 9, 4, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 4, 8, 3, 10, 6, 9, 7, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4262 end
episode 4263 start
episode columns: [7, 1, 11, 6, 8, 10, 3, 0, 9, 4, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 4, 8, 3, 10, 6, 9, 7, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4263 end
episode 4264 start
episode columns:

episode 4294 start
episode columns: [6, 4, 0, 11, 2, 8, 3, 10, 5, 7, 1, 9] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[5, 4, 8, 1, 7, 10, 3, 2, 6, 9] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4294 end
episode 4295 start
episode columns: [5, 0, 6, 7, 1, 3, 9, 2, 8, 4, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 4, 8, 1, 7, 10, 3, 2, 6, 9] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4295 end
episode 4296 start
episode columns: [8, 9, 10, 11, 7, 3, 0, 2, 4, 6, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 4, 8, 1, 7, 10, 3, 2, 6, 9] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 4296 end
episode 4297 start
episode columns: [1, 11, 6, 2, 5, 3, 7, 10, 0, 9, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 4, 8, 1, 7, 10, 3, 2, 6, 9] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 4297 end
episode 4298 start
ep

episode 4328 start
episode columns: [5, 4, 11, 1, 0, 7, 10, 2, 3, 9, 8, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 4, 10, 3, 2, 8, 1, 7, 0, 11, 9, 6] train accuracy: 0.619703703704 test accuracy: 0.584833333333
episode 4328 end
episode 4329 start
episode columns: [5, 1, 2, 6, 10, 9, 7, 3, 4, 8, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[5, 4, 10, 3, 2, 8, 11, 9, 6, 7, 0, 1] train accuracy: 0.619777777778 test accuracy: 0.582333333333
episode 4329 end
episode 4330 start
episode columns: [2, 1, 5, 6, 0, 9, 10, 4, 8, 11, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 4, 10, 3, 2, 8, 11, 9, 6, 7, 0, 1] train accuracy: 0.620388888889 test accuracy: 0.59
episode 4330 end
episode 4331 start
episode columns: [5, 2, 8, 11, 4, 10, 0, 7, 3, 1, 6, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 4, 9, 8, 11, 10, 3, 2, 6, 7, 0, 1] train accuracy: 0.619740740741 test accuracy: 0.588333333333
episode 4331 end
episod

episode 4361 start
episode columns: [4, 9, 10, 7, 6, 3, 2, 8, 0, 11, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 7, 0, 8, 1, 2, 11, 4, 6, 3, 5, 9] train accuracy: 0.61962962963 test accuracy: 0.587833333333
episode 4361 end
episode 4362 start
episode columns: [10, 4, 6, 2, 5, 1, 8, 11, 9, 3, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[10, 7, 0, 8, 11, 4, 6, 1, 2, 9, 5, 3] train accuracy: 0.620481481481 test accuracy: 0.582333333333
episode 4362 end
episode 4363 start
episode columns: [1, 10, 7, 8, 4, 0, 5, 6, 3, 2, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 3, 7, 0, 8, 11, 4, 6, 1, 2, 9, 5] train accuracy: 0.619537037037 test accuracy: 0.584166666667
episode 4363 end
episode 4364 start
episode columns: [6, 8, 0, 10, 2, 1, 7, 3, 11, 9, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[10, 3, 7, 0, 8, 11, 4, 6, 1, 2, 9, 5] train accuracy: 0.620055555556 test accuracy: 0.586833333333
episode 4364 e

episode 4395 start
episode columns: [8, 11, 5, 2, 3, 7, 4, 6, 0, 9, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 3, 10, 7, 11, 4, 1, 2, 9, 8] train accuracy: 0.689222222222 test accuracy: 0.664166666667
episode 4395 end
episode 4396 start
episode columns: [6, 0, 11, 2, 5, 1, 9, 10, 8, 4, 3, 7] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[6, 3, 7, 11, 4, 1, 2, 10, 9, 8] train accuracy: 0.689185185185 test accuracy: 0.664333333333
episode 4396 end
episode 4397 start
episode columns: [5, 11, 2, 10, 3, 7, 0, 1, 8, 9, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 3, 7, 11, 4, 1, 2, 10, 9, 8] train accuracy: 0.689259259259 test accuracy: 0.664
episode 4397 end
episode 4398 start
episode columns: [6, 10, 7, 2, 4, 1, 8, 5, 11, 3, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 3, 7, 11, 4, 1, 2, 10, 9, 8] train accuracy: 0.689166666667 test accuracy: 0.664833333333
episode 4398 end
episode 4399 start
episode 

episode 4429 start
episode columns: [2, 10, 9, 4, 8, 7, 11, 3, 0, 6, 1, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 5, 3, 7, 0, 1, 2, 9, 8, 4, 10] train accuracy: 0.677259259259 test accuracy: 0.618
episode 4429 end
episode 4430 start
episode columns: [5, 2, 9, 10, 0, 8, 7, 1, 11, 6, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 5, 3, 7, 0, 1, 2, 9, 8, 4, 10] train accuracy: 0.677240740741 test accuracy: 0.617333333333
episode 4430 end
episode 4431 start
episode columns: [1, 0, 6, 5, 3, 10, 2, 8, 7, 11, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 2, 9, 8, 1, 10, 7, 0, 4, 3, 5] train accuracy: 0.677185185185 test accuracy: 0.616333333333
episode 4431 end
episode 4432 start
episode columns: [10, 3, 0, 4, 5, 8, 7, 2, 9, 6, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 2, 9, 8, 1, 10, 7, 0, 4, 3, 5] train accuracy: 0.677185185185 test accuracy: 0.616166666667
episode 4432 end
episode 4433 start


episode 4463 start
episode columns: [7, 9, 10, 1, 11, 8, 6, 4, 3, 5, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 8, 1, 10, 9, 6, 3, 2, 5] train accuracy: 0.695425925926 test accuracy: 0.683333333333
episode 4463 end
episode 4464 start
episode columns: [6, 0, 10, 2, 5, 9, 7, 3, 1, 8, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 8, 1, 10, 9, 6, 3, 2, 5] train accuracy: 0.695425925926 test accuracy: 0.683333333333
episode 4464 end
episode 4465 start
episode columns: [0, 1, 5, 9, 2, 3, 11, 6, 8, 7, 10, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 8, 1, 10, 9, 6, 3, 2, 5] train accuracy: 0.695425925926 test accuracy: 0.683333333333
episode 4465 end
episode 4466 start
episode columns: [4, 2, 11, 7, 3, 6, 9, 10, 0, 5, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 8, 1, 10, 9, 6, 3, 2, 11, 4] train accuracy: 0.689259259259 test accuracy: 0.664166666667
episode 4466 end
episode 4467 start
episode co

episode 4498 start
episode columns: [5, 11, 0, 10, 2, 1, 3, 6, 8, 7, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[1, 10, 8, 6, 3, 7, 11, 4, 9] train accuracy: 0.692388888889 test accuracy: 0.674166666667
episode 4498 end
episode 4499 start
episode columns: [0, 6, 8, 2, 1, 11, 3, 9, 7, 4, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 10, 8, 6, 3, 7, 11, 4, 9] train accuracy: 0.692388888889 test accuracy: 0.674166666667
episode 4499 end
episode 4500 start
episode columns: [1, 2, 3, 7, 4, 11, 6, 0, 9, 10, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 2, 7, 11, 6, 3, 0, 1, 10, 8, 4, 9] train accuracy: 0.619907407407 test accuracy: 0.583333333333
episode 4500 end
episode 4501 start
episode columns: [6, 9, 0, 2, 7, 3, 1, 11, 10, 5, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 2, 7, 11, 6, 3, 0, 1, 10, 8, 9, 4] train accuracy: 0.620277777778 test accuracy: 0.585333333333
episode 4501 end
episode 4502 s

episode 4532 start
episode columns: [4, 8, 11, 1, 0, 6, 7, 5, 3, 9, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 9, 6, 2, 1, 10, 8, 3, 0, 5, 11, 4] train accuracy: 0.619240740741 test accuracy: 0.5835
episode 4532 end
episode 4533 start
episode columns: [3, 10, 6, 7, 2, 4, 0, 9, 5, 8, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 9, 6, 2, 1, 10, 8, 3, 0, 5, 11, 4] train accuracy: 0.619037037037 test accuracy: 0.586
episode 4533 end
episode 4534 start
episode columns: [2, 11, 3, 7, 6, 1, 10, 9, 8, 5, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 9, 6, 2, 1, 10, 8, 3, 0, 5, 11, 4] train accuracy: 0.620203703704 test accuracy: 0.582833333333
episode 4534 end
episode 4535 start
episode columns: [8, 3, 10, 6, 9, 4, 11, 0, 5, 1, 7, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[7, 9, 6, 2, 1, 10, 8, 3, 0, 5, 11, 4] train accuracy: 0.620351851852 test accuracy: 0.583666666667
episode 4535 end
episode 453

episode 4566 start
episode columns: [0, 2, 4, 8, 9, 1, 7, 11, 3, 5, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[2, 1, 5, 11, 7, 9, 6, 3, 0, 8] train accuracy: 0.631814814815 test accuracy: 0.583166666667
episode 4566 end
episode 4567 start
episode columns: [3, 1, 5, 0, 2, 7, 11, 10, 9, 6, 4, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[2, 1, 5, 11, 7, 9, 6, 3, 0, 8] train accuracy: 0.631833333333 test accuracy: 0.584333333333
episode 4567 end
episode 4568 start
episode columns: [9, 7, 6, 3, 1, 0, 4, 8, 11, 10, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[2, 1, 5, 11, 7, 9, 6, 3, 0, 8] train accuracy: 0.631611111111 test accuracy: 0.583833333333
episode 4568 end
episode 4569 start
episode columns: [3, 7, 4, 10, 0, 8, 6, 2, 1, 11, 9, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 5, 11, 7, 9, 6, 8] train accuracy: 0.692259259259 test accuracy: 0.676
episode 4569 end
episode 4570 start
episode columns: [

episode 4600 start
episode columns: [4, 3, 6, 1, 10, 0, 5, 2, 7, 9, 11, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 4, 8, 6, 5, 9, 7, 0, 11] train accuracy: 0.657296296296 test accuracy: 0.601166666667
episode 4600 end
episode 4601 start
episode columns: [2, 11, 0, 4, 8, 1, 7, 6, 3, 10, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 1, 4, 6, 8, 9, 7, 0, 11] train accuracy: 0.665648148148 test accuracy: 0.6065
episode 4601 end
episode 4602 start
episode columns: [7, 9, 5, 10, 11, 2, 8, 1, 6, 0, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 1, 4, 6, 8, 9, 7, 0, 11] train accuracy: 0.6655 test accuracy: 0.605166666667
episode 4602 end
episode 4603 start
episode columns: [3, 5, 2, 4, 1, 8, 9, 0, 11, 7, 6, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 1, 4, 6, 8, 9, 7, 0, 5] train accuracy: 0.692907407407 test accuracy: 0.669333333333
episode 4603 end
episode 4604 start
episode columns: [0, 11, 10, 

episode 4635 start
episode columns: [3, 6, 11, 9, 10, 7, 5, 2, 1, 0, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 7, 1, 2, 9, 6, 4, 8] train accuracy: 0.695666666667 test accuracy: 0.686
episode 4635 end
episode 4636 start
episode columns: [10, 3, 1, 2, 5, 0, 9, 4, 8, 7, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 7, 1, 4, 6, 9, 0, 11] train accuracy: 0.669648148148 test accuracy: 0.617333333333
episode 4636 end
episode 4637 start
episode columns: [0, 9, 11, 8, 6, 3, 7, 1, 2, 5, 4, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 7, 1, 4, 6, 9, 0, 11] train accuracy: 0.6695 test accuracy: 0.6155
episode 4637 end
episode 4638 start
episode columns: [3, 5, 0, 11, 1, 2, 10, 8, 9, 7, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 7, 1, 4, 6, 9, 0, 11] train accuracy: 0.669574074074 test accuracy: 0.615833333333
episode 4638 end
episode 4639 start
episode columns: [9, 3, 11, 6, 4, 0, 1, 8, 7, 2, 10] 

episode 4670 start
episode columns: [6, 7, 11, 8, 1, 4, 3, 0, 10, 5, 9, 2] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 3, 1, 5, 6, 9, 8, 7, 0, 11] train accuracy: 0.657518518519 test accuracy: 0.601166666667
episode 4670 end
episode 4671 start
episode columns: [11, 5, 7, 3, 6, 9, 8, 10, 1, 2, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 3, 1, 5, 6, 9, 8, 7, 0, 11] train accuracy: 0.657407407407 test accuracy: 0.600833333333
episode 4671 end
episode 4672 start
episode columns: [10, 9, 4, 7, 11, 6, 0, 5, 2, 1, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 3, 1, 5, 6, 9, 8, 7, 0, 11] train accuracy: 0.65737037037 test accuracy: 0.601166666667
episode 4672 end
episode 4673 start
episode columns: [9, 10, 1, 8, 5, 3, 11, 0, 2, 7, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 3, 1, 5, 6, 9, 8, 7, 0, 11] train accuracy: 0.656907407407 test accuracy: 0.600333333333
episode 4673 end
episode 4674 start
episo

episode 4704 start
episode columns: [2, 8, 4, 1, 11, 9, 0, 5, 6, 10, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 10, 0, 1, 7, 9, 8, 6, 4, 5, 2] train accuracy: 0.677314814815 test accuracy: 0.616333333333
episode 4704 end
episode 4705 start
episode columns: [8, 7, 0, 3, 10, 4, 1, 6, 2, 5, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 10, 0, 1, 7, 9, 8, 6, 4, 5, 2] train accuracy: 0.677111111111 test accuracy: 0.616666666667
episode 4705 end
episode 4706 start
episode columns: [4, 9, 11, 3, 1, 8, 7, 2, 0, 6, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[3, 10, 0, 1, 7, 9, 8, 6, 4, 5, 2] train accuracy: 0.677148148148 test accuracy: 0.617
episode 4706 end
episode 4707 start
episode columns: [7, 11, 3, 10, 9, 8, 6, 5, 4, 1, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 0, 1, 7, 9, 8, 6, 4, 5, 2] train accuracy: 0.677203703704 test accuracy: 0.616666666667
episode 4707 end
episode 4708 star

episode 4738 start
episode columns: [3, 5, 7, 0, 1, 6, 10, 4, 2, 9, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 7, 9, 8, 6, 1, 11, 0, 5, 2, 4] train accuracy: 0.61987037037 test accuracy: 0.588
episode 4738 end
episode 4739 start
episode columns: [4, 10, 9, 11, 3, 7, 2, 5, 8, 1, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 10, 7, 2, 9, 8, 6, 1, 11, 0, 5, 4] train accuracy: 0.619203703704 test accuracy: 0.589166666667
episode 4739 end
episode 4740 start
episode columns: [1, 3, 4, 11, 5, 2, 0, 9, 10, 7, 6, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 10, 7, 2, 9, 8, 6, 1, 11, 0, 5, 4] train accuracy: 0.619537037037 test accuracy: 0.585
episode 4740 end
episode 4741 start
episode columns: [10, 7, 2, 4, 5, 1, 11, 8, 6, 0, 3, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 10, 7, 2, 9, 8, 6, 1, 11, 0, 5, 4] train accuracy: 0.619685185185 test accuracy: 0.584
episode 4741 end
episode 4742 start
ep

episode 4772 start
episode columns: [0, 1, 8, 11, 4, 10, 7, 2, 5, 3, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 6, 3, 10, 4, 5, 8, 1, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4772 end
episode 4773 start
episode columns: [7, 9, 8, 11, 1, 5, 3, 10, 0, 2, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 6, 3, 10, 4, 5, 8, 1, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4773 end
episode 4774 start
episode columns: [0, 8, 11, 2, 3, 9, 6, 10, 7, 1, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[7, 6, 3, 10, 4, 5, 8, 1, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4774 end
episode 4775 start
episode columns: [9, 5, 3, 6, 8, 11, 10, 7, 2, 0, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 6, 3, 10, 4, 5, 8, 1, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4775 end
episode 4776 start
episode column

episode 4806 start
episode columns: [11, 2, 5, 7, 9, 10, 0, 6, 1, 3, 8, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 5, 10, 4, 2, 3, 9, 8, 6] train accuracy: 0.695888888889 test accuracy: 0.687
episode 4806 end
episode 4807 start
episode columns: [7, 6, 9, 11, 3, 10, 4, 2, 5, 0, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[7, 5, 10, 4, 2, 3, 9, 8, 6] train accuracy: 0.695888888889 test accuracy: 0.686833333333
episode 4807 end
episode 4808 start
episode columns: [5, 10, 3, 2, 11, 0, 6, 1, 4, 9, 8, 7] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 5, 10, 4, 2, 3, 9, 8, 6, 1, 0, 11] train accuracy: 0.619537037037 test accuracy: 0.5855
episode 4808 end
episode 4809 start
episode columns: [3, 10, 2, 9, 6, 7, 4, 1, 11, 0, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 5, 8, 3, 10, 4, 2, 9] train accuracy: 0.696148148148 test accuracy: 0.6865
episode 4809 end
episode 4810 start
episode columns: [4, 5, 9, 10, 0,

episode 4842 start
episode columns: [8, 3, 4, 1, 7, 11, 2, 9, 6, 5, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 5, 8, 3, 9, 6, 4, 2, 11, 0, 10] train accuracy: 0.633907407407 test accuracy: 0.5955
episode 4842 end
episode 4843 start
episode columns: [4, 1, 8, 2, 10, 7, 11, 5, 6, 9, 0, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 5, 8, 3, 9, 6, 4, 2, 10, 0, 11, 1] train accuracy: 0.619740740741 test accuracy: 0.586833333333
episode 4843 end
episode 4844 start
episode columns: [3, 10, 1, 11, 2, 4, 9, 0, 6, 8, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[7, 5, 8, 3, 9, 6, 4, 2, 10, 0, 11, 1] train accuracy: 0.619 test accuracy: 0.586166666667
episode 4844 end
episode 4845 start
episode columns: [0, 7, 4, 6, 8, 10, 9, 2, 5, 1, 3, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 5, 8, 10, 4, 2, 3, 9, 6, 1] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4845 end
episode 4846 start
e

episode 4877 start
episode columns: [0, 11, 2, 9, 10, 7, 5, 3, 8, 1, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 2, 3, 5, 8, 7, 9, 6] train accuracy: 0.696074074074 test accuracy: 0.6885
episode 4877 end
episode 4878 start
episode columns: [4, 11, 3, 6, 8, 2, 0, 5, 1, 9, 10, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 2, 3, 5, 8, 7, 9, 6] train accuracy: 0.696074074074 test accuracy: 0.6885
episode 4878 end
episode 4879 start
episode columns: [3, 5, 0, 7, 9, 1, 8, 2, 11, 10, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 2, 3, 5, 8, 7, 9, 6] train accuracy: 0.696074074074 test accuracy: 0.6885
episode 4879 end
episode 4880 start
episode columns: [4, 2, 0, 1, 8, 11, 10, 6, 7, 9, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 2, 3, 5, 8, 7, 9, 6] train accuracy: 0.696074074074 test accuracy: 0.6885
episode 4880 end
episode 4881 start
episode columns: [0, 4, 2, 10, 11, 5, 8, 7, 6, 1, 3, 9] epsilon

episode 4912 start
episode columns: [3, 6, 5, 1, 8, 10, 4, 2, 7, 11, 0, 9] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[4, 5, 7, 9, 8, 10, 6, 3, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4912 end
episode 4913 start
episode columns: [11, 3, 5, 0, 9, 6, 1, 8, 10, 2, 4, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 5, 7, 9, 8, 10, 6, 3, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 4913 end
episode 4914 start
episode columns: [2, 10, 0, 11, 9, 8, 5, 6, 4, 7, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 5, 7, 9, 8, 10, 0, 11, 6, 3, 1] train accuracy: 0.647648148148 test accuracy: 0.596833333333
episode 4914 end
episode 4915 start
episode columns: [2, 10, 7, 0, 5, 8, 3, 11, 6, 4, 1, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[4, 5, 7, 9, 8, 10, 0, 11, 6, 3, 1] train accuracy: 0.647314814815 test accuracy: 0.597
episode 4915 end
episode 4916 start
episode col

episode 4946 start
episode columns: [2, 6, 3, 5, 1, 9, 7, 10, 8, 0, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 1, 9, 2, 11, 10, 7, 5, 4, 6] train accuracy: 0.68687037037 test accuracy: 0.658333333333
episode 4946 end
episode 4947 start
episode columns: [7, 9, 0, 10, 2, 4, 8, 6, 5, 1, 3, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 1, 9, 2, 11, 10, 7, 5, 4, 6] train accuracy: 0.686925925926 test accuracy: 0.657833333333
episode 4947 end
episode 4948 start
episode columns: [3, 1, 2, 9, 6, 7, 5, 11, 4, 10, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 1, 9, 2, 11, 10, 7, 5, 4, 6] train accuracy: 0.686907407407 test accuracy: 0.657666666667
episode 4948 end
episode 4949 start
episode columns: [1, 9, 4, 6, 8, 3, 10, 2, 11, 5, 7, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 1, 9, 2, 11, 10, 7, 5, 4, 6] train accuracy: 0.686851851852 test accuracy: 0.658666666667
episode 4949 end
episode 4950 start
ep

episode 4980 start
episode columns: [5, 10, 7, 9, 11, 1, 8, 3, 0, 6, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 2, 4, 8, 10, 5, 1, 9, 7, 6, 11] train accuracy: 0.684222222222 test accuracy: 0.654833333333
episode 4980 end
episode 4981 start
episode columns: [3, 11, 0, 4, 8, 2, 10, 5, 6, 1, 9, 7] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 2, 4, 7, 10, 5, 1, 9, 8, 6, 11, 0] train accuracy: 0.620185185185 test accuracy: 0.584166666667
episode 4981 end
episode 4982 start
episode columns: [10, 2, 6, 11, 7, 8, 9, 1, 3, 4, 5, 0] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 2, 4, 5, 1, 9, 8, 10, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 4982 end
episode 4983 start
episode columns: [7, 2, 1, 8, 10, 5, 0, 9, 6, 11, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 2, 4, 5, 0, 8, 7, 10, 6, 1, 9] train accuracy: 0.677166666667 test accuracy: 0.617666666667
episode 4983 end
episode 49

episode 5014 start
episode columns: [1, 0, 3, 9, 2, 4, 7, 6, 10, 5, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 2, 11, 6, 4, 9, 1, 8, 7, 10, 5, 0] train accuracy: 0.619888888889 test accuracy: 0.584666666667
episode 5014 end
episode 5015 start
episode columns: [8, 4, 5, 3, 11, 0, 9, 2, 6, 10, 7] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[3, 2, 11, 6, 4, 9, 1, 8, 7, 10, 5, 0] train accuracy: 0.619259259259 test accuracy: 0.586666666667
episode 5015 end
episode 5016 start
episode columns: [9, 5, 4, 6, 8, 2, 10, 11, 0, 7, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[3, 2, 11, 6, 4, 9, 1, 8, 7, 10, 5, 0] train accuracy: 0.619462962963 test accuracy: 0.5845
episode 5016 end
episode 5017 start
episode columns: [11, 3, 9, 6, 10, 7, 5, 4, 8, 0, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[3, 2, 11, 6, 10, 8, 7, 9, 1, 5, 4] train accuracy: 0.684148148148 test accuracy: 0.654333333333
episode 5017 end
episode 5

episode 5048 start
episode columns: [3, 2, 5, 4, 9, 10, 6, 1, 8, 7, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 2, 4, 5, 10, 8, 6, 11, 7, 9, 1] train accuracy: 0.684111111111 test accuracy: 0.654833333333
episode 5048 end
episode 5049 start
episode columns: [2, 6, 8, 9, 4, 7, 11, 3, 10, 5, 0, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 2, 4, 5, 10, 8, 6, 11, 7, 9, 1] train accuracy: 0.68412962963 test accuracy: 0.6545
episode 5049 end
episode 5050 start
episode columns: [7, 3, 1, 11, 4, 5, 9, 8, 2, 0, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 2, 4, 5, 10, 8, 6, 11, 7, 9, 1] train accuracy: 0.684314814815 test accuracy: 0.654
episode 5050 end
episode 5051 start
episode columns: [3, 6, 8, 5, 10, 2, 4, 9, 0, 1, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 6, 2, 4, 3, 7, 9, 1, 10, 5] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 5051 end
episode 5052 start
episode 

episode 5082 start
episode columns: [9, 8, 1, 11, 6, 7, 0, 4, 5, 10, 2, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 9, 5, 10, 7, 6, 2, 11, 4, 1] train accuracy: 0.684166666667 test accuracy: 0.654833333333
episode 5082 end
episode 5083 start
episode columns: [9, 7, 6, 4, 2, 0, 10, 3, 8, 1, 11, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 3, 9, 5, 10, 7, 6, 2, 11, 4, 1] train accuracy: 0.684037037037 test accuracy: 0.653833333333
episode 5083 end
episode 5084 start
episode columns: [6, 2, 5, 4, 11, 8, 9, 7, 3, 0, 10, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 3, 9, 5, 10, 7, 6, 2, 11, 4, 1] train accuracy: 0.684166666667 test accuracy: 0.654166666667
episode 5084 end
episode 5085 start
episode columns: [6, 3, 0, 9, 8, 5, 7, 2, 10, 4, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[8, 3, 9, 5, 10, 7, 6, 2, 11, 4] train accuracy: 0.687777777778 test accuracy: 0.663333333333
episode 5085 end
episode 5

episode 5116 start
episode columns: [1, 3, 6, 8, 11, 4, 7, 9, 5, 10, 2, 0] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[8, 6, 2, 11, 3, 5, 10, 9, 7, 4, 1, 0] train accuracy: 0.619944444444 test accuracy: 0.588
episode 5116 end
episode 5117 start
episode columns: [7, 9, 2, 1, 5, 6, 10, 0, 4, 8, 3, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 6, 2, 11, 3, 5, 10, 9, 7, 4, 1, 0] train accuracy: 0.620574074074 test accuracy: 0.583833333333
episode 5117 end
episode 5118 start
episode columns: [7, 0, 4, 2, 3, 8, 6, 1, 9, 11, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 6, 2, 11, 3, 5, 10, 9, 7, 4, 1, 0] train accuracy: 0.6195 test accuracy: 0.585833333333
episode 5118 end
episode 5119 start
episode columns: [7, 6, 2, 3, 9, 5, 10, 1, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 6, 2, 11, 3, 5, 10, 1, 9, 7, 4] train accuracy: 0.684111111111 test accuracy: 0.655166666667
episode 5119 end
episode 5120 start
ep

episode 5150 start
episode columns: [8, 0, 2, 4, 1, 7, 5, 3, 6, 9, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 7, 10, 5, 1, 6, 11, 4, 3, 9] train accuracy: 0.690722222222 test accuracy: 0.6735
episode 5150 end
episode 5151 start
episode columns: [2, 5, 11, 4, 1, 7, 10, 0, 6, 3, 9, 8] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 7, 10, 5, 1, 6, 11, 4, 3, 9] train accuracy: 0.690740740741 test accuracy: 0.673666666667
episode 5151 end
episode 5152 start
episode columns: [8, 0, 2, 4, 1, 7, 10, 9, 3, 6, 11, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 7, 10, 5, 1, 6, 2, 4, 3, 9, 11] train accuracy: 0.684203703704 test accuracy: 0.654333333333
episode 5152 end
episode 5153 start
episode columns: [3, 4, 0, 2, 7, 8, 1, 5, 6, 9, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 7, 10, 5, 1, 6, 2, 4, 3, 9] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 5153 end
episode 5154 start
episo

episode 5184 start
episode columns: [3, 4, 11, 1, 0, 6, 10, 2, 9, 5, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 6, 11, 4, 3, 9, 7, 10, 5, 1] train accuracy: 0.690722222222 test accuracy: 0.673333333333
episode 5184 end
episode 5185 start
episode columns: [3, 2, 4, 11, 7, 6, 10, 5, 8, 1, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[8, 6, 11, 4, 3, 9, 7, 10, 5, 1] train accuracy: 0.690796296296 test accuracy: 0.6735
episode 5185 end
episode 5186 start
episode columns: [4, 9, 11, 6, 3, 1, 7, 2, 5, 10, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 6, 11, 4, 3, 9, 7, 10, 5, 1] train accuracy: 0.690703703704 test accuracy: 0.674
episode 5186 end
episode 5187 start
episode columns: [8, 1, 3, 9, 2, 7, 11, 5, 0, 4, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 6, 11, 4, 3, 9, 7, 10, 5, 1] train accuracy: 0.690777777778 test accuracy: 0.674166666667
episode 5187 end
episode 5188 start
episode columns

episode 5218 start
episode columns: [10, 11, 4, 7, 9, 3, 5, 2, 0, 8, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 7, 0, 8, 5, 3, 6, 10, 1, 4] train accuracy: 0.689851851852 test accuracy: 0.669333333333
episode 5218 end
episode 5219 start
episode columns: [9, 1, 2, 11, 7, 3, 5, 0, 4, 10, 8, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 7, 0, 8, 5, 3, 6, 10, 1, 4] train accuracy: 0.68987037037 test accuracy: 0.669166666667
episode 5219 end
episode 5220 start
episode columns: [2, 10, 3, 6, 11, 9, 5, 0, 1, 7, 4, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 7, 0, 8, 5, 3, 6, 10, 1, 4] train accuracy: 0.689777777778 test accuracy: 0.669
episode 5220 end
episode 5221 start
episode columns: [5, 3, 2, 1, 7, 9, 4, 8, 10, 11, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 5, 3, 6, 10, 1, 7, 0, 8, 2, 11] train accuracy: 0.619518518519 test accuracy: 0.586
episode 5221 end
episode 5222 start
episode columns: 

episode 5252 start
episode columns: [10, 1, 0, 8, 6, 11, 7, 2, 9, 3, 5, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 4, 5, 3, 6, 10, 1, 7, 0, 8, 2, 11] train accuracy: 0.619444444444 test accuracy: 0.588666666667
episode 5252 end
episode 5253 start
episode columns: [1, 3, 5, 9, 7, 2, 0, 11, 4, 6, 8, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 4, 5, 3, 6, 10, 1, 7, 0, 8, 2, 11] train accuracy: 0.619666666667 test accuracy: 0.585166666667
episode 5253 end
episode 5254 start
episode columns: [5, 1, 11, 0, 8, 6, 10, 4, 3, 7, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 4, 5, 3, 6, 10, 1, 7, 0, 11, 8, 2] train accuracy: 0.620259259259 test accuracy: 0.584166666667
episode 5254 end
episode 5255 start
episode columns: [4, 5, 0, 3, 2, 6, 10, 8, 9, 7, 11, 1] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 4, 5, 3, 6, 2, 7, 0, 11, 10, 1, 8] train accuracy: 0.619888888889 test accuracy: 0.5835
episode 5255 end
epis

episode 5286 start
episode columns: [1, 7, 11, 2, 5, 4, 3, 0, 10, 8, 6, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 5, 8, 2, 4, 1, 7, 9, 0, 10] train accuracy: 0.681203703704 test accuracy: 0.6205
episode 5286 end
episode 5287 start
episode columns: [8, 7, 4, 9, 1, 5, 2, 6, 3, 0, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[8, 2, 4, 5, 3, 9, 7, 1, 6, 11, 0, 10] train accuracy: 0.619796296296 test accuracy: 0.586166666667
episode 5287 end
episode 5288 start
episode columns: [1, 5, 3, 10, 9, 8, 11, 7, 4, 2, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 2, 4, 5, 3, 9, 7, 1, 6, 11, 0, 10] train accuracy: 0.619796296296 test accuracy: 0.586833333333
episode 5288 end
episode 5289 start
episode columns: [9, 3, 5, 11, 4, 7, 1, 2, 6, 8, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[8, 2, 4, 5, 3, 9, 7, 1, 6, 11, 0, 10] train accuracy: 0.619425925926 test accuracy: 0.583333333333
episode 5289 end
episode 5

episode 5320 start
episode columns: [7, 6, 2, 0, 5, 11, 1, 3, 4, 8, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[7, 1, 6, 9, 8, 5, 3, 4, 2, 11, 10] train accuracy: 0.684166666667 test accuracy: 0.6545
episode 5320 end
episode 5321 start
episode columns: [7, 8, 2, 4, 3, 9, 5, 10, 0, 6, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 1, 6, 9, 8, 2, 4, 5, 3] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 5321 end
episode 5322 start
episode columns: [7, 1, 10, 6, 8, 0, 11, 3, 5, 2, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 1, 6, 9, 8, 2, 4, 5, 3] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 5322 end
episode 5323 start
episode columns: [3, 6, 10, 0, 11, 7, 1, 5, 9, 8, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[7, 1, 6, 9, 3, 5, 8, 2, 4, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 5323 end
episode 5324 start
episode columns: [6, 0, 11, 10, 

episode 5355 start
episode columns: [3, 10, 1, 2, 8, 4, 11, 5, 9, 0, 6, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 1, 4, 10, 6, 9, 0, 5, 3] train accuracy: 0.691203703704 test accuracy: 0.6735
episode 5355 end
episode 5356 start
episode columns: [7, 5, 3, 9, 0, 11, 8, 1, 10, 6, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 7, 1, 4, 10, 6, 9, 0, 11, 3, 5] train accuracy: 0.64737037037 test accuracy: 0.595333333333
episode 5356 end
episode 5357 start
episode columns: [9, 5, 1, 4, 6, 11, 2, 3, 7, 10, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[8, 7, 1, 4, 10, 6, 9, 0, 11, 3, 5] train accuracy: 0.647574074074 test accuracy: 0.596666666667
episode 5357 end
episode 5358 start
episode columns: [7, 1, 0, 3, 8, 9, 2, 5, 11, 10, 4, 6] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[8, 7, 1, 4, 10, 6, 9, 0, 11, 3, 5] train accuracy: 0.647314814815 test accuracy: 0.599666666667
episode 5358 end
episode 5359 start
e

episode 5389 start
episode columns: [5, 9, 1, 10, 4, 3, 7, 6, 11, 0, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 6, 9, 1, 4, 5, 3, 10, 8, 2, 11] train accuracy: 0.68437037037 test accuracy: 0.654666666667
episode 5389 end
episode 5390 start
episode columns: [0, 2, 3, 7, 6, 11, 9, 1, 10, 8, 5, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 6, 9, 1, 4, 5, 3, 10, 8, 2, 11] train accuracy: 0.684277777778 test accuracy: 0.654333333333
episode 5390 end
episode 5391 start
episode columns: [0, 10, 4, 11, 3, 7, 2, 1, 8, 9, 6, 5] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[7, 6, 9, 1, 4, 5, 3, 10, 8, 2, 11] train accuracy: 0.684259259259 test accuracy: 0.654166666667
episode 5391 end
episode 5392 start
episode columns: [8, 1, 3, 6, 10, 4, 9, 0, 11, 7, 5, 2] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[7, 6, 9, 1, 4, 5, 3, 10, 8, 2, 11] train accuracy: 0.684037037037 test accuracy: 0.655333333333
episode 5392 end
episode

episode 5423 start
episode columns: [1, 5, 8, 7, 10, 3, 11, 2, 6, 4, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 8, 2, 9, 1, 4, 5, 3, 10, 6, 11, 0] train accuracy: 0.62 test accuracy: 0.586166666667
episode 5423 end
episode 5424 start
episode columns: [7, 6, 11, 0, 5, 1, 4, 10, 9, 2, 8, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 8, 2, 9, 1, 4, 5, 3, 10, 6, 11] train accuracy: 0.684277777778 test accuracy: 0.654166666667
episode 5424 end
episode 5425 start
episode columns: [7, 11, 1, 4, 5, 9, 2, 0, 10, 8, 3, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 2, 7, 4, 5, 9, 1, 6, 11, 3, 10] train accuracy: 0.684166666667 test accuracy: 0.6545
episode 5425 end
episode 5426 start
episode columns: [11, 5, 10, 0, 1, 7, 9, 3, 4, 8, 6, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[8, 2, 7, 4, 5, 9, 1, 6, 11, 3, 10] train accuracy: 0.684148148148 test accuracy: 0.655333333333
episode 5426 end
episode 5427 start
e

episode 5457 start
episode columns: [11, 5, 7, 3, 1, 0, 6, 4, 10, 2, 8, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[8, 9, 7, 4, 5, 1, 6, 10, 3, 2, 0, 11] train accuracy: 0.619648148148 test accuracy: 0.583833333333
episode 5457 end
episode 5458 start
episode columns: [8, 10, 5, 11, 0, 3, 2, 9, 7, 4, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 9, 7, 4, 5, 1, 3, 2, 6, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 5458 end
episode 5459 start
episode columns: [2, 0, 5, 7, 11, 4, 1, 3, 10, 8, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 9, 7, 4, 5, 1, 3, 2, 6, 10] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 5459 end
episode 5460 start
episode columns: [8, 11, 6, 1, 10, 9, 2, 5, 7, 3, 0, 4] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[7, 8, 9, 1, 4, 5, 3, 2, 6, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 5460 end
episode 5461 s

episode 5491 start
episode columns: [3, 5, 1, 0, 4, 10, 8, 6, 11, 7, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 4, 5, 8, 2, 7, 6, 0, 11, 3, 10] train accuracy: 0.619611111111 test accuracy: 0.584833333333
episode 5491 end
episode 5492 start
episode columns: [10, 2, 1, 7, 5, 9, 6, 0, 4, 11, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 1, 4, 5, 8, 2, 7, 6, 0, 11, 3, 10] train accuracy: 0.61962962963 test accuracy: 0.586333333333
episode 5492 end
episode 5493 start
episode columns: [8, 3, 5, 9, 2, 0, 11, 6, 4, 1, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 1, 4, 5, 8, 2, 7, 6, 0, 11, 3, 10] train accuracy: 0.619703703704 test accuracy: 0.583
episode 5493 end
episode 5494 start
episode columns: [4, 9, 2, 7, 11, 0, 3, 5, 1, 6, 10, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 1, 4, 5, 8, 2, 7, 6, 0, 11, 3, 10] train accuracy: 0.619759259259 test accuracy: 0.586
episode 5494 end
episode 5495

episode 5525 start
episode columns: [8, 3, 7, 6, 10, 2, 9, 4, 5, 11, 0, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 4, 7, 6, 10, 8, 3, 1, 2, 9] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 5525 end
episode 5526 start
episode columns: [0, 9, 3, 6, 1, 2, 11, 4, 5, 7, 10, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 4, 7, 10, 3, 1, 2, 9] train accuracy: 0.696185185185 test accuracy: 0.6845
episode 5526 end
episode 5527 start
episode columns: [2, 11, 0, 5, 10, 3, 9, 8, 6, 4, 7, 1] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[5, 4, 7, 10, 3, 1, 2, 9, 8, 6, 0, 11] train accuracy: 0.619592592593 test accuracy: 0.585666666667
episode 5527 end
episode 5528 start
episode columns: [1, 3, 4, 8, 7, 10, 11, 2, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.2
episode policy:[5, 4, 7, 10, 3, 1, 2, 9, 8, 6, 0, 11] train accuracy: 0.619740740741 test accuracy: 0.584
episode 5528 end
episode 5529 start
episode columns: 

episode 5559 start
episode columns: [3, 11, 0, 9, 2, 10, 6, 5, 4, 7, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 1, 7, 4, 6, 9, 8, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 5559 end
episode 5560 start
episode columns: [5, 6, 4, 1, 3, 8, 7, 11, 2, 9, 0, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 1, 7, 4, 6, 9, 8, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 5560 end
episode 5561 start
episode columns: [11, 6, 9, 0, 2, 3, 4, 5, 8, 10, 1, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 1, 7, 4, 6, 9, 8, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 5561 end
episode 5562 start
episode columns: [10, 1, 11, 2, 4, 3, 8, 0, 5, 7, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 1, 7, 4, 6, 9, 8, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 5562 end
episode 5563 start
episode columns

episode 5593 start
episode columns: [10, 8, 11, 2, 6, 4, 9, 1, 0, 3, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[8, 4, 5, 1, 7, 6, 9, 0, 2, 3, 10] train accuracy: 0.677111111111 test accuracy: 0.616833333333
episode 5593 end
episode 5594 start
episode columns: [11, 0, 10, 9, 7, 4, 5, 3, 2, 6, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 4, 5, 1, 7, 6, 9, 0, 2, 3, 10] train accuracy: 0.677203703704 test accuracy: 0.617
episode 5594 end
episode 5595 start
episode columns: [6, 4, 7, 11, 0, 2, 9, 3, 8, 1, 5, 10] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 4, 5, 1, 7, 6, 9, 0, 2, 3, 10] train accuracy: 0.677222222222 test accuracy: 0.616833333333
episode 5595 end
episode 5596 start
episode columns: [11, 2, 4, 6, 10, 1, 8, 5, 7, 9, 0, 3] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[8, 4, 5, 1, 7, 9, 0, 2, 6, 10, 3] train accuracy: 0.677148148148 test accuracy: 0.618
episode 5596 end
episode 5597 start
episode co

episode 5627 start
episode columns: [3, 4, 11, 2, 7, 10, 0, 6, 8, 1, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 0, 4, 6, 7, 3, 10, 1, 8, 2] train accuracy: 0.683037037037 test accuracy: 0.624
episode 5627 end
episode 5628 start
episode columns: [6, 4, 5, 9, 10, 0, 2, 8, 3, 1, 11, 7] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 0, 4, 6, 7, 3, 1, 8, 2] train accuracy: 0.688351851852 test accuracy: 0.644166666667
episode 5628 end
episode 5629 start
episode columns: [4, 6, 5, 11, 0, 10, 2, 1, 8, 7, 3, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 0, 4, 6, 7] train accuracy: 0.695981481481 test accuracy: 0.684333333333
episode 5629 end
episode 5630 start
episode columns: [10, 3, 9, 6, 1, 11, 5, 0, 7, 8, 2, 4] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 0, 4, 6, 7] train accuracy: 0.695981481481 test accuracy: 0.684333333333
episode 5630 end
episode 5631 start
episode columns: [10, 11, 7, 4, 9, 1, 5, 3, 0, 6,

episode 5663 start
episode columns: [4, 10, 3, 8, 6, 2, 0, 7, 1, 5, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 7, 4, 5, 10, 3, 0, 8, 9, 2] train accuracy: 0.682611111111 test accuracy: 0.644333333333
episode 5663 end
episode 5664 start
episode columns: [11, 2, 9, 5, 3, 6, 0, 4, 10, 7, 8, 1] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[6, 7, 4, 5, 10, 3, 0, 8, 9, 2] train accuracy: 0.682703703704 test accuracy: 0.645666666667
episode 5664 end
episode 5665 start
episode columns: [6, 11, 0, 8, 9, 2, 5, 7, 4, 10, 3, 1] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 0, 8, 4, 6, 7] train accuracy: 0.695722222222 test accuracy: 0.682
episode 5665 end
episode 5666 start
episode columns: [3, 2, 9, 1, 7, 4, 6, 10, 11, 5, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 0, 8, 4, 6, 7] train accuracy: 0.695722222222 test accuracy: 0.682
episode 5666 end
episode 5667 start
episode columns: [2, 5, 8, 6, 7, 3, 11, 4, 9, 1, 

episode 5697 start
episode columns: [7, 10, 0, 5, 4, 3, 9, 8, 1, 2, 11, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 9, 2, 7, 0, 10, 3, 1, 8, 4, 5] train accuracy: 0.67737037037 test accuracy: 0.616666666667
episode 5697 end
episode 5698 start
episode columns: [6, 1, 8, 10, 7, 2, 5, 0, 3, 11, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 9, 2, 5, 4, 1, 8, 3, 10, 0, 7] train accuracy: 0.677444444444 test accuracy: 0.617166666667
episode 5698 end
episode 5699 start
episode columns: [9, 10, 7, 8, 1, 5, 4, 6, 3, 11, 0, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 9, 2, 5, 4, 1, 8, 3, 10, 0, 7] train accuracy: 0.677185185185 test accuracy: 0.617166666667
episode 5699 end
episode 5700 start
episode columns: [1, 2, 9, 8, 4, 5, 10, 11, 0, 7, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 9, 2, 5, 4, 1, 8, 3, 10, 0, 7] train accuracy: 0.677203703704 test accuracy: 0.617333333333
episode 5700 end
episode 57

episode 5731 start
episode columns: [5, 11, 10, 6, 1, 4, 0, 7, 9, 2, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 5, 10, 3, 1, 7, 0, 8, 9, 2, 6] train accuracy: 0.67712962963 test accuracy: 0.617166666667
episode 5731 end
episode 5732 start
episode columns: [2, 6, 7, 1, 9, 8, 4, 0, 3, 11, 10, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 5, 10, 3, 1, 7, 0, 8, 9, 2, 6] train accuracy: 0.677333333333 test accuracy: 0.616666666667
episode 5732 end
episode 5733 start
episode columns: [10, 1, 0, 4, 6, 11, 2, 8, 9, 5, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 5, 10, 3, 1, 7, 0, 8, 9, 2, 6] train accuracy: 0.677074074074 test accuracy: 0.617666666667
episode 5733 end
episode 5734 start
episode columns: [11, 1, 3, 2, 5, 0, 7, 4, 8, 10, 9, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[4, 5, 10, 3, 1, 7, 0, 8, 9, 2, 6] train accuracy: 0.677388888889 test accuracy: 0.616833333333
episode 5734 end
episode 5

episode 5765 start
episode columns: [10, 8, 2, 5, 7, 4, 0, 11, 1, 6, 3, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 2, 8, 4, 5, 10, 3, 11, 1, 0, 6, 7] train accuracy: 0.6195 test accuracy: 0.5865
episode 5765 end
episode 5766 start
episode columns: [5, 6, 7, 8, 0, 9, 10, 3, 1, 4, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 2, 8, 4, 5, 10, 3, 11, 1, 0, 6, 7] train accuracy: 0.619648148148 test accuracy: 0.584833333333
episode 5766 end
episode 5767 start
episode columns: [10, 3, 6, 9, 7, 5, 1, 0, 4, 8, 2, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 2, 8, 4, 5, 10, 3, 11, 1, 0, 6, 7] train accuracy: 0.619444444444 test accuracy: 0.586333333333
episode 5767 end
episode 5768 start
episode columns: [9, 6, 7, 0, 11, 5, 8, 4, 3, 10, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 2, 8, 4, 5, 10, 3, 11, 1, 0, 6, 7] train accuracy: 0.620092592593 test accuracy: 0.585833333333
episode 5768 end
episode 5

episode 5799 start
episode columns: [0, 7, 5, 2, 3, 1, 6, 11, 9, 4, 8, 10] epsilon: 0.9 learning rate: 0.09 error: 0.7
episode policy:[5, 2, 8, 3, 11, 9, 0, 10, 6, 7, 4, 1] train accuracy: 0.619351851852 test accuracy: 0.587
episode 5799 end
episode 5800 start
episode columns: [5, 2, 8, 7, 10, 6, 4, 1, 3, 0, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 2, 8, 3, 11, 0, 10, 6, 7, 4, 1] train accuracy: 0.626740740741 test accuracy: 0.5875
episode 5800 end
episode 5801 start
episode columns: [7, 6, 9, 4, 3, 8, 2, 10, 5, 1, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 2, 8, 3, 11, 0, 10, 6, 7, 4, 1] train accuracy: 0.62687037037 test accuracy: 0.586
episode 5801 end
episode 5802 start
episode columns: [3, 8, 10, 2, 6, 0, 4, 1, 5, 9, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 2, 8, 3, 11, 0, 10, 6, 7, 4, 1] train accuracy: 0.627148148148 test accuracy: 0.587333333333
episode 5802 end
episode 5803 start
episode col

episode 5833 start
episode columns: [5, 2, 10, 4, 0, 3, 9, 1, 11, 6, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[4, 2, 8, 3, 11, 9, 1, 10, 6, 7, 0, 5] train accuracy: 0.619537037037 test accuracy: 0.583666666667
episode 5833 end
episode 5834 start
episode columns: [1, 2, 4, 3, 0, 5, 6, 7, 8, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 2, 8, 3, 11, 9, 1, 10, 6, 7, 0, 5] train accuracy: 0.619703703704 test accuracy: 0.585333333333
episode 5834 end
episode 5835 start
episode columns: [5, 10, 1, 3, 9, 11, 8, 4, 0, 6, 2, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 2, 8, 3, 11, 9, 1, 10, 6, 7, 0, 5] train accuracy: 0.620111111111 test accuracy: 0.584
episode 5835 end
episode 5836 start
episode columns: [9, 11, 10, 5, 3, 4, 6, 0, 8, 7, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 2, 8, 3, 11, 5, 1, 4, 10, 6, 7, 0] train accuracy: 0.620185185185 test accuracy: 0.588
episode 5836 end
episode 5837 sta

episode 5867 start
episode columns: [4, 7, 2, 9, 0, 11, 1, 6, 8, 10, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[4, 1, 3, 11, 2, 8, 6, 7, 5, 10, 9] train accuracy: 0.684240740741 test accuracy: 0.6535
episode 5867 end
episode 5868 start
episode columns: [8, 7, 6, 2, 10, 1, 9, 11, 4, 0, 3, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 1, 3, 11, 2, 8, 6, 7, 5, 10, 9] train accuracy: 0.684111111111 test accuracy: 0.653666666667
episode 5868 end
episode 5869 start
episode columns: [11, 4, 3, 7, 1, 9, 6, 8, 2, 10, 0, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 1, 3, 11, 2, 8, 6, 7, 5, 10, 0, 9] train accuracy: 0.61987037037 test accuracy: 0.588333333333
episode 5869 end
episode 5870 start
episode columns: [8, 0, 2, 5, 10, 7, 6, 11, 4, 9, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 1, 3, 11, 2, 8, 6, 7, 5, 10, 0, 9] train accuracy: 0.619944444444 test accuracy: 0.582166666667
episode 5870 end
episode 5

episode 5901 start
episode columns: [2, 3, 4, 0, 5, 6, 7, 11, 8, 1, 9, 10] epsilon: 0.9 learning rate: 0.09 error: 0.15
episode policy:[7, 4, 2, 5, 10, 6, 9, 8, 3, 0, 11, 1] train accuracy: 0.619648148148 test accuracy: 0.585
episode 5901 end
episode 5902 start
episode columns: [7, 11, 10, 6, 4, 1, 2, 8, 5, 9, 0, 3] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 4, 2, 5, 10, 6, 9, 8, 3, 0, 11, 1] train accuracy: 0.620425925926 test accuracy: 0.5865
episode 5902 end
episode 5903 start
episode columns: [5, 4, 8, 2, 3, 10, 7, 0, 6, 9, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[7, 4, 2, 5, 10, 6, 9, 8, 3, 0, 11, 1] train accuracy: 0.619740740741 test accuracy: 0.5845
episode 5903 end
episode 5904 start
episode columns: [7, 3, 11, 1, 10, 9, 6, 0, 4, 2, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 0, 10, 6, 9, 8, 2, 5, 3, 11, 1] train accuracy: 0.620259259259 test accuracy: 0.585
episode 5904 end
episode 5905 start
episode col

episode 5935 start
episode columns: [0, 4, 8, 3, 6, 2, 9, 7, 11, 1, 5, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 3, 4, 5, 6, 2, 1, 10, 8, 9] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 5935 end
episode 5936 start
episode columns: [8, 0, 10, 2, 6, 5, 3, 7, 4, 1, 9, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[7, 3, 4, 5, 6, 2, 1, 9, 8] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 5936 end
episode 5937 start
episode columns: [3, 2, 9, 11, 10, 1, 6, 4, 8, 0, 5, 7] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[7, 3, 4, 5, 6, 2, 1, 9, 8] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 5937 end
episode 5938 start
episode columns: [8, 7, 3, 10, 11, 5, 0, 2, 9, 4, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[7, 3, 4, 5, 6, 2, 1, 9, 8] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 5938 end
episode 5939 start
episode columns: [3, 6, 1, 11, 5, 2, 

episode 5969 start
episode columns: [9, 5, 4, 7, 2, 0, 11, 1, 3, 8, 10, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 7, 4, 5, 10, 3, 1, 2, 0, 6] train accuracy: 0.68162962963 test accuracy: 0.629666666667
episode 5969 end
episode 5970 start
episode columns: [5, 2, 9, 8, 4, 10, 11, 3, 1, 7, 6, 0] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 7, 4, 5, 10, 3, 1, 2, 0, 6] train accuracy: 0.681592592593 test accuracy: 0.630833333333
episode 5970 end
episode 5971 start
episode columns: [10, 5, 0, 7, 9, 3, 6, 2, 4, 8, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 7, 4, 5, 10, 3, 1, 2, 0, 6] train accuracy: 0.681703703704 test accuracy: 0.6305
episode 5971 end
episode 5972 start
episode columns: [7, 4, 3, 9, 6, 0, 10, 8, 11, 2, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 7, 4, 5, 10, 3, 1, 2, 0, 6] train accuracy: 0.681537037037 test accuracy: 0.631
episode 5972 end
episode 5973 start
episode columns: [8, 3

episode 6003 start
episode columns: [9, 2, 8, 6, 1, 3, 5, 0, 10, 4, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[9, 4, 5, 1, 2, 8, 6, 10, 3, 0, 11, 7] train accuracy: 0.6195 test accuracy: 0.587333333333
episode 6003 end
episode 6004 start
episode columns: [8, 6, 2, 0, 5, 11, 3, 7, 10, 4, 1, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 4, 5, 1, 2, 8, 6, 10, 3, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 6004 end
episode 6005 start
episode columns: [7, 9, 5, 1, 8, 2, 4, 3, 10, 6, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 4, 5, 6, 2, 8, 3, 7] train accuracy: 0.696074074074 test accuracy: 0.6885
episode 6005 end
episode 6006 start
episode columns: [8, 6, 5, 1, 3, 10, 7, 0, 2, 4, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 4, 5, 1, 2, 8, 6, 10, 3, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 6006 end
episode 6007 start
episode columns: [6

episode 6038 start
episode columns: [6, 1, 3, 2, 4, 5, 10, 7, 8, 0, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 6, 4, 7, 3, 8, 5, 1, 2, 0, 11, 10] train accuracy: 0.620166666667 test accuracy: 0.585833333333
episode 6038 end
episode 6039 start
episode columns: [2, 4, 5, 1, 7, 8, 9, 3, 10, 6, 0, 11] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 6, 4, 7, 3, 8, 5, 1, 2, 0, 11, 10] train accuracy: 0.619259259259 test accuracy: 0.583833333333
episode 6039 end
episode 6040 start
episode columns: [7, 4, 9, 10, 8, 0, 1, 11, 2, 6, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 6, 4, 7, 3, 8, 5, 1, 2, 0, 11, 10] train accuracy: 0.620351851852 test accuracy: 0.587
episode 6040 end
episode 6041 start
episode columns: [11, 4, 7, 9, 8, 6, 0, 3, 5, 10, 1, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 6, 4, 5, 1, 2, 8, 3, 10, 7, 11, 0] train accuracy: 0.619407407407 test accuracy: 0.587333333333
episode 6041 end
ep

episode 6072 start
episode columns: [8, 11, 10, 3, 0, 2, 7, 4, 9, 1, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 6, 4, 3, 5, 8, 1] train accuracy: 0.696277777778 test accuracy: 0.686833333333
episode 6072 end
episode 6073 start
episode columns: [6, 7, 1, 4, 9, 0, 10, 8, 5, 3, 11, 2] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 6, 4, 3, 5, 8, 1] train accuracy: 0.696277777778 test accuracy: 0.686833333333
episode 6073 end
episode 6074 start
episode columns: [9, 8, 7, 3, 1, 6, 11, 4, 5, 2, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 6, 4, 3, 5, 8, 1] train accuracy: 0.696277777778 test accuracy: 0.686833333333
episode 6074 end
episode 6075 start
episode columns: [4, 1, 0, 6, 7, 10, 2, 8, 9, 11, 5, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[9, 6, 4, 3, 5, 8, 1] train accuracy: 0.696277777778 test accuracy: 0.686833333333
episode 6075 end
episode 6076 start
episode columns: [3, 11, 7, 5, 8, 1, 9, 2, 

episode 6106 start
episode columns: [3, 4, 10, 0, 11, 8, 9, 1, 2, 7, 5, 6] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[9, 1, 5, 3, 8, 10, 6, 4, 7, 11, 0] train accuracy: 0.647592592593 test accuracy: 0.597
episode 6106 end
episode 6107 start
episode columns: [4, 1, 10, 3, 2, 11, 5, 6, 8, 7, 9, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 1, 5, 3, 8, 10, 6, 4, 7, 11, 0] train accuracy: 0.647962962963 test accuracy: 0.596
episode 6107 end
episode 6108 start
episode columns: [2, 6, 1, 4, 11, 0, 10, 5, 3, 8, 9, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 1, 5, 3, 8, 10, 6, 4, 7, 11, 0] train accuracy: 0.6475 test accuracy: 0.5965
episode 6108 end
episode 6109 start
episode columns: [1, 10, 6, 9, 7, 2, 5, 3, 8, 0, 11, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[9, 1, 5, 3, 8, 10, 7, 4, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6109 end
episode 6110 start
episode columns: [9, 2, 10, 

episode 6140 start
episode columns: [0, 11, 10, 3, 4, 6, 1, 2, 8, 7, 5, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 4, 9, 1, 5, 8, 7, 6, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6140 end
episode 6141 start
episode columns: [5, 11, 3, 10, 4, 9, 6, 2, 1, 0, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 4, 9, 1, 5, 8, 7, 6, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6141 end
episode 6142 start
episode columns: [4, 3, 10, 7, 1, 5, 2, 11, 6, 9, 8, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 4, 9, 1, 5, 8, 7, 6, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6142 end
episode 6143 start
episode columns: [1, 7, 6, 3, 10, 2, 5, 11, 4, 9, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 4, 9, 1, 5, 8, 7, 6, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6143 end
episode 6144 start
episode columns: 

episode 6174 start
episode columns: [5, 11, 2, 8, 6, 3, 4, 10, 7, 9, 1, 0] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 1, 5, 8, 10, 4, 9, 7, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6174 end
episode 6175 start
episode columns: [4, 8, 9, 0, 11, 10, 3, 5, 7, 2, 1, 6] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 1, 5, 8, 10, 4, 9, 7, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6175 end
episode 6176 start
episode columns: [10, 8, 4, 9, 0, 5, 3, 2, 11, 1, 6, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 1, 5, 8, 10, 4, 9, 7, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6176 end
episode 6177 start
episode columns: [3, 9, 6, 2, 4, 5, 8, 10, 7, 0, 1, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 10, 4, 6, 1, 5, 8, 7, 9] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6177 end
episode 6178 start
episode columns

episode 6208 start
episode columns: [11, 1, 10, 4, 8, 2, 0, 6, 7, 3, 9, 5] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 4, 10, 1, 3, 8, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 6208 end
episode 6209 start
episode columns: [1, 0, 10, 5, 7, 6, 3, 11, 8, 2, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 4, 10, 1, 3, 8, 7, 9] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 6209 end
episode 6210 start
episode columns: [2, 4, 8, 11, 10, 6, 5, 1, 0, 9, 7, 3] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 4, 10, 1, 3, 8, 7, 9] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 6210 end
episode 6211 start
episode columns: [7, 1, 10, 5, 9, 8, 11, 2, 0, 4, 3, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 4, 10, 1, 3, 8, 7, 9] train accuracy: 0.696351851852 test accuracy: 0.689
episode 6211 end
episode 6212 start
episode columns: [4, 8, 11, 7, 0, 9, 

episode 6243 start
episode columns: [9, 6, 2, 4, 3, 0, 8, 5, 11, 1, 7, 10] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 10, 1, 3, 8, 9, 7, 2, 11, 4] train accuracy: 0.68937037037 test accuracy: 0.665666666667
episode 6243 end
episode 6244 start
episode columns: [8, 0, 9, 6, 2, 1, 5, 3, 4, 7] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 10, 1, 3, 8, 9, 7, 2, 11, 4] train accuracy: 0.689185185185 test accuracy: 0.664166666667
episode 6244 end
episode 6245 start
episode columns: [3, 5, 6, 2, 0, 7, 4, 9, 1, 10, 8, 11] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 10, 1, 3, 8, 9, 7, 2, 11, 4] train accuracy: 0.689296296296 test accuracy: 0.665
episode 6245 end
episode 6246 start
episode columns: [5, 8, 9, 0, 4, 3, 1, 2, 7, 6, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[6, 10, 1, 3, 8, 9, 7, 2, 11, 4] train accuracy: 0.689277777778 test accuracy: 0.6645
episode 6246 end
episode 6247 start
episode columns: [2, 11, 

episode 6277 start
episode columns: [10, 7, 1, 11, 2, 0, 6, 5, 8, 3, 4, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[8, 6, 5, 11, 2, 7, 3, 10, 1, 4, 9] train accuracy: 0.68412962963 test accuracy: 0.654333333333
episode 6277 end
episode 6278 start
episode columns: [7, 11, 9, 1, 0, 8, 3, 10, 5, 2, 6, 4] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 6, 5, 11, 2, 7, 3, 10, 1, 4, 9] train accuracy: 0.684185185185 test accuracy: 0.654833333333
episode 6278 end
episode 6279 start
episode columns: [2, 4, 1, 7, 9, 8, 6, 5, 3, 11, 10, 0] epsilon: 0.9 learning rate: 0.09 error: 0.1
episode policy:[8, 6, 7, 2, 3, 10, 1, 11, 4, 9] train accuracy: 0.689222222222 test accuracy: 0.664333333333
episode 6279 end
episode 6280 start
episode columns: [11, 8, 5, 7, 4, 1, 9, 10, 2, 0, 3, 6] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[8, 6, 7, 2, 3, 10, 1, 11, 4, 9] train accuracy: 0.689203703704 test accuracy: 0.6645
episode 6280 end
episode 6281 start
e

episode 6311 start
episode columns: [8, 3, 5, 1, 9, 10, 6, 4, 7, 2, 11, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[3, 8, 10, 6, 7, 1, 11, 2, 9, 4] train accuracy: 0.689333333333 test accuracy: 0.664166666667
episode 6311 end
episode 6312 start
episode columns: [2, 11, 3, 8, 10, 7, 6, 5, 0, 9, 4, 1] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[3, 8, 1, 11, 2, 7, 4, 10, 6, 5, 9] train accuracy: 0.684314814815 test accuracy: 0.6545
episode 6312 end
episode 6313 start
episode columns: [6, 7, 1, 5, 3, 4, 9, 8, 11, 0, 2, 10] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 8, 1, 11, 2, 7, 4, 10, 6, 5, 9] train accuracy: 0.684203703704 test accuracy: 0.654166666667
episode 6313 end
episode 6314 start
episode columns: [10, 6, 1, 7, 2, 3, 4, 9, 0, 8, 5, 11] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 8, 1, 11, 2, 7, 4, 10, 6, 5, 9] train accuracy: 0.684259259259 test accuracy: 0.654
episode 6314 end
episode 6315 start
episod

episode 6345 start
episode columns: [0, 4, 11, 1, 2, 5, 6, 7, 10, 8, 9, 3] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[3, 8, 10, 6, 5, 9, 7, 1, 4] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6345 end
episode 6346 start
episode columns: [1, 5, 6, 8, 3, 9, 4, 11, 0, 10, 7, 2] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[3, 8, 10, 6, 5, 9, 7, 1, 4] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6346 end
episode 6347 start
episode columns: [0, 4, 2, 8, 6, 9, 10, 3, 7, 11, 5, 1] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[3, 8, 10, 6, 5, 9, 7, 1, 4] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6347 end
episode 6348 start
episode columns: [11, 4, 7, 8, 10, 6, 0, 2, 5, 9, 1, 3] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[3, 8, 1, 9, 6, 5, 11, 7, 4, 10] train accuracy: 0.690833333333 test accuracy: 0.6735
episode 6348 end
episode 6349 start
episode columns: [9,

episode 6379 start
episode columns: [2, 6, 7, 5, 8, 11, 1, 3, 0, 10, 9, 4] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 2, 10, 1, 0, 4, 6, 5, 8, 3, 7] train accuracy: 0.677296296296 test accuracy: 0.6175
episode 6379 end
episode 6380 start
episode columns: [0, 4, 10, 2, 1, 3, 9, 11, 6, 5, 8, 7] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[9, 2, 10, 1, 0, 4, 6, 5, 8, 3, 7] train accuracy: 0.677462962963 test accuracy: 0.617166666667
episode 6380 end
episode 6381 start
episode columns: [6, 4, 0, 5, 9, 3, 7, 8, 2, 1, 11, 10] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[9, 2, 10, 1, 0, 4, 6, 5, 8, 3] train accuracy: 0.680277777778 test accuracy: 0.621
episode 6381 end
episode 6382 start
episode columns: [1, 5, 10, 4, 0, 6, 8, 2, 9, 11, 3, 7] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[9, 2, 10, 1, 0, 4, 6, 5, 8, 3] train accuracy: 0.680333333333 test accuracy: 0.621166666667
episode 6382 end
episode 6383 start
episode column

episode 6414 start
episode columns: [0, 10, 11, 2, 1, 7, 9, 5, 8, 6, 4, 3] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[1, 4, 9, 10, 6, 5, 8, 3] train accuracy: 0.696092592593 test accuracy: 0.686333333333
episode 6414 end
episode 6415 start
episode columns: [5, 0, 6, 4, 11, 2, 7, 8, 10, 1, 3, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 4, 9, 10, 6, 5, 8, 3] train accuracy: 0.696092592593 test accuracy: 0.686333333333
episode 6415 end
episode 6416 start
episode columns: [6, 1, 11, 2, 4, 7, 10, 8, 3, 0, 9, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 4, 9, 10, 6, 5, 8, 3] train accuracy: 0.696092592593 test accuracy: 0.686333333333
episode 6416 end
episode 6417 start
episode columns: [4, 7, 3, 0, 5, 1, 11, 6, 9, 10, 2, 8] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[1, 4, 9, 10, 6, 5, 8, 3] train accuracy: 0.696092592593 test accuracy: 0.686166666667
episode 6417 end
episode 6418 start
episode columns: [2, 7, 3,

episode 6449 start
episode columns: [0, 4, 7, 10, 8, 1, 9, 5, 11, 6, 2, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 4, 3, 10, 8, 9, 7] train accuracy: 0.69637037037 test accuracy: 0.6885
episode 6449 end
episode 6450 start
episode columns: [1, 2, 7, 8, 0, 4, 5, 10, 6, 3, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[1, 4, 3, 10, 8, 9, 7] train accuracy: 0.69637037037 test accuracy: 0.6885
episode 6450 end
episode 6451 start
episode columns: [1, 2, 8, 0, 4, 5, 6, 3, 7, 10, 11, 9] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[1, 4, 3, 10, 8, 9, 7] train accuracy: 0.69637037037 test accuracy: 0.6885
episode 6451 end
episode 6452 start
episode columns: [1, 5, 2, 7, 4, 11, 8, 10, 0, 9, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[1, 4, 3, 10, 8, 9, 7] train accuracy: 0.69637037037 test accuracy: 0.6885
episode 6452 end
episode 6453 start
episode columns: [4, 8, 11, 1, 5, 0, 6, 3, 10, 9, 7, 2] epsilon: 0.9 learnin

episode 6484 start
episode columns: [9, 1, 10, 4, 5, 0, 2, 11, 8, 3, 7, 6] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[6, 4, 3, 10, 1, 9, 5, 7] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 6484 end
episode 6485 start
episode columns: [5, 1, 10, 2, 0, 4, 6, 3, 11, 7, 9, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[6, 4, 3, 10, 1, 9, 5, 7] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 6485 end
episode 6486 start
episode columns: [7, 9, 3, 6, 4, 11, 0, 10, 8, 5, 2, 1] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[6, 4, 3, 10, 1, 9, 5, 7] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 6486 end
episode 6487 start
episode columns: [3, 0, 6, 9, 8, 10, 5, 2, 1, 11, 7, 4] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[6, 4, 3, 10, 1, 9, 5, 7] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 6487 end
episode 6488 start
episode columns: [10, 0, 5, 8, 7, 6, 4, 2, 11, 1, 9, 3] eps

episode 6519 start
episode columns: [6, 4, 10, 0, 5, 7, 9, 3, 2, 11, 1, 8] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[6, 4, 3, 7, 10, 1, 9, 8] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 6519 end
episode 6520 start
episode columns: [2, 1, 9, 10, 4, 3, 5, 6, 7, 11, 0, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[6, 4, 3, 7, 10, 1, 9, 8] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 6520 end
episode 6521 start
episode columns: [6, 5, 3, 10, 11, 0, 9, 4, 1, 8, 7, 2] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[6, 4, 3, 7, 10, 1, 9, 8] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 6521 end
episode 6522 start
episode columns: [8, 9, 3, 7, 6, 0, 1, 11, 4, 10, 2, 5] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[6, 4, 3, 7, 10, 1, 9, 8] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 6522 end
episode 6523 start
episode columns: [8, 4, 0,

episode 6554 start
episode columns: [8, 7, 11, 4, 5, 1, 6, 10, 0, 9, 3, 2] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 3, 7, 8, 4, 6, 10, 1] train accuracy: 0.696222222222 test accuracy: 0.686
episode 6554 end
episode 6555 start
episode columns: [2, 10, 1, 5, 3, 7, 11, 8, 4, 0, 6] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 3, 7, 8, 10, 6, 4, 9, 0, 2, 11, 1] train accuracy: 0.619685185185 test accuracy: 0.588166666667
episode 6555 end
episode 6556 start
episode columns: [2, 3, 7, 11, 10, 8, 0, 4, 6, 1, 9, 5] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 3, 7, 8, 10, 6, 4, 9, 0, 2, 11, 1] train accuracy: 0.619888888889 test accuracy: 0.585666666667
episode 6556 end
episode 6557 start
episode columns: [10, 5, 8, 7, 11, 0, 6, 3, 1, 4, 2, 9] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 3, 7, 8, 10, 6, 4, 9, 0, 2, 11, 1] train accuracy: 0.619388888889 test accuracy: 0.584666666667
episode 6557 end
episode 6558 start

episode 6588 start
episode columns: [6, 5, 0, 1, 8, 2, 4, 9, 3, 7, 10, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 10, 3, 6, 4, 9, 0, 2, 8, 1] train accuracy: 0.677259259259 test accuracy: 0.6165
episode 6588 end
episode 6589 start
episode columns: [3, 1, 7, 11, 6, 4, 9, 10, 0, 2, 8, 5] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[5, 7, 10, 3, 6, 4, 9, 0, 2, 8, 1] train accuracy: 0.677222222222 test accuracy: 0.616333333333
episode 6589 end
episode 6590 start
episode columns: [6, 2, 10, 11, 9, 7, 5, 8, 1, 3, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 10, 3, 6, 4, 9, 0, 2, 8, 1] train accuracy: 0.677203703704 test accuracy: 0.6165
episode 6590 end
episode 6591 start
episode columns: [4, 3, 2, 0, 1, 5, 10, 7, 8, 9, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[5, 7, 10, 3, 6, 4, 9, 0, 2, 8, 1] train accuracy: 0.677333333333 test accuracy: 0.617333333333
episode 6591 end
episode 6592 start
episod

episode 6623 start
episode columns: [5, 7, 1, 11, 9, 8, 2, 3, 6, 10, 4, 0] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 4, 6, 1, 3, 7, 10, 8, 9, 11] train accuracy: 0.690648148148 test accuracy: 0.674
episode 6623 end
episode 6624 start
episode columns: [11, 3, 10, 2, 1, 0, 7, 9, 6, 8, 4, 5] epsilon: 0.9 learning rate: 0.09 error: 0.3
episode policy:[5, 4, 6, 1, 3, 10, 8, 9, 11, 7, 0, 2] train accuracy: 0.619833333333 test accuracy: 0.588333333333
episode 6624 end
episode 6625 start
episode columns: [8, 7, 1, 4, 6, 5, 0, 2, 11, 3, 10, 9] epsilon: 0.9 learning rate: 0.09 error: 0.45
episode policy:[5, 4, 6, 1, 3, 10, 8, 9, 11, 7, 0, 2] train accuracy: 0.619981481481 test accuracy: 0.586
episode 6625 end
episode 6626 start
episode columns: [3, 2, 1, 6, 10, 9, 0, 7, 4, 11, 5, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[5, 4, 6, 1, 3, 10, 8, 9, 11, 7, 0, 2] train accuracy: 0.620037037037 test accuracy: 0.584166666667
episode 6626 end
episode 6627 star

episode 6657 start
episode columns: [0, 9, 5, 10, 2, 4, 1, 7, 6, 11, 8, 3] epsilon: 0.9 learning rate: 0.09 error: 0.5
episode policy:[5, 3, 10, 4, 7, 9, 0, 8, 6, 1] train accuracy: 0.689777777778 test accuracy: 0.6695
episode 6657 end
episode 6658 start
episode columns: [10, 0, 6, 1, 9, 8, 3, 5, 4, 2, 7, 11] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[5, 3, 10, 4, 7, 9, 0, 8, 6, 1] train accuracy: 0.689833333333 test accuracy: 0.669166666667
episode 6658 end
episode 6659 start
episode columns: [11, 3, 0, 5, 6, 1, 9, 7, 4, 2, 8, 10] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[5, 3, 10, 4, 7, 9, 0, 8, 6, 1] train accuracy: 0.689833333333 test accuracy: 0.669
episode 6659 end
episode 6660 start
episode columns: [10, 1, 6, 2, 3, 11, 9, 8, 4, 0, 7, 5] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[5, 3, 10, 4, 7, 9, 0, 8, 6, 1] train accuracy: 0.689888888889 test accuracy: 0.668833333333
episode 6660 end
episode 6661 start
episode columns: [3

episode 6691 start
episode columns: [5, 1, 2, 10, 7, 4, 8, 9, 0, 11, 6, 3] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[4, 5, 3, 9, 7, 11, 8, 10, 6] train accuracy: 0.692888888889 test accuracy: 0.677
episode 6691 end
episode 6692 start
episode columns: [3, 4, 5, 8, 2, 9, 7, 0, 10, 1, 6, 11] epsilon: 0.9 learning rate: 0.09 error: 0.6
episode policy:[4, 5, 3, 9, 2, 8, 10, 6, 7, 11, 1] train accuracy: 0.684092592593 test accuracy: 0.653833333333
episode 6692 end
episode 6693 start
episode columns: [11, 9, 7, 5, 1, 0, 3, 10, 2, 6, 4, 8] epsilon: 0.9 learning rate: 0.09 error: 0.4
episode policy:[4, 5, 3, 9, 2, 8, 10, 6, 7, 11, 1] train accuracy: 0.68412962963 test accuracy: 0.654833333333
episode 6693 end
episode 6694 start
episode columns: [3, 2, 4, 7, 8, 10, 6, 1, 11, 0, 9, 5] epsilon: 0.9 learning rate: 0.09 error: 0.35
episode policy:[4, 5, 3, 9, 2, 8, 10, 6, 7, 11, 1] train accuracy: 0.684240740741 test accuracy: 0.654
episode 6694 end
episode 6695 start
episode colum

episode 6725 start
episode columns: [0, 2, 5, 1, 7, 10, 8, 6, 4, 9, 11, 3] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[4, 1, 7, 9, 10, 3, 5, 6, 8, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 6725 end
episode 6726 start
episode columns: [6, 2, 1, 4, 0, 5, 10, 7, 11, 9, 3, 8] epsilon: 0.9 learning rate: 0.09 error: 0.25
episode policy:[4, 1, 7, 9, 10, 3, 5, 6, 8, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 6726 end
episode 6727 start
episode columns: [2, 11, 0, 10, 5, 3, 1, 4, 9, 6, 7, 8] epsilon: 0.9 learning rate: 0.09 error: 0.65
episode policy:[4, 1, 7, 9, 10, 3, 5, 6, 8, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 6727 end
episode 6728 start
episode columns: [3, 5, 0, 11, 6, 8, 10, 9, 7, 1, 4] epsilon: 0.9 learning rate: 0.09 error: 0.55
episode policy:[4, 1, 7, 9, 10, 3, 5, 6, 8, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 6728 end
episode 6729 start
epis

episode 6759 start
episode columns: [10, 6, 7, 8, 5, 2, 0, 4, 1, 9, 3, 11] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[10, 4, 1, 5, 7, 9, 3, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6759 end
episode 6760 start
episode columns: [8, 9, 7, 3, 10, 4, 1, 5, 6] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[10, 4, 1, 5, 7, 9, 3, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6760 end
episode 6761 start
episode columns: [2, 1, 4, 3, 7, 0, 8, 11, 6, 5, 9, 10] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[10, 4, 1, 5, 7, 9, 3, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6761 end
episode 6762 start
episode columns: [3, 7, 1, 8, 9, 4, 5, 6, 10] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[10, 4, 1, 5, 7, 9, 3, 8, 6] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6762 end
episode 6763 start
episode columns: [11, 2, 10, 4, 9, 

episode 6794 start
episode columns: [4, 10, 8, 5, 3, 7, 6, 9, 2, 1] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[4, 3, 5, 7, 6, 9, 10, 8, 0, 1] train accuracy: 0.689833333333 test accuracy: 0.669166666667
episode 6794 end
episode 6795 start
episode columns: [4, 3, 5, 7, 0, 1, 8, 11, 6, 9, 2] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[4, 3, 5, 7, 6, 9, 10, 8, 0, 1] train accuracy: 0.689851851852 test accuracy: 0.668833333333
episode 6795 end
episode 6796 start
episode columns: [4, 11, 7, 6, 3, 5, 10, 0, 8, 9, 2, 1] epsilon: 0.5 learning rate: 0.05 error: 0.15
episode policy:[4, 3, 10, 8, 9, 7, 6, 5] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 6796 end
episode 6797 start
episode columns: [4, 5, 10, 7, 2, 9, 3] epsilon: 0.5 learning rate: 0.05 error: 0.15
episode policy:[4, 3, 10, 8, 9, 7, 6, 5] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 6797 end
episode 6798 start
episode columns: [4, 3, 10, 8, 9, 5, 6, 1

episode 6829 start
episode columns: [0, 5, 3, 7, 8, 9, 2, 1, 4, 6] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[4, 3, 7, 8, 9, 1, 5, 2] train accuracy: 0.696055555556 test accuracy: 0.685666666667
episode 6829 end
episode 6830 start
episode columns: [4, 10, 6, 0, 8, 9, 11, 5, 3, 1, 7] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[4, 3, 7, 8, 9, 1, 5, 2] train accuracy: 0.696055555556 test accuracy: 0.685666666667
episode 6830 end
episode 6831 start
episode columns: [4, 7, 8, 11, 0, 1, 5, 3, 2, 6, 9, 10] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[4, 3, 7, 6, 9, 1, 8, 10, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 6831 end
episode 6832 start
episode columns: [1, 4, 6, 11, 7, 8, 9, 3, 5, 2, 0, 10] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[4, 3, 7, 6, 9, 1, 8, 10, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 6832 end
episode 6833 start
episode columns: [4, 3, 5, 8

episode 6865 start
episode columns: [11, 7, 6, 5, 3, 10, 0, 1, 8, 2, 9, 4] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[6, 4, 1, 8, 9, 10, 7] train accuracy: 0.696462962963 test accuracy: 0.688666666667
episode 6865 end
episode 6866 start
episode columns: [6, 4, 1, 8, 9, 10, 7, 11, 0, 2, 5, 3] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[6, 4, 1, 8, 9, 7] train accuracy: 0.696351851852 test accuracy: 0.689
episode 6866 end
episode 6867 start
episode columns: [8, 9, 7, 6, 4, 1, 5, 3, 10] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[6, 4, 1, 8, 9, 10, 7] train accuracy: 0.696462962963 test accuracy: 0.688833333333
episode 6867 end
episode 6868 start
episode columns: [10, 4, 0, 11, 7, 1, 8, 6, 9, 5, 3] epsilon: 0.5 learning rate: 0.05 error: 0.15
episode policy:[6, 4, 1, 8, 9, 10, 7] train accuracy: 0.696462962963 test accuracy: 0.688833333333
episode 6868 end
episode 6869 start
episode columns: [6, 2, 9, 10, 4, 1, 8, 7] epsilon: 0.5 learning r

episode 6901 start
episode columns: [1, 10, 6, 4, 8, 2, 9, 3, 11, 5, 7] epsilon: 0.5 learning rate: 0.05 error: 0.65
episode policy:[6, 10, 4, 1, 8, 9, 3, 7, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6901 end
episode 6902 start
episode columns: [6, 2, 4, 3, 0, 11, 7, 8, 5, 1, 10] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[6, 10, 4, 1, 8, 9, 3, 7, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6902 end
episode 6903 start
episode columns: [8, 3, 7, 6, 10, 4, 0, 1, 9] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[6, 10, 4, 1, 8, 9, 3, 7, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6903 end
episode 6904 start
episode columns: [6, 10, 4, 1, 8, 3, 5, 7, 9, 11, 0, 2] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[6, 10, 4, 1, 7, 8, 9, 3, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6904 end
episode 6905 start
episode columns: [8, 7, 4, 2, 9,

episode 6936 start
episode columns: [6, 3, 10, 5, 7, 4, 8, 9, 0, 1] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[6, 10, 4, 8, 9, 3, 7, 1] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 6936 end
episode 6937 start
episode columns: [6, 10, 4, 8, 2, 9, 1, 11, 3, 7, 5] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[6, 10, 4, 8, 9, 1, 7, 5, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6937 end
episode 6938 start
episode columns: [3, 10, 0, 1, 9, 8, 7, 6, 4, 5] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[6, 10, 4, 8, 9, 1, 7, 5, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6938 end
episode 6939 start
episode columns: [7, 1, 2, 9, 8, 4, 5, 11, 0, 10, 6, 3] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[6, 10, 4, 8, 9, 1, 7, 5, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 6939 end
episode 6940 start
episode columns: [6, 10, 5, 2, 1, 

episode 6971 start
episode columns: [2, 9, 6, 3, 1, 7, 8, 4, 10, 0, 5] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[1, 7, 6, 3, 9, 8, 4, 5, 0, 2] train accuracy: 0.684166666667 test accuracy: 0.637333333333
episode 6971 end
episode 6972 start
episode columns: [8, 9, 6, 4, 1, 7, 5, 3, 2] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[1, 7, 6, 3, 9, 8, 4, 5, 0, 2] train accuracy: 0.684185185185 test accuracy: 0.637
episode 6972 end
episode 6973 start
episode columns: [7, 6, 3, 1, 4, 0, 2, 9, 5, 10, 8] epsilon: 0.5 learning rate: 0.05 error: 0.15
episode policy:[1, 7, 6, 3, 9, 8, 4, 5, 0, 2] train accuracy: 0.684185185185 test accuracy: 0.637166666667
episode 6973 end
episode 6974 start
episode columns: [1, 7, 6, 8, 3, 9] epsilon: 0.5 learning rate: 0.05 error: 0.2
episode policy:[1, 7, 6, 3, 9, 8, 4, 5, 0, 2] train accuracy: 0.684092592593 test accuracy: 0.637166666667
episode 6974 end
episode 6975 start
episode columns: [1, 7, 6, 8, 3, 9, 10, 4, 11, 0, 2, 5]

episode 7007 start
episode columns: [1, 7, 0, 11, 8, 2, 3, 6, 9] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[1, 2, 9, 3, 5, 4, 8, 6, 7] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 7007 end
episode 7008 start
episode columns: [1, 5, 7, 6, 9, 11, 0, 10, 8, 3] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[1, 2, 9, 3, 5, 4, 8, 6, 7] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 7008 end
episode 7009 start
episode columns: [5, 9, 0, 1, 6, 7, 3, 8, 4] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[1, 2, 9, 3, 5, 4, 8, 6, 7] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 7009 end
episode 7010 start
episode columns: [1, 2, 10, 6, 7, 8, 3, 5, 9] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[1, 2, 9, 3, 5, 4, 8, 6, 7] train accuracy: 0.695814814815 test accuracy: 0.6855
episode 7010 end
episode 7011 start
episode columns: [1, 2, 3, 5, 9, 6, 7, 8, 4, 0, 10, 11] epsilon: 0.5 learning rate: 0.05

episode 7043 start
episode columns: [11, 5, 1, 4, 9, 3, 8, 10, 6, 2] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[1, 4, 5, 9, 3, 10, 8, 2, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7043 end
episode 7044 start
episode columns: [1, 4, 0, 6, 7, 10, 8, 3, 9, 5] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[1, 4, 5, 9, 3, 10, 8, 2, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7044 end
episode 7045 start
episode columns: [9, 0, 11, 8, 10, 3, 4, 5, 1, 7, 6, 2] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[1, 4, 5, 9, 3, 10, 8, 2, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7045 end
episode 7046 start
episode columns: [1, 4, 5, 9, 8, 10, 6, 11, 7] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[1, 4, 5, 9, 3, 10, 8, 2, 6, 7] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7046 end
episode 7047 start
episode columns: [1, 10,

episode 7078 start
episode columns: [10, 2, 4, 5, 9, 0, 1, 8, 11, 7, 6, 3] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[10, 6, 7, 4, 9, 3, 8, 2] train accuracy: 0.69612962963 test accuracy: 0.687666666667
episode 7078 end
episode 7079 start
episode columns: [10, 6, 9, 3, 8, 4, 5, 1, 7, 11, 0, 2] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 6, 7, 4, 9, 3, 8, 2] train accuracy: 0.69612962963 test accuracy: 0.687833333333
episode 7079 end
episode 7080 start
episode columns: [4, 9, 6, 11, 7, 1, 8, 2, 0, 5, 3, 10] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 6, 7, 1, 8, 4, 9, 3, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7080 end
episode 7081 start
episode columns: [10, 6, 7, 1, 5, 2, 8, 11, 4, 3, 9, 0] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[10, 6, 9, 3, 7, 1, 8, 4] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 7081 end
episode 7082 start
episode columns: [3, 4, 9, 

episode 7114 start
episode columns: [9, 3, 8, 4, 6, 10, 1, 7, 5, 0, 11, 2] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[10, 8, 4, 9, 1, 7, 6, 3, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7114 end
episode 7115 start
episode columns: [10, 8, 0, 11, 7, 1, 4, 9, 5, 2, 6, 3] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 8, 4, 3, 9, 1, 7, 6, 2] train accuracy: 0.695592592593 test accuracy: 0.684333333333
episode 7115 end
episode 7116 start
episode columns: [4, 3, 9, 0, 1, 8, 7, 5, 10, 6, 2] epsilon: 0.5 learning rate: 0.05 error: 0.6
episode policy:[10, 8, 4, 3, 9, 1, 7, 6] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 7116 end
episode 7117 start
episode columns: [10, 8, 6, 11, 7, 0, 3, 4, 5, 1, 9] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[10, 8, 4, 3, 9, 1, 7, 6] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 7117 end
episode 7118 start
episode columns: [10, 8, 2

episode 7149 start
episode columns: [0, 11, 9, 1, 2, 4, 8, 10, 6, 3, 5] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 8, 4, 9, 1, 7, 6, 3, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7149 end
episode 7150 start
episode columns: [5, 4, 9, 2, 6, 1, 0, 11, 7, 8, 10, 3] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[10, 8, 4, 9, 1, 7, 6, 3, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7150 end
episode 7151 start
episode columns: [5, 4, 0, 10, 8, 6, 2, 9, 3, 7, 1, 11] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[10, 8, 4, 9, 1, 7, 6, 3, 5, 2] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7151 end
episode 7152 start
episode columns: [10, 3, 4, 9, 1, 7, 6, 8, 2] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[6, 9, 3, 5, 2, 4] train accuracy: 0.69612962963 test accuracy: 0.688833333333
episode 7152 end
episode 7153 start
episode columns: [6, 9, 10, 8

episode 7185 start
episode columns: [4, 11, 8, 1, 5, 6, 2, 9, 7] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[9, 3, 6, 10, 8, 7, 4] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7185 end
episode 7186 start
episode columns: [9, 3, 6, 10, 8, 7, 4] epsilon: 0.5 learning rate: 0.05 error: 0.2
episode policy:[9, 3, 6, 10, 8, 7, 4] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7186 end
episode 7187 start
episode columns: [9, 5, 11, 4, 6, 3, 0, 8, 10, 2, 1, 7] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[9, 3, 6, 10, 8, 7, 4] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7187 end
episode 7188 start
episode columns: [9, 3, 6, 10, 8, 7, 4, 11, 2] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[9, 3, 6, 10, 8, 7, 4] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7188 end
episode 7189 start
episode columns: [9, 3, 6, 2, 1, 4, 11, 8, 7, 5] epsilon: 0.5 learning ra

episode 7221 start
episode columns: [8, 10, 7, 4, 6, 9, 1, 2, 5, 3] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[10, 8, 7, 4, 9, 1, 5, 6, 3] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7221 end
episode 7222 start
episode columns: [10, 8, 2, 4, 9, 3, 6, 0, 11, 7, 5] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[10, 7, 5, 9, 1, 3, 6, 4] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 7222 end
episode 7223 start
episode columns: [9, 8, 10, 7, 4, 0, 3, 5, 6, 2, 1, 11] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[10, 7, 5, 9, 1, 3, 6, 4] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 7223 end
episode 7224 start
episode columns: [10, 8, 7, 3, 2, 4, 9, 1, 11, 6] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[10, 7, 5, 9, 1, 3, 6, 4] train accuracy: 0.696259259259 test accuracy: 0.6865
episode 7224 end
episode 7225 start
episode columns: [10, 7, 2, 1, 5, 9, 3, 6, 4] epsilon: 0.5 learn

episode 7258 start
episode columns: [9, 7, 0, 3, 6, 10, 5, 4] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[10, 3, 9, 7, 4, 6] train accuracy: 0.69637037037 test accuracy: 0.689333333333
episode 7258 end
episode 7259 start
episode columns: [5, 1, 2, 3, 9, 7, 4, 6, 0, 11, 8, 10] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[10, 3, 9, 7, 4, 6] train accuracy: 0.69637037037 test accuracy: 0.689333333333
episode 7259 end
episode 7260 start
episode columns: [10, 3, 9, 7, 4, 1, 5, 8, 2, 6, 11, 0] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 3, 9, 7, 4, 6] train accuracy: 0.69637037037 test accuracy: 0.689333333333
episode 7260 end
episode 7261 start
episode columns: [10, 3, 9, 4, 5, 6, 7, 0, 11, 8, 2] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[10, 3, 9, 7, 4, 6] train accuracy: 0.69637037037 test accuracy: 0.689333333333
episode 7261 end
episode 7262 start
episode columns: [10, 3, 9, 7, 2, 4, 6] epsilon: 0.5 learning rate: 0.0

episode 7295 start
episode columns: [2, 7, 4, 10, 9, 1, 5, 3, 6, 0, 8] epsilon: 0.5 learning rate: 0.05 error: 0.15
episode policy:[9, 6, 7, 4, 5, 8, 10, 3] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 7295 end
episode 7296 start
episode columns: [9, 6, 7, 4, 8, 10, 3, 2, 5, 11] epsilon: 0.5 learning rate: 0.05 error: 0.2
episode policy:[9, 6, 7, 4, 5, 8, 10, 3] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 7296 end
episode 7297 start
episode columns: [8, 10, 2, 4, 9, 6, 7, 11, 1, 5, 3] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[9, 6, 10, 3] train accuracy: 0.696388888889 test accuracy: 0.689333333333
episode 7297 end
episode 7298 start
episode columns: [9, 6, 10, 7, 4, 5, 8, 3, 2, 11] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[9, 6, 10, 3] train accuracy: 0.696388888889 test accuracy: 0.689333333333
episode 7298 end
episode 7299 start
episode columns: [3, 4, 9, 6, 5, 8, 1] epsilon: 0.5 learning rate: 0.0

episode 7331 start
episode columns: [8, 7, 10, 1, 9, 6, 4, 3, 0, 11, 2, 5] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 4, 9, 6, 7, 1, 5, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7331 end
episode 7332 start
episode columns: [4, 9, 3, 11, 8, 7, 5, 6, 10, 2, 1] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 4, 9, 6, 10, 5, 3] train accuracy: 0.696444444444 test accuracy: 0.689333333333
episode 7332 end
episode 7333 start
episode columns: [8, 4, 9, 3, 10, 5, 6, 7, 1, 11, 0, 2] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 4, 9, 6, 10, 5, 3] train accuracy: 0.696444444444 test accuracy: 0.689333333333
episode 7333 end
episode 7334 start
episode columns: [5, 1, 0, 4, 9, 6, 2, 3, 11, 8, 10, 7] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 4, 9, 6, 10, 5, 3] train accuracy: 0.696444444444 test accuracy: 0.689333333333
episode 7334 end
episode 7335 start
episode columns: [1, 9, 7, 4, 10, 

episode 7366 start
episode columns: [4, 9, 1, 2, 7, 11, 8, 10, 6, 5, 0, 3] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[4, 9, 6, 7, 5, 8, 1, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7366 end
episode 7367 start
episode columns: [4, 3, 9, 6, 0, 2, 11, 8, 1] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[4, 9, 6, 7, 5, 8, 1, 3, 10] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7367 end
episode 7368 start
episode columns: [4, 6, 7, 5, 8, 1, 3, 10, 0, 9, 2, 11] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[4, 9, 6, 7, 5, 8, 10, 3] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 7368 end
episode 7369 start
episode columns: [6, 7, 5, 8, 4, 9] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[4, 9, 6, 7, 5, 8, 10, 3] train accuracy: 0.696537037037 test accuracy: 0.688666666667
episode 7369 end
episode 7370 start
episode columns: [7, 5, 6, 10, 3, 9] epsilon: 0.5 l

episode 7402 start
episode columns: [9, 6, 2, 7, 4, 0, 8, 11, 5, 3, 10, 1] epsilon: 0.5 learning rate: 0.05 error: 0.1
episode policy:[8, 4, 9, 6, 10, 1] train accuracy: 0.696314814815 test accuracy: 0.688333333333
episode 7402 end
episode 7403 start
episode columns: [6, 7, 9, 4, 3, 2, 11, 8, 10, 1] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[8, 4, 9, 6, 10, 1] train accuracy: 0.696314814815 test accuracy: 0.688333333333
episode 7403 end
episode 7404 start
episode columns: [8, 4, 9, 6, 1, 3, 10, 7, 11, 5, 2, 0] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 4, 9, 6, 10, 1, 3, 5] train accuracy: 0.696092592593 test accuracy: 0.686166666667
episode 7404 end
episode 7405 start
episode columns: [8, 5, 6, 10, 9, 2, 3, 0, 1] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[9, 6, 10, 1, 3, 4] train accuracy: 0.696277777778 test accuracy: 0.689166666667
episode 7405 end
episode 7406 start
episode columns: [9, 6, 10, 3, 5, 8, 4, 2, 11, 7] epsilon: 0.

episode 7438 start
episode columns: [7, 6, 4, 9] epsilon: 0.5 learning rate: 0.05 error: 0.05
episode policy:[8, 4, 9, 6, 7, 5, 11, 3, 10, 1] train accuracy: 0.690685185185 test accuracy: 0.673833333333
episode 7438 end
episode 7439 start
episode columns: [8, 4, 9, 6, 7, 5, 0, 10, 3, 2, 11] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 4, 9, 6, 7, 5, 11, 3, 10, 1] train accuracy: 0.690722222222 test accuracy: 0.674
episode 7439 end
episode 7440 start
episode columns: [8, 1, 10, 9, 4, 5, 11, 3, 6, 7] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 4, 9, 6, 7, 5, 3, 10, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7440 end
episode 7441 start
episode columns: [8, 5, 6, 4, 9] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 4, 9, 6, 7, 5, 3, 10, 1] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7441 end
episode 7442 start
episode columns: [2, 5, 6, 3, 11, 4, 9] epsilon: 0.5 learning rate: 0.05 e

episode 7475 start
episode columns: [9, 6, 7, 8, 1, 10, 5, 3, 11, 2, 4] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[9, 6, 4, 10, 1] train accuracy: 0.696351851852 test accuracy: 0.689333333333
episode 7475 end
episode 7476 start
episode columns: [2, 3, 7, 6, 4, 10, 1, 9, 11, 8] epsilon: 0.5 learning rate: 0.05 error: 0.3
episode policy:[9, 6, 4, 10, 1] train accuracy: 0.696351851852 test accuracy: 0.689333333333
episode 7476 end
episode 7477 start
episode columns: [9, 6, 4, 11, 10, 8, 2, 3] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[9, 6, 4, 10, 1] train accuracy: 0.696351851852 test accuracy: 0.689333333333
episode 7477 end
episode 7478 start
episode columns: [2, 5, 3, 4, 10, 1, 0, 8, 11, 6, 7, 9] epsilon: 0.5 learning rate: 0.05 error: 0.6
episode policy:[9, 6, 4, 3, 10, 1] train accuracy: 0.696277777778 test accuracy: 0.689166666667
episode 7478 end
episode 7479 start
episode columns: [9, 6, 1, 0, 2, 3, 10, 8, 4] epsilon: 0.5 learning rate: 0.05 er

episode 7511 start
episode columns: [5, 3, 9, 7, 8, 10, 0, 1, 4, 2, 6, 11] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[5, 3, 9, 7, 8, 10, 1, 4, 2, 6] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7511 end
episode 7512 start
episode columns: [5, 3, 7, 8, 6, 4, 1, 9] epsilon: 0.5 learning rate: 0.05 error: 0.7
episode policy:[5, 3, 9, 7, 1, 10, 6, 4, 2, 8] train accuracy: 0.695574074074 test accuracy: 0.682333333333
episode 7512 end
episode 7513 start
episode columns: [9, 7, 10, 1, 4, 2, 3, 6, 8] epsilon: 0.5 learning rate: 0.05 error: 0.2
episode policy:[5, 3, 9, 7, 1, 10, 6, 4, 2, 8] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7513 end
episode 7514 start
episode columns: [5, 3, 9, 10, 1, 7, 6, 4, 11, 2, 8] epsilon: 0.5 learning rate: 0.05 error: 0.65
episode policy:[5, 3, 9, 7, 1, 10, 6, 4, 2, 8] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7514 end
episode 7515 start
episode columns: [5, 1, 11, 10,

episode 7546 start
episode columns: [3, 9, 4, 2, 6, 0, 8, 7, 1, 5, 10] epsilon: 0.5 learning rate: 0.05 error: 0.6
episode policy:[4, 2, 6, 9, 7, 1, 5, 3, 8, 10] train accuracy: 0.695574074074 test accuracy: 0.682166666667
episode 7546 end
episode 7547 start
episode columns: [8, 11, 5, 0, 4, 2, 1, 9, 7, 6, 3, 10] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[4, 2, 6, 3, 9, 7, 1, 5] train accuracy: 0.696037037037 test accuracy: 0.685666666667
episode 7547 end
episode 7548 start
episode columns: [4, 7, 6, 0, 8, 9] epsilon: 0.5 learning rate: 0.05 error: 0.5
episode policy:[4, 2, 6, 3, 9, 7, 1, 5] train accuracy: 0.696037037037 test accuracy: 0.685666666667
episode 7548 end
episode 7549 start
episode columns: [4, 2, 6, 9, 11, 7, 3, 8, 0] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[4, 9, 7, 6, 3, 8, 2] train accuracy: 0.695962962963 test accuracy: 0.688166666667
episode 7549 end
episode 7550 start
episode columns: [4, 8, 10, 6, 0, 7, 11, 5, 9, 2, 3, 1] epsilo

episode 7583 start
episode columns: [4, 9, 6, 11, 5, 10, 1, 7, 8, 0, 2, 3] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[4, 9, 6, 3, 7, 1, 5] train accuracy: 0.696148148148 test accuracy: 0.6875
episode 7583 end
episode 7584 start
episode columns: [4, 9, 6, 0, 8, 10, 1, 5, 2, 3, 11, 7] epsilon: 0.5 learning rate: 0.05 error: 0.55
episode policy:[4, 9, 6, 3, 7, 1, 5] train accuracy: 0.696148148148 test accuracy: 0.6875
episode 7584 end
episode 7585 start
episode columns: [4, 11, 5, 3, 7, 0, 8, 9, 6, 10, 2] epsilon: 0.5 learning rate: 0.05 error: 0.6
episode policy:[4, 9, 6, 3, 7, 1, 5] train accuracy: 0.696148148148 test accuracy: 0.6875
episode 7585 end
episode 7586 start
episode columns: [4, 3, 10, 1, 5, 6, 0, 9, 11, 2] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[4, 9, 6, 3, 7, 1, 5] train accuracy: 0.696148148148 test accuracy: 0.6875
episode 7586 end
episode 7587 start
episode columns: [4, 9, 6, 3, 11, 2] epsilon: 0.5 learning rate: 0.05 error: 0.45
e

episode 7621 start
episode columns: [4, 11, 2, 3, 10, 6, 8, 9, 1, 5, 0, 7] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 9, 7, 1, 10, 4, 3, 6, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7621 end
episode 7622 start
episode columns: [8, 9, 7, 2, 3, 10, 1, 11, 0, 4, 6, 5] epsilon: 0.5 learning rate: 0.05 error: 0.6
episode policy:[8, 9, 7, 1, 10, 4, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 7622 end
episode 7623 start
episode columns: [8, 9, 5, 11, 2, 3, 10, 4, 7, 0] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 9, 7, 1, 10, 4, 6, 3] train accuracy: 0.696333333333 test accuracy: 0.688833333333
episode 7623 end
episode 7624 start
episode columns: [1, 10, 9, 7, 6, 8, 4, 3, 5, 11, 2] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 9, 7, 1, 10, 4, 6, 3] train accuracy: 0.696351851852 test accuracy: 0.688833333333
episode 7624 end
episode 7625 start
episode columns: [6, 2, 0, 9, 10,

episode 7657 start
episode columns: [8, 4, 11, 6, 1, 10, 9, 5, 0, 7, 3] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 9, 7, 6, 4, 3, 10] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7657 end
episode 7658 start
episode columns: [3, 2, 11, 0, 9, 7, 6, 1, 5, 4] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 9, 7, 6, 4, 3, 10] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7658 end
episode 7659 start
episode columns: [7, 6, 4, 3, 10, 9, 1, 2, 5, 0, 8, 11] epsilon: 0.5 learning rate: 0.05 error: 0.45
episode policy:[8, 9, 7, 6, 4, 3, 10] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7659 end
episode 7660 start
episode columns: [11, 6, 4, 1, 8, 5] epsilon: 0.5 learning rate: 0.05 error: 0.25
episode policy:[8, 9, 7, 6, 4, 3, 10] train accuracy: 0.696518518519 test accuracy: 0.689166666667
episode 7660 end
episode 7661 start
episode columns: [6, 4, 5, 8, 0, 7, 2, 9] epsilon: 0.5 learning ra

episode 7693 start
episode columns: [8, 7, 3, 0, 11, 9, 1, 2, 6, 10, 4] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 9, 7, 3, 10, 4, 6, 1, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7693 end
episode 7694 start
episode columns: [8, 9, 7, 6, 10, 4, 3, 2, 5, 11, 1, 0] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 9, 7, 3, 10, 4, 6, 1, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7694 end
episode 7695 start
episode columns: [8, 9, 11, 6, 5, 7, 3, 10, 4, 2] epsilon: 0.5 learning rate: 0.05 error: 0.35
episode policy:[8, 9, 7, 3, 10, 4, 6, 1, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7695 end
episode 7696 start
episode columns: [4, 1, 9, 8, 10, 6, 2, 5] epsilon: 0.5 learning rate: 0.05 error: 0.4
episode policy:[8, 9, 7, 3, 10, 4, 6, 1, 5] train accuracy: 0.696185185185 test accuracy: 0.685833333333
episode 7696 end
episode 7697 start
episode columns: [9, 7, 8, 10, 4, 6, 5