## AIM-5007-1
## By Zeyu Wang
## Fall 2021
## Mini Project

In [103]:
import pandas as pd
import numpy as np

# 1. Load the data

This dataset comes from https://archive.ics.uci.edu/ml/datasets/seeds. There are 7 attributes and they are 
1. area A,
2. perimeter P,
3. compactness C = 4*pi*A/P^2,
4. length of kernel,
5. width of kernel,
6. asymmetry coefficient
7. length of kernel groove.

In [186]:
data = pd.read_csv('seeds_dataset.txt', header = None, delim_whitespace = 'Ture', dtype = 'float')

In [187]:
data.columns = ['area', 'perimeter', 'compactness', 'length of kernel', 'width of kernel', 'asymmetry coefficient', 'length of kernel groove', 'Category']
data

Unnamed: 0,area,perimeter,compactness,length of kernel,width of kernel,asymmetry coefficient,length of kernel groove,Category
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1.0
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1.0
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1.0
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1.0
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1.0
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,3.0
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,3.0
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,3.0
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,3.0


# 2. Clean data
In this part, we'll find out the nan and null data in the dataset and remove them.

In [188]:
data.isna().values.any()

False

In [189]:
data.isnull().values.any()

False

In [190]:
data[data==' '].sum()

area                       0.0
perimeter                  0.0
compactness                0.0
length of kernel           0.0
width of kernel            0.0
asymmetry coefficient      0.0
length of kernel groove    0.0
Category                   0.0
dtype: float64

In this dataset, there isn't any null and nan data.

# 3. Split into train and test dataset
Here my idea is to use NumPy.random to pick up 150 data from the dataset and let them be trained data and then use np.setdiff1d to choose the rest of the data as the test data.

In [191]:
#Because our network has three outputs, so we need to use one-hot encoding to encode the Category before splitting the data.
one_hot = pd.get_dummies(data.Category)
one_hot.columns = ['A', 'B', 'C']
data = data.drop('Category', axis = 1)
data = data.join(one_hot)

In [192]:
np.random.seed(2021)

In [193]:
list_index = np.array([i for i in range(len(data))])

In [194]:
#I random choose 150 data from list_index and let them not replace
train_index = np.random.choice(list_index, 150, replace = False)

In [195]:
#The rest of data as test data
test_index = np.setdiff1d(list_index, train_index)

Reset index of training and testing data in order to avoid the error later

In [196]:
train = data.loc[train_index]
train = train.reset_index(drop = True)
train.head()

Unnamed: 0,area,perimeter,compactness,length of kernel,width of kernel,asymmetry coefficient,length of kernel groove,A,B,C
0,14.49,14.61,0.8538,5.715,3.113,4.116,5.396,1,0,0
1,16.12,15.0,0.9,5.709,3.485,2.27,5.443,1,0,0
2,18.3,15.89,0.9108,5.979,3.755,2.837,5.962,0,1,0
3,15.78,14.91,0.8923,5.674,3.434,5.593,5.136,1,0,0
4,12.3,13.34,0.8684,5.243,2.974,5.637,5.063,0,0,1


In [197]:
test = data.loc[test_index]
test = test.reset_index(drop = True)
test.head()

Unnamed: 0,area,perimeter,compactness,length of kernel,width of kernel,asymmetry coefficient,length of kernel groove,A,B,C
0,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1,0,0
1,14.38,14.21,0.8951,5.386,3.312,2.462,4.956,1,0,0
2,14.11,14.1,0.8911,5.42,3.302,2.7,5.0,1,0,0
3,14.03,14.16,0.8796,5.438,3.201,1.717,5.001,1,0,0
4,15.69,14.75,0.9058,5.527,3.514,1.599,5.046,1,0,0


# 4.Build model

I copy some code from my homework3 and use them to form a complete model. This function includes initiating weight, calculating output, gradient, and so on.

In [198]:
def weight(inputs, hiddends, outputs):
    '''
    This function will create  weights for DL and you need to give it the numbers of input, hiddents and outputs.
    -------------------------------------------------------------------------------------------------
    This function will return you two matrix that including the weight_input and weight_output
    '''
    W_int = np.random.randint(-5, 5, (inputs,hiddends))/10
    bias_input = list(np.random.random(1))
    bias_inputs = np.array(bias_input*hiddends)
    W_int = np.concatenate((W_int, [bias_inputs]), axis=0)
    
    W_out = np.random.randint(-5, 5, (hiddends,outputs))/10
    bias_out = list(np.random.random(1))
    bias_outs = np.array(bias_out*outputs)
    W_out = np.concatenate((W_out, [bias_outs]), axis=0)
    return W_int, W_out

In [199]:
def appendVector(vector):
    '''
    This function is used to extend 1 dimension for vector.
    '''
    vector1 = vector.copy()
    vector1.append(1)
    return vector1

In [200]:
def obtainRawHidden(W_int, W_out, inpis):
    '''
    This function is used to calculate the hidden layer's raw data in a network.
    '''
    inputs_nodes = len(inpis)
    append_f = np.array(appendVector(inpis))
    hraw = append_f.T@W_int
    return hraw

In [201]:
def sigmoidFun(raw):
    '''
    This function is the activate function that is used to transfer the raw data to out data.
    '''
    return 1/(1+(np.exp(-raw)))

In [202]:
def obtainOutput(W_int, W_out, inpis):
    '''
    This function will be used to calculate the function final output by using the output of hidden layer data times the weight.
    '''
    hraw = obtainRawHidden(W_int, W_out, inpis)
    hact = sigmoidFun(hraw)
    hact1 = np.array(appendVector(list(hact)))
    outraw = hact1.T@W_out
    out = sigmoidFun(outraw)
    return out, hact

In [203]:
def gradient_E_Wjk(output, targk, hact):
    '''
    This function will be used to calculate the gradient of weight between the hidden layer and output.
    '''
    gradient_out = []
    gradient_a = (output-targk)*output*(1-output)
    for i in gradient_a:
        gradient_out.append(i*hact)
    gradient_out = np.array(gradient_out)
    gradient_out = gradient_out.T
    return gradient_out

In [204]:
def gradient_E_biasO(output, targk):
    '''
    This function is used to calculate the gradient of bias between the hidden layer and output.
    '''
    gradient_a = ((output-targk)*output*(1-output)).sum()
    return gradient_a

In [205]:
def gradient_E_wij(output, targk, W_out, hact, inpis):
    '''
    This function will be used to calculate the gradient of weight between the input and hidden layer.
    '''
    gradient_a = ((output-targk)*output*(1-output)*W_out).sum()*hact*(1-hact)
    
    gradient_int = []
    int_np = np.array(inpis)
    for i in gradient_a:
        gradient_int.append(i*int_np)
    gradient_int = np.array(gradient_int)
    gradient_int = gradient_int.T
    return gradient_int

In [206]:
def gradient_E_biasH(output, targk, W_out, hact):
    '''
    This function is used to calculate the gradient of bias between the input and hidden layer.
    '''
    gradient_a = (((output-targk)*output*(1-output)*W_out).sum()*hact*(1-hact)).sum()
    return gradient_a

In [207]:
def update_wij(output, targk, W_out, hact, W_int, inpis, alpha):
    '''
    This function will be used to update the weight between the input and hidden layer.
    '''
    g_e_biasH_list = []
    g_E_wij = gradient_E_wij(output, targk, W_out, hact, inpis)
    g_E_biasH = gradient_E_biasH(output, targk, W_out, hact)
    g_E_wij_rows, g_E_wij_columns = g_E_wij.shape
    g_e_biasH_list.append(g_E_biasH)
    g_e_biasH_list = g_e_biasH_list*g_E_wij_columns
    g_E_ij = np.concatenate((g_E_wij, [g_e_biasH_list]), axis = 0)
    new_wij = W_int-alpha*g_E_ij
    return new_wij

In [208]:
def update_Wjk(output, targk, hact, W_out, alpha):
    '''
    This function will be used to update the weight between the hidden layer and output.
    '''
    g_e_biasO_list = []
    g_E_Wjk = gradient_E_Wjk(output, targk, hact)
    g_E_biasO = gradient_E_biasO(output, targk)
    g_E_Wjk_rows, g_E_Wjk_columns = g_E_Wjk.shape
    g_e_biasO_list.append(g_E_biasO)
    g_e_biasO_list = g_e_biasO_list*g_E_Wjk_columns
    g_E_jk = np.concatenate((g_E_Wjk, [g_e_biasO_list]), axis = 0)
    new_Wjk = W_out-alpha*g_E_jk
    return new_Wjk

In [332]:
def encode_output(output):
    '''
    This function is used to encode the output and let they become the one-hot encoding.
    In this function, the highest values will become 1 and the others become 0
    '''
    new_output = []
    maximun_index = np.argmax(np.array(output))
    for i in range(len(output)):
        if i==maximun_index:
            new_output.append(1)
        else:
            new_output.append(0)
    return new_output

In [413]:
def createOutput_pandas(data_dl, W_int, W_out, inpis):
    '''
    This is function will be used to create a big output panda.
    In this function, I will calculate the output and then put them back to the pandas 
    and use Out_A, Out_B and Out_C as the new columns.
    '''
    #save data
    output_data = data_dl.copy()
    
    #Save output data
    Out_A = []
    Out_B = []
    Out_C = []
    hact_list = []
    
    #calculate output
    for i in range(len(data_dl)):
        inpis = list(data_dl.iloc[i,:-3])
        out, hact = obtainOutput(W_int, W_out, inpis)
        #Use one-hot encoding to encode the output
        out_onehot = encode_output(out)
        Out_A.append(out[0])
        Out_B.append(out[1])
        Out_C.append(out[2])
        hact_list.append(hact)
    output_data['Out_A'] = Out_A
    output_data['Out_B'] = Out_B
    output_data['Out_C'] = Out_C
    output_data['hact_list'] = hact_list
    output_data['error'] = 0.5*(((output_data.Out_A-output_data.A)**2)+((output_data.Out_B-output_data.B)**2)+((output_data.Out_C-output_data.C)**2))
    
    return output_data

# 5. Train model

Here is to start to combine several functions above and train the training data. By the way, the model has the best performance with the hyperparameter alpha = 0.06, hidden_nodes = 7 and output_nodes = 3. But this model still has a probability to fail in train and I suppose maybe there is an outlier in the data and needs to be removed. But this model will have a about 1/3 probability to run successfully. And the best error is greater than 11 and the higher accuracy is less than 92. The fail running means is the model is hard to convergence.

In this process, I will imitate the perception algorithm. Firstly, I will randomly pick up data to calculate and the result. Then use this result to calculate gradient, output, and error. Then calculate all other rows data's predict output. Select those data that are different from the target data. Randomly choose one of the rows of data from this error prediction result data and use this row data to calculate and update the weight. Calculate the accuracy after each 100 training process. If the accuracy, steps, and error meet the requirement, stop training and output the weight of the input and hidden layer. Then use this weight list to test and calculate some information about the test.

In [530]:
def DeelLearning(data_dl):
    '''
    This function will use all of the functions above to learning and update the weight.
    -------------------------------------------------------------------------------------
    This function will return the two new weights. We can use these two weights to predict.
    '''
    # Hyperparameter
    hidden_nodes = 7
    output_nodes = 3
    #alpha = 0.04 error>30
    #alpha = 0.06 error>11 accuracy <92
    #alpha = 0.08 error>29
    alpha = 0.06
    
    ##get target list
    onehot_targe_list = []
    for i in range(len(data_dl)):
        targe_onthot = data_dl.iloc[i, -3:]
        onehot_targe_list.append(list(targe_onthot))
    onehot_targe_list = np.array(onehot_targe_list)
    
    # randomly extract data from dataset and use it to start
    randon_data = data_dl.sample().values[0]
    targk = list(randon_data[-3:])
    inpis = list(randon_data[:-3])
    inputs_nodes = len(inpis)
    
    #Get the first weight of input and hidden layer
    W_int, W_out = weight(inputs_nodes, hidden_nodes, output_nodes)
    
    #calculate the output data
    output_data = createOutput_pandas(data_dl, W_int, W_out, inpis)
    
    #Calculate_error
    error = output_data.error.sum()
    
    step = 1
    accuracy = 0
    # If the error>3 and doesn't reach 100000 and accuracy less than 0.9, keep training
    while error>3 and step<100000 and accuracy<.90:
        #Pick up data which its result is incorrect
        incorrct_result = output_data[output_data.error!=0]
        incorrct_result = incorrct_result.reset_index(drop = True)
        
        #Random pick up one row incorrect result data
        one_row_data = incorrct_result.sample().values[0]
        #Extract input and output
        targk = list(one_row_data[7:10])
        inpis = list(one_row_data[:7])
        hact = one_row_data[-2]
        out = []
        out.append(one_row_data[-5])
        out.append(one_row_data[-4])
        out.append(one_row_data[-3])
        
        #update weight and bias
        new_wij = update_wij(np.array(out), np.array(targk), W_out, hact, W_int, inpis, alpha)
        new_Wjk = update_Wjk(np.array(out), np.array(targk), hact, W_out, alpha)
        W_int, W_out = new_wij, new_Wjk
        
        #Get calculate output result and calculate the error
        output_data = createOutput_pandas(data_dl, W_int, W_out, inpis)
        error = output_data.error.sum()
        step+=1
        
        #calculate accuracy and print accuracy after each 100 trainning, so the epoch=100
        if step%100 == 0:
            #calculate accuray
            ##get onehot output
            onehot_output_list = []
            for i in range(len(output_data)):
                out_onthot = output_data.iloc[i, -5:-2]
                out_onthot = encode_output(out_onthot)
                onehot_output_list.append(out_onthot)
            compare_list = np.equal(onehot_targe_list,onehot_output_list)
            compare_result = []
            for i in compare_list:
                if list(i)==[True, True, True]:
                    compare_result.append(1)
                else:
                    compare_result.append(0)
            compare_result = np.array(compare_result)
            accuracy = len(compare_result[compare_result==1])/len(compare_result)
            print(f'error = {error}, accuracy = {accuracy}')
            
    #If the accuracy>=0.9, calculate its Confusion Matrix, Precision, Recall and F1 score
    if accuracy>=0.9:
        Confusion_Matrix_test = [[0, 0, 0],[0, 0, 0], [0, 0, 0]]
        for i in range(len(onehot_targe_list)):
            if onehot_targe_list[i][0]==1:
                if onehot_output_list[i][0] == 1:
                    Confusion_Matrix_test[0][0]+=1
                elif onehot_output_list[i][1] == 1:
                    Confusion_Matrix_test[0][1]+=1
                else:
                    Confusion_Matrix_test[0][2]+=1
            elif onehot_targe_list[i][1]==1:
                if onehot_output_list[i][0] == 1:
                    Confusion_Matrix_test[1][0]+=1
                elif onehot_output_list[i][1] == 1:
                    Confusion_Matrix_test[1][1]+=1
                else:
                    Confusion_Matrix_test[1][2]+=1
            elif onehot_targe_list[i][2]==1:
                if onehot_output_list[i][0] == 1:
                    Confusion_Matrix_test[2][0]+=1
                elif onehot_output_list[i][1] == 1:
                    Confusion_Matrix_test[2][1]+=1
                else:
                    Confusion_Matrix_test[2][2]+=1
            
        print(f'W_int = {W_int}, W_out = {W_out}')
        print(f'Train {step} steps, the epochs={100}, final error = {error}, accuracy = {accuracy}')
        print(f'Confusion_Matrix_test = {Confusion_Matrix_test}')
        #for class1
        calss1_Precision = Confusion_Matrix_test[0][0]/(Confusion_Matrix_test[0][0]+Confusion_Matrix_test[0][1]+Confusion_Matrix_test[0][2])
        class1_Recall = Confusion_Matrix_test[0][0]/(Confusion_Matrix_test[0][0]+Confusion_Matrix_test[1][0]+Confusion_Matrix_test[2][0])
        print(f'For calss1, Precision = {calss1_Precision}')
        print(f'For calss1, Recall = {class1_Recall}')
        print(f'For calss1, F1_score = {(2*calss1_Precision*class1_Recall)/(calss1_Precision+class1_Recall)}')
        
        #for class2
        calss2_Precision = Confusion_Matrix_test[1][1]/(Confusion_Matrix_test[1][0]+Confusion_Matrix_test[1][1]+Confusion_Matrix_test[1][2])
        class2_Recall = Confusion_Matrix_test[1][1]/(Confusion_Matrix_test[0][1]+Confusion_Matrix_test[1][1]+Confusion_Matrix_test[2][1])
        print(f'For calss1, Precision = {calss2_Precision}')
        print(f'For calss1, Recall = {class2_Recall}')
        print(f'For calss1, F1_score = {(2*calss2_Precision*class2_Recall)/(calss2_Precision+class2_Recall)}')
        
        #for class3
        calss3_Precision = Confusion_Matrix_test[2][2]/(Confusion_Matrix_test[2][0]+Confusion_Matrix_test[2][1]+Confusion_Matrix_test[2][2])
        class3_Recall = Confusion_Matrix_test[2][2]/(Confusion_Matrix_test[0][2]+Confusion_Matrix_test[1][2]+Confusion_Matrix_test[2][2])
        print(f'For calss1, Precision = {calss3_Precision}')
        print(f'For calss1, Recall = {class3_Recall}')
        print(f'For calss1, F1_score = {(2*calss3_Precision*class3_Recall)/(calss3_Precision+class3_Recall)}')
        
    return W_int, W_out

In [531]:
W_in, W_out = DeelLearning(train)

error = 51.27531351445661, accuracy = 0.32666666666666666
error = 49.9494469544596, accuracy = 0.32666666666666666
error = 48.9921646100386, accuracy = 0.64
error = 49.70002358441548, accuracy = 0.3466666666666667
error = 49.94225774318171, accuracy = 0.3466666666666667
error = 50.35473390923373, accuracy = 0.3466666666666667
error = 48.18186639822803, accuracy = 0.36666666666666664
error = 48.49189500248838, accuracy = 0.52
error = 47.10027532477841, accuracy = 0.3466666666666667
error = 46.341341522459395, accuracy = 0.4266666666666667
error = 45.612039790800836, accuracy = 0.5333333333333333
error = 46.265800801860784, accuracy = 0.4266666666666667
error = 43.60669336057228, accuracy = 0.6266666666666667
error = 43.6325431535637, accuracy = 0.6733333333333333
error = 44.747099890489494, accuracy = 0.8
error = 46.150145802307804, accuracy = 0.38666666666666666
error = 44.194227716897174, accuracy = 0.54
error = 44.33816058887176, accuracy = 0.43333333333333335
error = 47.856458277286

# 6. Test the testing data

After I ran part 5, I got the weight of the input and hidden layer. I used these parameters to calculate the output result of the test dataset. 

In [574]:
def test_testdate(test, W_int, W_out):
    hidden_nodes = 7
    output_nodes = 3
    output_data = createOutput_pandas(test, W_int, W_out, 1)
    error = output_data.error.sum()
    
    ##get target data list from test dataset 
    onehot_targe_list = []
    for i in range(len(test)):
        targe_onthot = test.iloc[i, -3:]
        onehot_targe_list.append(list(targe_onthot))
    onehot_targe_list = np.array(onehot_targe_list)
    
    ## Calculate and get the predict output
    onehot_output_list = []
    for i in range(len(output_data)):
        out_onthot = output_data.iloc[i, -5:-2]
        out_onthot = encode_output(out_onthot)
        onehot_output_list.append(out_onthot)
    compare_list = np.equal(onehot_targe_list,onehot_output_list)
    compare_result = []
    for i in compare_list:
        if list(i)==[True, True, True]:
            compare_result.append(1)
        else:
            compare_result.append(0)
    compare_result = np.array(compare_result)
    accuracy = len(compare_result[compare_result==1])/len(compare_result)
    
    #Calculate the Confusion Matrix, Precision, Recall and F1 score.
    Confusion_Matrix_test = [[0, 0, 0],[0, 0, 0], [0, 0, 0]]
    for i in range(len(onehot_targe_list)):
        if onehot_targe_list[i][0]==1:
            if onehot_output_list[i][0] == 1:
                Confusion_Matrix_test[0][0]+=1
            elif onehot_output_list[i][1] == 1:
                Confusion_Matrix_test[0][1]+=1
            else:
                Confusion_Matrix_test[0][2]+=1
        elif onehot_targe_list[i][1]==1:
            if onehot_output_list[i][0] == 1:
                Confusion_Matrix_test[1][0]+=1
            elif onehot_output_list[i][1] == 1:
                Confusion_Matrix_test[1][1]+=1
            else:
                Confusion_Matrix_test[1][2]+=1
        elif onehot_targe_list[i][2]==1:
            if onehot_output_list[i][0] == 1:
                Confusion_Matrix_test[2][0]+=1
            elif onehot_output_list[i][1] == 1:
                Confusion_Matrix_test[2][1]+=1
            else:
                Confusion_Matrix_test[2][2]+=1
            
    print(f'W_int = {W_int}')
    print(f'W_out = {W_out}')
    print(f'the epochs={100}, final error = {error}, accuracy = {accuracy}')
    print(f'Confusion_Matrix_test = {Confusion_Matrix_test}')
    #for class1
    calss1_Precision = Confusion_Matrix_test[0][0]/(Confusion_Matrix_test[0][0]+Confusion_Matrix_test[0][1]+Confusion_Matrix_test[0][2])
    class1_Recall = Confusion_Matrix_test[0][0]/(Confusion_Matrix_test[0][0]+Confusion_Matrix_test[1][0]+Confusion_Matrix_test[2][0])
    print(f'For calss1, Precision = {calss1_Precision}')
    print(f'For calss1, Recall = {class1_Recall}')
    print(f'For calss1, F1_score = {(2*calss1_Precision*class1_Recall)/(calss1_Precision+class1_Recall)}')

    #for class2
    calss2_Precision = Confusion_Matrix_test[1][1]/(Confusion_Matrix_test[1][0]+Confusion_Matrix_test[1][1]+Confusion_Matrix_test[1][2])
    class2_Recall = Confusion_Matrix_test[1][1]/(Confusion_Matrix_test[0][1]+Confusion_Matrix_test[1][1]+Confusion_Matrix_test[2][1])
    print(f'For calss1, Precision = {calss2_Precision}')
    print(f'For calss1, Recall = {class2_Recall}')
    print(f'For calss1, F1_score = {(2*calss2_Precision*class2_Recall)/(calss2_Precision+class2_Recall)}')

    #for class3
    calss3_Precision = Confusion_Matrix_test[2][2]/(Confusion_Matrix_test[2][0]+Confusion_Matrix_test[2][1]+Confusion_Matrix_test[2][2])
    class3_Recall = Confusion_Matrix_test[2][2]/(Confusion_Matrix_test[0][2]+Confusion_Matrix_test[1][2]+Confusion_Matrix_test[2][2])
    print(f'For calss1, Precision = {calss3_Precision}')
    print(f'For calss1, Recall = {class3_Recall}')
    print(f'For calss1, F1_score = {(2*calss3_Precision*class3_Recall)/(calss3_Precision+class3_Recall)}')

In [575]:
test_testdate(test, W_in, W_out)

W_int = [[-2.79410326e+00  2.86094243e+00 -3.33721350e-01 -1.05607658e+00
  -5.06310314e-01 -1.90208597e-01 -9.56754646e-02]
 [ 3.47859367e+00 -7.92083238e-01 -4.40615652e-01  2.36467012e+00
  -3.07809729e-01  6.16114554e-01 -3.96605412e-01]
 [ 9.25908557e-01 -2.54504156e-01  9.73645193e-02  4.86274602e-01
   3.99490825e-01 -4.86118651e-01  2.03490942e-04]
 [ 1.50332871e+00 -1.31772503e+00 -4.16381061e-01  1.15473336e+00
  -4.03168448e-01  1.82710787e-01 -3.98811378e-01]
 [ 1.37739079e+00  8.93313455e-01 -5.08358340e-01  7.15672379e-01
  -1.01579381e-01 -2.49875773e-01 -2.99063027e-01]
 [-2.65842922e+00 -2.49908889e+00  1.82807638e-01 -3.03017744e+00
  -3.29407189e-03  4.23113727e-01 -3.00547166e-01]
 [-2.75553033e+00 -2.89564993e+00  2.83280314e-01 -1.18626480e+00
  -3.29580107e-03  2.71476529e-01 -2.99321781e-01]
 [ 1.56150101e+00  1.56150101e+00  1.56150101e+00  1.56150101e+00
   1.56150101e+00  1.56150101e+00  1.56150101e+00]]
W_out = [[ 2.59431271 -3.57457346  0.8029079 ]
 [ 2.075

Finally, We got a not bad accuracy Precision, Recall and F1_score on test dataset.