---

<center>
    
## Knowledge Recommendation Processing System
    
### Spelix Inc. R&D Center


---

In [1]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import sys,os

tf.set_random_seed(777)

def data_preprocessing(path='./inputdata/index_model.csv'):
    
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32)
    X_data = xy[:, 1:-1]
    y_data2 = xy[:, [-1]]
    y_data_temp,y_data=[],[]
    
    for i in range(len(y_data2)):
        temp=[]
        if y_data2[i][0] < 5: temp.append(y_data2[i][0])
        else : temp.append(5)
        y_data_temp.append(temp)

    y_data = np.asarray(y_data_temp,dtype=np.float32)
        
    x_train_data, x_test,  y_train_data, y_test = train_test_split(X_data, y_data, test_size=0.2)
    x_train, x_valid,  y_train, y_valid = train_test_split(x_train_data, y_train_data)
    
    nb_classes = len(np.unique(y_data))
    x_colum = X_data.shape[1]
    
    return x_train,y_train,x_test,y_test,x_valid,y_valid,nb_classes,x_colum,X_data,y_data,y_data2

def sigma(x):
    # sigmoid function
    # σ(x) = 1 / (1 + exp(-x))
    return 1. / (1. + tf.exp(-x))

def sigma_prime(x):
    # derivative of the sigmoid function
    # σ'(x) = σ(x) * (1 - σ(x))
    return sigma(x) * (1. - sigma(x))

def data_embedding(nb_classes,x_colum):
    
    X = tf.placeholder(tf.float32, [None, x_colum])
    y = tf.placeholder(tf.int32, [None, 1])

    target = tf.one_hot(y, nb_classes)
    target = tf.reshape(target, [-1, nb_classes])
    target = tf.cast(target, tf.float32)
    
    Y_one_hot = tf.one_hot(y, nb_classes)  
    Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])
    
    return X, y, target,Y_one_hot

def layer_structed(X, y, target, nb_classes, x_colum):
    
    keep_prob = tf.placeholder(tf.float32)
    
    W1 = tf.get_variable("W1", shape=[x_colum, x_colum],initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.Variable(tf.random_normal([x_colum]), name='bias1')
    l1 = tf.sigmoid(tf.matmul(X, W1) + b1)
    l1 = tf.nn.dropout(l1, keep_prob=keep_prob)

    W2 = tf.get_variable("W2", shape=[x_colum, x_colum],initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.Variable(tf.random_normal([x_colum]), name='bias2')
    l2 = tf.sigmoid(tf.matmul(l1, W2) + b2)
    l2 = tf.nn.dropout(l2, keep_prob=keep_prob)

    W3 = tf.get_variable("W3", shape=[x_colum, x_colum],initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.Variable(tf.random_normal([x_colum]), name='bias3')
    l3 = tf.sigmoid(tf.matmul(l2, W3) + b3)
    l3 = tf.nn.dropout(l3, keep_prob=keep_prob)

    W4 = tf.get_variable("W4", shape=[x_colum, x_colum],initializer=tf.contrib.layers.xavier_initializer())
    b4 = tf.Variable(tf.random_normal([x_colum]), name='bias4')
    l4 = tf.sigmoid(tf.matmul(l3, W4) + b4)
    l4 = tf.nn.dropout(l2, keep_prob=keep_prob)

    W5 = tf.get_variable("W5", shape=[x_colum, nb_classes],initializer=tf.contrib.layers.xavier_initializer())
    b5 = tf.Variable(tf.random_normal([nb_classes]), name='bias5')
    #     y_pred = tf.sigmoid(tf.matmul(l4, W5) + b5)
    
    # Forward propagtion
    layer_5 = tf.matmul(X, W5) + b5
    y_pred = sigma(layer_5)
    
    return W5, b5, layer_5, y_pred, keep_prob

def loss_function(target,y_pred):
    # Loss Function (end of forwad propagation)
    loss_i = - target * tf.log(y_pred) - (1. - target) * tf.log(1. - y_pred)
    loss = tf.reduce_mean(loss_i)
    # Dimension Check
    assert y_pred.shape.as_list() == target.shape.as_list()
    return loss

def optimizer(y_pred,target,layer_5,X):
    # Back prop
    d_loss = (y_pred - target) / (y_pred * (1. - y_pred) + 1e-7)
    d_sigma = sigma_prime(layer_5)
    d_b = d_loss * d_sigma #d_layer
    d_W = tf.matmul(tf.transpose(X), d_b)
    return d_b, d_W

def pred_to_list(pred):
    pred_list=[]
    for i in range(len(pred)):
        temp=[]
        temp.append(pred[i])
        pred_list.append(temp)
    return pred_list

def pred_by_restore(checkpoint_path, W5, b5, X, temp, y):
    
    predict_list=[]
    
    #hypothesis
    hypothesis = tf.nn.sigmoid(tf.matmul(X, W5) + b5)
    
    #prediction
    prediction = tf.argmax(hypothesis, 1) 
    
    #sess
    sess = tf.Session()
    
    #restore
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    pred = sess.run(prediction, feed_dict={ X : temp})
    pred_list = sess.run(hypothesis,  feed_dict={ X: temp, y: pred_to_list(pred)}).tolist()

    for i in range(len(pred_list)):
        temp=[]
        pred_list_sort, pred_list_index = sorted(pred_list[i],reverse=True),[]
        
        for j in range(len(pred_list[i])):
            pred_list_index.append(pred_list[i].index(pred_list_sort[j]))
            
        temp.append(pred_list_sort)
        temp.append(pred_list_index)
        predict_list.append(temp)
        
    return predict_list

def hyun(y,model_0,model_1,model_2,model_3,model_4):
    
    final_rank=[]
    
    for k in range(1):
        k=400
        final_rank_temp, final_temp=[], []
        
        model_index=[model_0[k][1],model_1[k][1],model_2[k][1],model_3[k][1],model_4[k][1]]
        model_pers=[model_0[k][0],model_1[k][0],model_2[k][0],model_3[k][0],model_4[k][0]]
        print(model_index)
        print(model_pers)
        
        for i in range(len(model_index)):
            if i != (len(model_index)-1):
                for j in range(6):
                    temp=[]
                    if model_index[i][j] != 5:
                        temp.append((i*5)+j)
                        temp.append(model_pers[i][j])
                        final_temp.append(temp)
                    else:
                        if i==0:rr=j
                        break
            else:
                for j in range(6):
                    temp=[]
                    temp.append((i*5)+j)
                    temp.append(model_pers[i][j])
                    final_temp.append(temp)
            print(final_temp)
                
        final_rank_temp.append(final_temp[0][0])
        print(final_temp[0][0])
        final_rank_temp.append(final_temp[:6])
        print(final_temp[:6])
        final_rank.append(final_rank_temp)
    print(final_rank)
    
    return final_rank

def hyun2(model_0,model_1,model_2,model_3,model_4):
    model_list=[]
    for i in range(len(model_0)):
        model_list_temp=[]
        frist_intserrup=0
        for j0 in range(6):
            if model_0[i][1][j0] == 5 :
                frist_intserrup=j0
                break
            else :model_list_temp.append(model_0[i][1][j0])
        for j1 in range(6):
            if model_1[i][1][j1] == 5 :break
            else :model_list_temp.append(model_1[i][1][j1]+5)
        for j2 in range(6):
            if model_2[i][1][j2] == 5 :break
            else :model_list_temp.append(model_2[i][1][j2]+10)
        for j3 in range(6):
            if model_3[i][1][j3] == 5 :break
            else :model_list_temp.append(model_3[i][1][j3]+15)
        for j4 in range(6):
            if model_4[i][0][j4] < 0.5 :
                for j5 in range(6-len(model_list_temp)):
                    try :
                        model_list_temp.append(model_0[i][1][frist_intserrup+j5+1])
                    except IndexError:
                        pass
            else : model_list_temp.append(model_4[i][1][j4]+20)
        model_list_temp=model_list_temp[:6]
        model_list.append(model_list_temp)
    return model_list

def save_csv(path,y_data,final_rank):
    
    pre_list=[]
    bool_list=[]
    
    for i in range(len(final_rank)):
        pre_list.append(final_rank[i])
        bool_list.append(final_rank[i] == y_data.flatten()[i])

    my_dict = {"Y": y_data.flatten(), "Pre": pre_list, "c": bool_list}
    df = pd.DataFrame(my_dict)
    
    df.to_csv(path, encoding='euc-kr')
    
def save_csv2(path,y_data,final_rank):
    
    new_df = pd.DataFrame(columns=['rank_0', 'rank_1','rank_2','rank_3','rank_4','rank_5'],
                          data=final_rank)
    
    y_data2=[]
    for i in range(len(y_data)):
        y_data2.append(y_data[i][0])
    
    new_df['real_Y'] = y_data2
    
    new_df['bool_result'] = (new_df['rank_0'] == new_df['real_Y']) | (new_df['rank_1'] == new_df['real_Y']) | (new_df['rank_2'] == new_df['real_Y']) | (new_df['rank_3'] == new_df['real_Y'])  | (new_df['rank_4'] == new_df['real_Y'])
    
    new_df.to_csv(path, encoding='euc-kr', index=False)
    
    return new_df

def print_predict(df):
    
    a=len(df.bool_result)
    b=len(df[df['bool_result']==True])
    c=(int(b)/int(a))*100
    t1,t2=[],[]
    
    for i in range(5):
        t1.append(len(df[df['real_Y']==df[('rank_{0}'.format(i))]]))

    for i in range(5):
        t2.append((int(t1[i])/int(a))*100)
              
    print("result_count:", a)
    print("----------------------")
    print("result_ture_count:", b)
    print("----------------------")
    print("acc(%):", c)
              
    for i in range(5):
        print('-----------------------')
        print('rank_{0}_acc : {1} %' .format(i,t2[i]))
    
def train(x_train,y_train,x_test,y_test,x_valid,y_valid,nb_classes,x_colum): #back_propagtion
    
    learning_rate = 0.0000005
    global_step = 500001
    valid_step = 10001
    view_step = 5000
    saver_step = 10000
    
    #data_embedding
    X, y, target,Y_one_hot = data_embedding(nb_classes,x_colum)
    
    #layer_structed
    W5, b5, layer_5, y_pred, keep_prob = layer_structed(X, y, target, nb_classes, x_colum)
    
    #loss_function
    loss = loss_function(target,y_pred)

    #optimizer
    d_b, d_W = optimizer(y_pred, target, layer_5, X)
    
    # Train
    # Updating network using gradients
    train_step = [
        tf.assign(W5, W5 - learning_rate * d_W),
        tf.assign(b5, b5 - learning_rate * tf.reduce_sum(d_b)),]

    # Prediction and Accuracy
    prediction = tf.argmax(y_pred, 1)
    acct_mat = tf.equal(tf.argmax(y_pred, 1), tf.argmax(target, 1))
    acct_res = tf.reduce_mean(tf.cast(acct_mat, tf.float32))

    saver = tf.train.Saver()
    sess = tf.Session()

    sess.run(tf.global_variables_initializer())
    
    train_epoch=[[global_step, x_train, y_train],
                [valid_step, x_valid, y_valid]]
    
    for i in range(len(train_epoch)) :
        for step in range(train_epoch[i][0]):
            sess.run(train_step, feed_dict={X: train_epoch[i][1], y: train_epoch[i][2], keep_prob: 0.6})

            if step % view_step == 0:
                step_loss, acc = sess.run([loss, acct_res], feed_dict={X:x_test, y:y_test})
                print("Step: {:5}\t Loss: {:10.5f}\t Acc: {:.2%}" .format(step, step_loss, acc))
            if step % saver_step == 0:
                saver.save(sess, './model_test/', global_step=step)

    pred = sess.run(prediction, feed_dict={X: x_test, keep_prob: 1})

def load(nb_classes,x_colum,X_data,y_data,y_data2,path):

    #input_data
    path='./input_data/index_model.csv'
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32)
    xy2=xy[0].astype(np.int)
    xy3=xy2[1:-1]
    temp=''
    for i in range(len(xy3)):
        temp += (str(xy3[i])+',')
    temp=temp[:-1]
    temp2 = temp.split(",")
    temp3 = np.array(temp2)
    temp4 = temp3.astype(np.float32)
    df_input = (pd.DataFrame(temp4)).T
    
    #data_embedding
    X, y, target,Y_one_hot = data_embedding(nb_classes,x_colum)
    
    #layer_structed
    W5, b5, layer_5, y_pred, keep_prob = layer_structed(X, y, target, nb_classes, x_colum)
    
    model_0 = pred_by_restore('./model/model_0',W5, b5, X, df_input,y)
    model_1 = pred_by_restore('./model/model_1',W5, b5, X, df_input,y)
    model_2 = pred_by_restore('./model/model_2',W5, b5, X, df_input,y)
    model_3 = pred_by_restore('./model/model_3',W5, b5, X, df_input,y)
    model_4 = pred_by_restore('./model/model_4',W5, b5, X, df_input,y)
    
    model_list = hyun2(model_0,model_1,model_2,model_3,model_4)
    
    return(model_list[0])

---

### Data Pre-Processing

In [2]:
#Data Pre_processing

x_train,y_train,x_test,y_test,x_valid,y_valid,nb_classes,x_colum,X_data,y_data,y_data2=data_preprocessing(
    path = './input_data/index_model.csv')

---

### Training

In [3]:
# #Train

# train(x_train,y_train,x_test,y_test,x_valid,y_valid,nb_classes,x_colum)

---

### Load Pre_Trained Model

In [4]:
#Load

new_df=load(nb_classes,x_colum,X_data,y_data,y_data2,
     path='./predict_result/result.csv')


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./model/model_0\-10000
INFO:tensorflow:Restoring parameters from ./model/model_1\-10000
INFO:tensorflow:Restoring parameters from ./model/model_2\-10000
INFO:tensorflow:Restoring parameters from ./model/model_3\-10000
INFO:tensorflow:Restoring parameters from ./model/model_4\-10000
[[0, 3, 23, 1, 4, 2]]


---

In [5]:
import numpy as np

path='./input_data/index_model.csv'

xy = np.loadtxt(path, delimiter=',', dtype=np.float32)

type(xy[0])

numpy.ndarray

In [6]:
import numpy as np

path='./input_data/index_model.csv'

xy = np.loadtxt(path, delimiter=',', dtype=np.float32)
xy2=xy[0].astype(np.int)
xy3=xy2[1:-1]

temp=''
for i in range(len(xy3)):
    temp += (str(xy3[i])+',')
temp=temp[:-1]
print(temp)

0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

In [7]:
temp2 = temp.split(",")
temp3 = np.array(temp2)
temp4 = temp3.astype(np.float32)
df = (pd.DataFrame(temp4)).T
df
# temp4.to

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
X_data = temp4[1:-1]
y_data2 = [[-1]]
y_data_temp,y_data=[],[]
y_data2

for i in range(len(y_data2)):
    temp=[]
    if y_data2[i][0] < 5: temp.append(y_data2[i][0])
    else : temp.append(5)
    y_data_temp.append(temp)

In [9]:

from sklearn.model_selection import train_test_split
import numpy as np

path='./input_data/index_model.csv'

xy = np.loadtxt(path, delimiter=',', dtype=np.float32)


X_data = xy[:, 1:-1]
y_data2 = xy[:, [-1]]
y_data_temp,y_data=[],[]

for i in range(len(y_data2)):
    temp=[]
    if y_data2[i][0] < 5: temp.append(y_data2[i][0])
    else : temp.append(5)
    y_data_temp.append(temp)

y_data = np.asarray(y_data_temp,dtype=np.float32)

x_train_data, x_test,  y_train_data, y_test = train_test_split(X_data, y_data, test_size=0.2)
x_train, x_valid,  y_train, y_valid = train_test_split(x_train_data, y_train_data)

nb_classes = len(np.unique(y_data))
x_colum = X_data.shape[1]



In [10]:
nb_classes

6

In [11]:
x_colum

1444

In [12]:
print(X_data[0])

[0. 0. 0. ... 0. 0. 0.]


In [13]:
type(y_data)

numpy.ndarray

In [14]:
type(y_data2)

numpy.ndarray

---

In [15]:
t=X_data[0].tolist()

In [16]:
y=X_data[0].astype(np.int)

In [17]:
yy=y.tolist()

In [18]:
yy


[0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [19]:
np.savetxt("./aaaaa.txt", yy, newline=" ")

In [20]:
float32

NameError: name 'float32' is not defined