In [None]:
import numpy as np
import chainer

from chainer import cuda,utils,Variable
from chainer import optimizers
from chainer import Link,Chain
import chainer.functions as F
import chainer.links as L
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
## Print Chainer's CUDA and cuDNN availability flags
for label, flag in (
    ('GPU availability:', chainer.cuda.available),
    ('cuDNN availablility:', chainer.cuda.cudnn_enabled),
):
    print(label, flag)

In [None]:
## Load the train / test feature (x) and label (y) arrays for red and white wine
def _load_xy(x_path, y_path):
    """Load one feature file and its label file, features first."""
    features = np.load(x_path)
    labels = np.load(y_path)
    return features, labels


x_train_r, y_train_r = _load_xy("./red_wine_x_train.npy", "./red_wine_y_train.npy")
x_train_w, y_train_w = _load_xy("./white_wine_x_train.npy", "./white_wine_y_train.npy")

x_test_r, y_test_r = _load_xy("./red_wine_x_test.npy", "./red_wine_y_test.npy")
x_test_w, y_test_w = _load_xy("./white_wine_x_test.npy", "./white_wine_y_test.npy")


## Standardise every feature column of x_train / x_test (z-score)
import scipy.stats


def _zscore_with_train_stats(train, test):
    """Z-score ``train`` and ``test`` column-wise using the statistics of ``train``.

    The mean and (population, ddof=0) standard deviation are computed from the
    training rows only and then applied to both splits, so the test features
    are expressed on exactly the scale the model is trained on and no
    test-set statistics leak into preprocessing.

    Parameters
    ----------
    train, test : 2-D arrays with one row per sample and one column per feature.

    Returns
    -------
    (train_z, test_z) : the standardised copies of the two inputs.
    """
    mean = train.mean(axis=0)
    std = train.std(axis=0)
    # A constant column has std == 0; divide by 1 instead so it becomes 0, not NaN.
    std[std == 0] = 1.0
    return (train - mean) / std, (test - mean) / std


x_train_zscore_r, x_test_zscore_r = _zscore_with_train_stats(x_train_r, x_test_r)
x_train_zscore_w, x_test_zscore_w = _zscore_with_train_stats(x_train_w, x_test_w)

In [None]:
## Convert the standardised features and the labels to float32 CuPy arrays

import cupy as cp


def _as_cupy_float32(array):
    """Return ``array`` as a CuPy array with dtype float32."""
    return cp.asarray(array, dtype=cp.float32)


# Training split (the x_* names now refer to the standardised features)
x_train_r = _as_cupy_float32(x_train_zscore_r)
y_train_r = _as_cupy_float32(y_train_r)
x_train_w = _as_cupy_float32(x_train_zscore_w)
y_train_w = _as_cupy_float32(y_train_w)

# Test split
x_test_r = _as_cupy_float32(x_test_zscore_r)
y_test_r = _as_cupy_float32(y_test_r)
x_test_w = _as_cupy_float32(x_test_zscore_w)
y_test_w = _as_cupy_float32(y_test_w)

In [None]:
def ndcg(y_true, y_score, k):
    '''
    Compute NDCG@k, a rank-aware evaluation metric.

    The items are ordered by descending predicted score, the true relevance
    labels are read in that order, and the resulting DCG (gain ``2**rel - 1``,
    discount ``log2(rank + 1)``) is divided by the DCG of the ideal ordering.

    Parameters
    ----------
    y_true : array of true relevance labels (flattened with ``ravel``).
    y_score : array of predicted scores, same number of elements as ``y_true``.
    k : int, number of top positions to evaluate. Values larger than the
        number of items are clamped to it.

    Returns
    -------
    The NDCG@k value as a scalar of the array module in use, or ``0.0`` when
    the ideal DCG is zero (e.g. every label is 0, or ``k <= 0``).
    '''
    y_true = y_true.ravel()
    y_score = y_score.ravel()

    # Never evaluate more positions than there are items (avoids IndexError).
    k = max(0, min(k, y_true.size))

    # Discount log2(rank + 1) for ranks 1..k, computed once for both sums.
    discounts = cp.log2(cp.arange(2, k + 2))

    # Ideal ordering: labels sorted from most to least relevant.
    ideal_labels = cp.sort(y_true)[::-1][:k]
    ideal_dcg = ((2 ** ideal_labels - 1.) / discounts).sum()
    if ideal_dcg == 0:
        # Nothing relevant to rank: define the score as 0 instead of dividing by zero.
        return 0.0

    # Predicted ordering: indices of the k highest scores, best first.
    top_indices = cp.argsort(y_score)[::-1][:k]
    dcg = ((2 ** y_true[top_indices] - 1.) / discounts).sum()
    return dcg / ideal_dcg
    

In [None]:
import math

class NN(Chain):
    """Two-hidden-layer network with one scalar output head per task.

    ``l1`` and ``l2`` are shared by both tasks; ``l3`` is the head of the
    first task (white wine) and ``l4`` the head of the second task (red wine).
    Every layer, including the two heads, is followed by ``tanh``.
    """

    def __init__(self,n_in,n_hidden1,n_hidden2):
        # Shared layers
        shared_first = L.Linear(n_in, n_hidden1)
        shared_second = L.Linear(n_hidden1, n_hidden2)
        # Task-specific heads, each producing a single value
        head_first_task = L.Linear(n_hidden2, 1)
        head_second_task = L.Linear(n_hidden2, 1)

        super(NN, self).__init__(
            l1=shared_first,
            l2=shared_second,
            l3=head_first_task,
            l4=head_second_task,
        )

    def __call__(self,x):
        """Return ``(first_task_output, second_task_output)`` for input ``x``."""
        shared = F.tanh(self.l2(F.tanh(self.l1(x))))

        # first task: white wine
        out_first = F.tanh(self.l3(shared))
        # second task: red wine
        out_second = F.tanh(self.l4(shared))

        return out_first, out_second

##loss   
class Ranknet1(Chain):
    """Pairwise RankNet loss for two ranking tasks that share one predictor.

    ``predictor(x)`` must return a pair of scores; element 0 is used for the
    first task and element 1 for the second. For each task one pair of items
    is compared: the score difference is scaled by that task's ``sigma`` and
    the loss is the cross entropy between ``sigmoid(sigma * difference)`` and
    a target derived from the two labels (1, 0, or 0.5 for a tie).

    Parameters
    ----------
    predictor : the scoring network, registered as the child link ``predictor``.
    sigma1 : scale applied to the first task's score difference (default 2.0).
    sigma2 : scale applied to the second task's score difference (default 2.5).
    """

    def __init__(self, predictor, sigma1=2.0, sigma2=2.5):
        super(Ranknet1,self).__init__(
        predictor=predictor
        )
        self.sigma1 = sigma1
        self.sigma2 = sigma2

    @staticmethod
    def _pair_target(t_a, t_b):
        """Target probability that the first item should rank above the second.

        Returns 1 if the first label is larger, 0 if it is smaller, and 0.5
        otherwise (equal labels, or labels that do not compare, e.g. NaN).
        """
        if t_a.data > t_b.data:
            return 1
        if t_a.data < t_b.data:
            return 0
        return 0.5

    @staticmethod
    def _pair_loss(s_a, s_b, target, sigma):
        """Cross entropy between ``target`` and ``sigmoid(sigma * (s_a - s_b))``.

        Uses ``-log(sigmoid(z)) == softplus(-z)`` and
        ``-log(1 - sigmoid(z)) == softplus(z)``, which is the same quantity as
        the explicit sigmoid-then-log form but cannot hit ``log(0)``.
        """
        logit = sigma * (s_a - s_b)
        return target * F.softplus(-logit) + (1 - target) * F.softplus(logit)

    def __call__(self,x_i,x_j,t_i,t_j,x_ii,x_jj,t_ii,t_jj):
        """Return the summed pairwise loss of both tasks.

        Parameters
        ----------
        x_i, x_j : inputs of the two items compared in the first task.
        t_i, t_j : their labels (objects whose ``.data`` can be compared).
        x_ii, x_jj, t_ii, t_jj : the same for the second task.

        Returns
        -------
        ``loss1 + loss2``; the two terms and their sum are also stored on the
        instance as ``loss1``, ``loss2`` and ``all_loss``.
        """
        # First task reads output 0 of the predictor, second task output 1.
        s_i=self.predictor(x_i)[0]
        s_j=self.predictor(x_j)[0]
        s_ii=self.predictor(x_ii)[1]
        s_jj=self.predictor(x_jj)[1]

        ## first loss
        S = self._pair_target(t_i, t_j)
        self.loss1 = self._pair_loss(s_i, s_j, S, self.sigma1)

        ## second loss
        T = self._pair_target(t_ii, t_jj)
        self.loss2 = self._pair_loss(s_ii, s_jj, T, self.sigma2)

        ## all loss
        self.all_loss = self.loss1 + self.loss2

        return self.all_loss

In [None]:
## Layer sizes: input features and the two hidden layers
n_in = 11
n_hidden1 = 9
n_hidden2 = 7

## Seed NumPy before the network is built so repeated runs start from the same random state
seed = 0
np.random.seed(seed)

## Build the network and wrap it in the two-task RankNet loss
model = Ranknet1(NN(n_in , n_hidden1,n_hidden2))

## GPU setup: select the configured device (not a hard-coded id) and move the model to it
uses_device = 0
if uses_device >=0:
    cuda.get_device(uses_device).use()
    model.to_gpu()

## Seed CuPy as well, after the device has been selected
cp.random.seed(seed)


## Optimizer used for back-propagation updates
optimizer = optimizers.Adam()
optimizer.setup(model)

## Training schedule: total number of updates and how often NDCG is evaluated
n_iter = 100000
loss_step = 5000

## NDCG histories, one entry per evaluation (r = red wine, w = white wine)
train_score_accum_r = []
test_score_accum_r = []
train_score_accum_w = []
test_score_accum_w = []


for step in range(n_iter):

    ## first task (white wine): pick two random training rows, with replacement.
    ## Indices are drawn on the host so no device round-trip is needed per draw.
    train_length = x_train_w.shape[0]
    i, j = np.random.randint(0, train_length, size=2).tolist()
    x_i = Variable(x_train_w[i].reshape(1,-1))
    x_j = Variable(x_train_w[j].reshape(1,-1))
    y_i = Variable(y_train_w[i])
    y_j = Variable(y_train_w[j])

    ## second task (red wine): same sampling on the red-wine training rows
    train_length = x_train_r.shape[0]
    ii, jj = np.random.randint(0, train_length, size=2).tolist()
    x_ii = Variable(x_train_r[ii].reshape(1,-1))
    x_jj = Variable(x_train_r[jj].reshape(1,-1))
    y_ii = Variable(y_train_r[ii])
    y_jj = Variable(y_train_r[jj])

    ## one optimisation step on the summed loss of both tasks
    model.cleargrads()
    loss = model(x_i, x_j, y_i, y_j,x_ii, x_jj, y_ii, y_jj)
    loss.backward()
    optimizer.update()

    if ((step +1) % loss_step ==0):

        # Score the complete train and test sets with the head that belongs
        # to each task (output 0: white wine, output 1: red wine).
        train_score_w = model.predictor(Variable(x_train_w))[0]
        test_score_w = model.predictor(Variable(x_test_w))[0]
        train_score_r = model.predictor(Variable(x_train_r))[1]
        test_score_r = model.predictor(Variable(x_test_r))[1]

        ## number of top positions evaluated: k for train, l for test
        k=3
        l=3
        # float() turns the scalar returned by ndcg into a plain Python number,
        # so the histories can be averaged and plotted without relying on
        # implicit device-to-host conversion.
        train_ndcg_r = float(ndcg(y_train_r , train_score_r.data , k))
        test_ndcg_r = float(ndcg(y_test_r , test_score_r.data , l))
        train_ndcg_w = float(ndcg(y_train_w , train_score_w.data , k))
        test_ndcg_w = float(ndcg(y_test_w , test_score_w.data , l))

        train_score_accum_r.append(train_ndcg_r)
        test_score_accum_r.append(test_ndcg_r)
        train_score_accum_w.append(train_ndcg_w)
        test_score_accum_w.append(test_ndcg_w)

    ## report how many iterations have been completed
    if ((step + 1) % 50000 == 0):
        print (step + 1, "/", n_iter," finished")

# `step` never equals n_iter inside the loop, so the checkpoint name is built
# here, once training has finished.
# NOTE(review): nothing writes the model to this path yet.
outfile = "ranknet-" + str(n_iter) + ".model"

latest = int(n_iter / loss_step) - 1

## Mean and standard deviation of the test NDCG@3 over the ten most recent evaluations
for template, reducer, history in (
    ('mean_test_last10_redwine_ndcg@3: %f ', np.mean, test_score_accum_r),
    ('std_test_last10_redwine_ndcg@3: %f', np.std, test_score_accum_r),
    ('mean_test_last10_whitewine_ndcg@3: %f ', np.mean, test_score_accum_w),
    ('std_test_last10_whitewine_ndcg@3: %f', np.std, test_score_accum_w),
):
    newest_first = history[::-1]
    print(template % reducer(newest_first[:10]))

## NDCG@3 curves: train curves dotted, test curves solid
plt.figure(figsize=(14,9))

for history, curve_label, extra_style in (
    (train_score_accum_r, "Train_red", {"linestyle": "dotted"}),
    (test_score_accum_r, "Test_red", {}),
    (train_score_accum_w, "Train_white", {"linestyle": "dotted"}),
    (test_score_accum_w, "Test_white", {}),
):
    plt.plot(history, label=curve_label, linewidth=2, **extra_style)

plt.title("sigma2.0_sigma2.5_epoch100000")
plt.xlabel('loss_step')
plt.ylabel('ndcg@3')
plt.ylim(0, 1.1)
plt.legend(shadow=True, prop={'size' : 10})
plt.savefig("Result_sigma2.0_sigma2.5_multi_modify2.pdf")
plt.show()

### Best result

Settings: sigma1 = 2.0, sigma2 = 2.5<br>
mean_test_last10_redwine_ndcg@3: 0.877359<br>
mean_test_last10_whitewine_ndcg@3: 0.863796