In [None]:
import numpy as np
import chainer

from chainer import cuda,Function,report,training,utils,Variable
from chainer import datasets,iterators,optimizers,serializers
from chainer import Link,Chain,ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
## Check the GPU environment: print Chainer's CUDA and cuDNN availability flags.
for label, enabled in (
    ('GPU availability:', chainer.cuda.available),
    ('cuDNN availablility:', chainer.cuda.cudnn_enabled),
):
    print(label, enabled)

In [None]:
ls

In [None]:
## Load the train / test splits from the working directory.
x_train=np.load("./white_wine_x_train.npy")
y_train=np.load("./white_wine_y_train.npy")

x_test=np.load("./white_wine_x_test.npy")
y_test=np.load("./white_wine_y_test.npy")

## Standardize every feature column.
## The statistics are estimated on the training split only and then applied to
## the test split, so both splits live on the same scale and no information
## from the test set leaks into preprocessing.
## NOTE(review): assumes x_train / x_test are 2-D (samples x features) — confirm.
import scipy.stats
x_train_zscore=scipy.stats.zscore(x_train)

# Same statistics zscore uses by default: per-column mean and population std (ddof=0).
feature_mean=x_train.mean(axis=0)
feature_std=x_train.std(axis=0)
x_test_zscore=(x_test-feature_mean)/feature_std

In [None]:
# Display the shape of the standardized training matrix.
# NOTE(review): its second dimension presumably has to match n_in used when the
# network is built further down — confirm.
x_train_zscore.shape

In [None]:
## Convert train and test data to float32 CuPy arrays.

import cupy as cp

# Features come from the standardized copies; labels are converted as loaded.
x_train, y_train, x_test, y_test = (
    cp.asarray(host_array, dtype=cp.float32)
    for host_array in (x_train_zscore, y_train, x_test_zscore, y_test)
)

In [None]:
### NDCG as the evaluation metric (a rank-aware measure).
### Items are ordered by descending predicted score, y_true is read in that
### order, and the resulting DCG is normalised by the ideal DCG.

def ndcg(y_true, y_score, k):
    """Return NDCG@k of the ranking induced by ``y_score``.

    Gains are ``2 ** relevance - 1`` and the discount at 0-based rank ``r`` is
    ``1 / log2(r + 2)``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth relevance of every item (NumPy array, CuPy array or
        sequence). Flattened before use.
    y_score : array-like
        Predicted score of every item; same number of elements as ``y_true``.
    k : int
        Number of top positions to evaluate. Values larger than the number of
        items are clamped to that number.

    Returns
    -------
    float
        DCG@k divided by the ideal DCG@k, as a plain Python float. ``0.0`` is
        returned when ``k <= 0``, when there are no items, or when the ideal
        DCG is zero (all gains are zero), instead of dividing by zero.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_score`` do not hold the same number of elements.
    """
    def _as_host_vector(values):
        # CuPy arrays are copied to host memory once via .get(); they are
        # recognised by module name so that cupy need not be imported here.
        if type(values).__module__.split('.')[0] == 'cupy':
            values = values.get()
        return np.asarray(values, dtype=np.float64).ravel()

    relevance = _as_host_vector(y_true)
    scores = _as_host_vector(y_score)
    if relevance.shape != scores.shape:
        raise ValueError(
            'y_true and y_score must have the same number of elements '
            '(got %d and %d)' % (relevance.size, scores.size))

    # Clamp k so a short list cannot cause an out-of-range index.
    top = min(int(k), relevance.size)
    if top <= 0:
        return 0.0

    discounts = 1.0 / np.log2(np.arange(top) + 2.0)
    gains = 2.0 ** relevance - 1.0

    # Ideal ordering: largest gains first.
    ideal_dcg = float(np.sum(np.sort(gains)[::-1][:top] * discounts))
    if ideal_dcg == 0.0:
        return 0.0

    # Indices of the items with the highest predicted scores, best first.
    ranking = np.argsort(scores)[::-1][:top]
    dcg = float(np.sum(gains[ranking] * discounts))
    return dcg / ideal_dcg
    

In [None]:
import math

class NN(Chain):
    """Fully connected scoring network: two tanh hidden layers and a tanh output unit.

    Parameters
    ----------
    n_in : int
        Input size of the first linear layer.
    n_hidden1 : int
        Output size of the first linear layer.
    n_hidden2 : int
        Output size of the second linear layer.
    """

    def __init__(self,n_in,n_hidden1,n_hidden2):
        layers = dict(
            l1=L.Linear(n_in, n_hidden1),
            l2=L.Linear(n_hidden1, n_hidden2),
            ## first task only: single-unit output layer
            l3=L.Linear(n_hidden2, 1),
        )
        super(NN, self).__init__(**layers)

    def __call__(self,x):
        """Return the tanh-activated output of the last layer for input ``x``."""
        hidden = F.tanh(self.l2(F.tanh(self.l1(x))))

        ## first task only
        return F.tanh(self.l3(hidden))

## loss
class Ranknet1(Chain):
    """Pairwise RankNet loss wrapped around a scoring network.

    Parameters
    ----------
    predictor : chainer.Link
        Network that maps an input to a score.
    sigma : float, optional
        Slope of the sigmoid applied to the score difference. Defaults to 1.5,
        the value that used to be hard-coded in ``__call__``.
    """

    def __init__(self,predictor,sigma=1.5):
        super(Ranknet1,self).__init__(
        predictor=predictor
        )
        self.sigma=sigma

    def __call__(self,x_i,x_j,t_i,t_j):
        """Compute the pairwise cross-entropy loss for the pair (i, j).

        Parameters
        ----------
        x_i, x_j : chainer.Variable
            Inputs of the two items; each is passed through ``predictor``.
        t_i, t_j : chainer.Variable
            Relevance labels of the two items. Each ``.data`` must hold a
            single value, because it is converted with ``float()``.

        Returns
        -------
        chainer.Variable
            The loss, elementwise in the predictor's output (also stored in
            ``self.loss``).

        Raises
        ------
        ValueError
            If the two labels cannot be ordered (e.g. one of them is NaN).
        """
        s_i=self.predictor(x_i)
        s_j=self.predictor(x_j)
        z=self.sigma*(s_i-s_j)

        # Target probability that item i should rank above item j.
        rel_i=float(t_i.data)
        rel_j=float(t_j.data)
        if rel_i > rel_j:
            S=1.0
        elif rel_i < rel_j:
            S=0.0
        elif rel_i == rel_j:
            S=0.5
        else:
            raise ValueError(
                'labels cannot be ordered: t_i=%r, t_j=%r' % (rel_i, rel_j))

        # With p = sigmoid(z):
        #   -S*log(p) - (1-S)*log(1-p)  ==  (1-S)*z + softplus(-z)
        # The right-hand side never evaluates log(0), so a saturated sigmoid
        # cannot produce inf or 0*inf = NaN.
        self.loss=(1.0-S)*z+F.softplus(-z)

        return self.loss

In [None]:
n_in = 11
n_hidden1 = 9
n_hidden2 = 7

## Seed NumPy's global RNG so the sequence of sampled training pairs is repeatable.
seed = 0
np.random.seed(seed)

## Build the network
model = Ranknet1(NN(n_in, n_hidden1, n_hidden2))

## GPU setup (a negative id keeps the model on the CPU)
uses_device = 0
if uses_device >= 0:
    # Honour the configured id instead of always selecting device 0.
    cuda.get_device(uses_device).use()
    model.to_gpu()

## Optimizer for back-propagation
optimizer = optimizers.Adam()
optimizer.setup(model)

## Training schedule
n_iter = 100000        # number of pairwise updates
loss_step = 5000       # evaluate NDCG every `loss_step` updates
progress_step = 50000  # print progress every `progress_step` updates

## Cut-off rank k of NDCG@k for the train and the test evaluation
k_train = 3
k_test = 3

train_score_accum = []
test_score_accum = []

train_length = x_train.shape[0]

for step in range(n_iter):
    # Draw the two item indices of the training pair as integers on the host,
    # rather than truncating float samples drawn with cp.random.uniform.
    i, j = (int(idx) for idx in np.random.randint(0, train_length, size=2))

    x_i = Variable(x_train[i].reshape(1, -1))
    x_j = Variable(x_train[j].reshape(1, -1))
    y_i = Variable(y_train[i])
    y_j = Variable(y_train[j])

    model.cleargrads()
    loss = model(x_i, x_j, y_i, y_j)
    loss.backward()
    optimizer.update()

    if (step + 1) % loss_step == 0:
        ### Score the full train and test sets with the current predictor
        ### and evaluate both rankings with NDCG.
        train_score = model.predictor(Variable(x_train))
        test_score = model.predictor(Variable(x_test))

        # float() turns the result into a host-side Python number, so the
        # history lists can be handed to NumPy and matplotlib directly.
        train_ndcg = float(ndcg(y_train, train_score.data, k_train))
        test_ndcg = float(ndcg(y_test, test_score.data, k_test))

        train_score_accum.append(train_ndcg)
        test_score_accum.append(test_ndcg)

    ## Report how many updates have finished (counted from 1, so the first
    ## message is no longer "0 / n_iter finished").
    if (step + 1) % progress_step == 0:
        print (step + 1, "/", n_iter," finished")

## Save the trained model. The former `step == n_iter` check inside the loop
## could never be true (range(n_iter) stops at n_iter - 1), so `outfile` was
## never set and nothing was written.
outfile = "ranknet-" + str(n_iter) + ".model"
serializers.save_npz(outfile, model)

# Work on plain Python floats: the recorded scores may be 0-d CuPy arrays, and
# NumPy / matplotlib should not be asked to convert device arrays implicitly.
train_scores = [float(score) for score in train_score_accum]
test_scores = [float(score) for score in test_score_accum]

# The ten most recent test evaluations (mean / std do not depend on their order).
last10_test = test_scores[-10:]

print('mean_train_ndcg@3: %f '% np.mean(train_scores))
print('mean_test_ndcg@3: %f '% np.mean(test_scores))
print('mean_test_last10_ndcg@3: %f ' % np.mean(last10_test))
print('std_test_last10_ndcg@3: %f' % np.std(last10_test))
# [-1] is the most recent evaluation; no index arithmetic on n_iter / loss_step needed.
print('latest_train_ndcg@3: %f '% train_scores[-1])
print('latest_test_ndcg@3: %f '% test_scores[-1])


# One point per evaluation (every `loss_step` updates) for both splits.
fig, ax = plt.subplots(figsize=(14,9))

ax.plot(train_scores,label="Train", linewidth=2)
ax.plot(test_scores,label="Test" ,linewidth=2)
ax.set_title("sigma1.5_whitewine_epoch100000")
ax.set_xlabel('loss_step')
ax.set_ylabel('ndcg@3')
ax.set_ylim(0, 1.1)
ax.legend(shadow=True, prop={'size' : 10})
fig.savefig("Result_sigma1.5_only_whitewine.pdf")
plt.show()

### sigma: 1.5

### white wine
mean_test_last10_ndcg@3: 0.834430

### red wine
mean_test_last10_ndcg@3: 0.830437