### 0. 準備

In [1]:
import pandas as pd
import numpy as np
us_con = pd.read_csv('../data/us_con.csv')

In [2]:
base_line = us_con.sort_values(by=['query_id', 'product_id'], ascending=False)
base_line.head()

Unnamed: 0.1,Unnamed: 0,product_id,product_title,product_description,product_bullet_point,product_brand,product_color,product_locale_x,example_id,query,query_id,product_locale_y,esci_label,small_version,large_version,split
1803952,1803952,B08QMZ4TGF,Hieha Car Stereo Compatible with Apple Carplay...,<b>7 INCH UNIVERSAL DOUBLE DIN CAR STEREO/CAR ...,✔❶【Compatible with Apple Carplay & Android Aut...,Hieha,Black,us,2618561,자전거트레일러,130539,us,I,1,1,train
1199277,1199277,B085NMRZC3,Osmo - Little Genius Starter Kit for Fire Tabl...,,Holiday gift for boys and girls. OSMO IS MAGIC...,Osmo,Plus,us,2618562,자전거트레일러,130539,us,I,1,1,train
1653234,1653234,B07ZFJJZLF,"LivTee 5 pcs Auto Trim Removal Tool Kit, Inter...","5 pcs Auto Trim Removal Tool Kit, Interior Doo...",Made of super durable plastic material for lon...,LivTee,Blue,us,2618563,자전거트레일러,130539,us,E,1,1,train
1199274,1199274,B07G5VLMN1,MAXXHAUL 50025 Hitch Mounted 2-Bike Rack-100 l...,,Improved durable hitch adapter to fit standard...,MAXXHAUL,,us,2618564,자전거트레일러,130539,us,E,1,1,train
1217543,1217543,B06XTZYJL3,Nilight - ZH003 20Inch 126W Spot Flood Combo L...,,Super bright beam: High intensity 3W LED chips...,Nilight,,us,2618565,자전거트레일러,130539,us,I,1,1,train


### 1. ランキング改善

In [3]:
import numpy as np

def count_query_in_desc(row):
    query = row['query'].split()
    product_description = str(row['product_description']).split()
    count = 0

    if not product_description:
        return 0

    for word in product_description:
        if word in query:
            count += 1
    return count

query_in_desc = us_con.apply(count_query_in_desc, axis=1)


In [4]:
improved_ver = base_line
improved_ver['query_in_description'] = query_in_desc
improved_ver= improved_ver.sort_values(by=['query_id','query_in_description'], ascending=False)
improved_ver.head()

Unnamed: 0.1,Unnamed: 0,product_id,product_title,product_description,product_bullet_point,product_brand,product_color,product_locale_x,example_id,query,query_id,product_locale_y,esci_label,small_version,large_version,split,query_in_description
1803952,1803952,B08QMZ4TGF,Hieha Car Stereo Compatible with Apple Carplay...,<b>7 INCH UNIVERSAL DOUBLE DIN CAR STEREO/CAR ...,✔❶【Compatible with Apple Carplay & Android Aut...,Hieha,Black,us,2618561,자전거트레일러,130539,us,I,1,1,train,0
1199277,1199277,B085NMRZC3,Osmo - Little Genius Starter Kit for Fire Tabl...,,Holiday gift for boys and girls. OSMO IS MAGIC...,Osmo,Plus,us,2618562,자전거트레일러,130539,us,I,1,1,train,0
1653234,1653234,B07ZFJJZLF,"LivTee 5 pcs Auto Trim Removal Tool Kit, Inter...","5 pcs Auto Trim Removal Tool Kit, Interior Doo...",Made of super durable plastic material for lon...,LivTee,Blue,us,2618563,자전거트레일러,130539,us,E,1,1,train,0
1199274,1199274,B07G5VLMN1,MAXXHAUL 50025 Hitch Mounted 2-Bike Rack-100 l...,,Improved durable hitch adapter to fit standard...,MAXXHAUL,,us,2618564,자전거트레일러,130539,us,E,1,1,train,0
1217543,1217543,B06XTZYJL3,Nilight - ZH003 20Inch 126W Spot Flood Combo L...,,Super bright beam: High intensity 3W LED chips...,Nilight,,us,2618565,자전거트레일러,130539,us,I,1,1,train,0


### 2. 適合率

In [5]:
query_id_list = set(base_line['query_id'])

def precision(data, id_list):
    precision = 0
    for id in id_list:
        df = data[data['query_id'] == id].head(10) # 上位10位
        tp = (df['esci_label'] != 'I').sum()
        precision += tp / 10
    return precision / len(id_list)

print(f'ベースラインの適合率 : {precision(base_line, query_id_list)}')
print(f'改善版の適合率 : {precision(improved_ver, query_id_list)}')

ベースラインの適合率 : 0.9179207971646878
改善版の適合率 : 0.9199095998766902


### 3. MAP

In [21]:
query_id_list = set(base_line['query_id'])

def MAP(data, id_list):
    precision = 0
    for id in id_list[:10]:
        df = data[data['query_id'] == id]
        tem_precision = 0
        not_I = 0
        i = 0
        for _, x in df.iterrows():
            i += 1
            if x["esci_label"] == "I":  
                continue
            else:
                not_I += 1
                tem_precision += not_I / i

        precision += tem_precision / not_I

    return precision / 10


print(f'ベースラインの平均適合率 : {MAP(base_line, list(query_id_list))}')
print(f'改善版の平均適合率 : {MAP(improved_ver, list(query_id_list))}')


ベースラインの平均適合率 : 0.8180237644481378
改善版の平均適合率 : 0.8123426949796769


### 4. DCG

In [7]:
gain = {'E': 4, 'S': 2, 'C': 1, 'I': 0}
base_line['int_label'] = base_line['esci_label'].apply(lambda x: gain[x])
improved_ver['int_label'] = improved_ver['esci_label'].apply(lambda x: gain[x])

In [None]:
# DCGの計算
def dcg(scores, k=None, powered=False):
        if k is None:
            k = scores.shape[0]
        if not powered:
            ret = scores[0]
            for i in range(1, k):
                ret += scores[i] / np.log2(i + 1)
            return ret
        else:
            ret = 0  # 2のスコア乗をする時
            for i in range(k):
                ret += (2 ** scores[i] - 1) / np.log2(i + 2)
            return ret

def dcg_10(data, id_list):
    sum = 0
    for id in id_list:
        df = data[data['query_id'] == id]

        if len(df) < 10:
             dcg_10 = dcg(np.array(df['int_label']), k=len(df), powered=True)
        else:
             dcg_10 = dcg(np.array(df['int_label']), k=10, powered=True)
        sum += dcg_10
    return sum / len(id_list)


query_id_list = set(base_line['query_id'])
base_line_dcg = dcg_10(base_line, query_id_list)
improved_dcg = dcg_10(improved_ver, query_id_list)
print(f'ベースラインのDCG@10 : {base_line_dcg}')
print(f'改善版のDCG@10 : {improved_dcg}')

### 5. nDCG

In [None]:
def ideal_dcg(data, id_list):
    sum = 0
    for id in id_list:
        df = data[data['query_id'] == id]
        ideal_dcg = np.sort(df['int_label'])[::-1]
        if len(df) < 10:
             ideal_dcg_10 = dcg(ideal_dcg, k=len(df), powered=True)
        else:
             ideal_dcg_10 = dcg(ideal_dcg, k=10, powered=True)
        sum += ideal_dcg_10
    return sum / len(id_list)

query_id_list = set(base_line['query_id'])

base_line_ndcg = dcg_10(base_line, query_id_list) / ideal_dcg(base_line, query_id_list)
improved_ndcg = dcg_10(improved_ver, query_id_list) / ideal_dcg(improved_ver, query_id_list)

print(f'ベースラインのnDCG@10 : {base_line_ndcg}')
print(f'改善版のnDCG@10 : {improved_ndcg}')

ベースラインのnDCG@10 : 0.8288028367702119
改善版のnDCG@10 : 0.8231608010630691


### 6. 符号検定

In [None]:
def dcg(scores, k=None, powered=False):
        if k is None:
            k = scores.shape[0]
        if not powered:
            ret = scores[0]
            for i in range(1, k):
                ret += scores[i] / np.log2(i + 1)
            return ret
        else:
            ret = 0  # 2のスコア乗をする時
            for i in range(k):
                ret += (2 ** scores[i] - 1) / np.log2(i + 2)
            return ret

def dcg_10_compare(base, improved, id_list):
    result = []
    for id in id_list:
        df_base = base[base['query_id'] == id]
        df_impr = improved[improved['query_id'] == id]

        ideal_dcg = np.sort(base[base['query_id'] == id]['int_label'].values)[::-1] # 理想DCG

        k = min(len(df_base), 10)
        
        dcg_len_base = dcg(df_base['int_label'].values, k=k, powered=True)
        dcg_len_impr = dcg(df_impr['int_label'].values, k=k, powered=True)
        dcg_len_idea = dcg(ideal_dcg, k=k, powered=True)
        
        ndcg_base = dcg_len_base / dcg_len_idea
        ndcg_impr = dcg_len_impr / dcg_len_idea

        result.append(0 if ndcg_base < ndcg_impr else 1)

    return result

query_id_list = set(base_line['query_id'])
compare = dcg_10_compare(base_line, improved_ver, query_id_list)

In [None]:
# 符号検定 (scipyを使用)
from scipy import stats

base_win = np.array(compare).sum() # 改善版が勝った回数
n = len(compare)

stats.binomtest(base_win, n, 0.5, alternative='two-sided').pvalue

0.0

p < 0.05より、ベースラインと改善版のランキングが五分五分であるという仮説を棄却する

### 7. t検定

In [None]:
def dcg_avg(base, improved, id_list):
    ndcg_base = 0
    ndcg_impr = 0
    for id in id_list:
        df_base = base[base['query_id'] == id]
        df_impr = improved[improved['query_id'] == id]

        ideal_dcg = np.sort(base[base['query_id'] == id]['int_label'].values)[::-1] # 理想DCG

        k = min(len(df_base), 10)
        
        dcg_len_base = dcg(df_base['int_label'].values, k=k, powered=True)
        dcg_len_impr = dcg(df_impr['int_label'].values, k=k, powered=True)
        dcg_len_idea = dcg(ideal_dcg, k=k, powered=True)
        
        ndcg_base += dcg_len_base / dcg_len_idea
        ndcg_impr += dcg_len_impr / dcg_len_idea

    return ndcg_base / len(id_list), ndcg_impr / len(id_list)

query_id_list = set(base_line['query_id'])
avg_base, avg_impr = dcg_avg(base_line, improved_ver, query_id_list)

### 8. 多重検定

In [9]:
import numpy as np

def count_query_in_desc(row):
    query = row['query'].split()
    product_title = str(row['product_title']).split()
    count = 0

    if not product_title:
        return 0

    for word in product_title:
        if word in query:
            count += 1
    return count

query_in_title = us_con.apply(count_query_in_desc, axis=1)

In [11]:
# 再改善バージョン
improved_ver2 = base_line
improved_ver2['query_in_title'] = query_in_title
improved_ver2= improved_ver2.sort_values(by=['query_id','query_in_title'], ascending=False)
improved_ver2.head()

Unnamed: 0.1,Unnamed: 0,product_id,product_title,product_description,product_bullet_point,product_brand,product_color,product_locale_x,example_id,query,query_id,product_locale_y,esci_label,small_version,large_version,split,query_in_description,int_label,query_in_title
1803952,1803952,B08QMZ4TGF,Hieha Car Stereo Compatible with Apple Carplay...,<b>7 INCH UNIVERSAL DOUBLE DIN CAR STEREO/CAR ...,✔❶【Compatible with Apple Carplay & Android Aut...,Hieha,Black,us,2618561,자전거트레일러,130539,us,I,1,1,train,0,0,0
1199277,1199277,B085NMRZC3,Osmo - Little Genius Starter Kit for Fire Tabl...,,Holiday gift for boys and girls. OSMO IS MAGIC...,Osmo,Plus,us,2618562,자전거트레일러,130539,us,I,1,1,train,0,0,0
1653234,1653234,B07ZFJJZLF,"LivTee 5 pcs Auto Trim Removal Tool Kit, Inter...","5 pcs Auto Trim Removal Tool Kit, Interior Doo...",Made of super durable plastic material for lon...,LivTee,Blue,us,2618563,자전거트레일러,130539,us,E,1,1,train,0,4,0
1199274,1199274,B07G5VLMN1,MAXXHAUL 50025 Hitch Mounted 2-Bike Rack-100 l...,,Improved durable hitch adapter to fit standard...,MAXXHAUL,,us,2618564,자전거트레일러,130539,us,E,1,1,train,0,4,0
1217543,1217543,B06XTZYJL3,Nilight - ZH003 20Inch 126W Spot Flood Combo L...,,Super bright beam: High intensity 3W LED chips...,Nilight,,us,2618565,자전거트레일러,130539,us,I,1,1,train,0,0,0


In [12]:
def dcg(scores, k=None, powered=False):
        if k is None:
            k = scores.shape[0]
        if not powered:
            ret = scores[0]
            for i in range(1, k):
                ret += scores[i] / np.log2(i + 1)
            return ret
        else:
            ret = 0  # 2のスコア乗をする時
            for i in range(k):
                ret += (2 ** scores[i] - 1) / np.log2(i + 2)
            return ret

def dcg_10(data, id_list):
    result = []
    for id in id_list:
        df = data[data['query_id'] == id]

        if len(df) < 10:
             dcg_10 = dcg(np.array(df['int_label']), k=len(df), powered=True)
        else:
             dcg_10 = dcg(np.array(df['int_label']), k=10, powered=True)
        result.append(dcg_10)
    return result

query_id_list = set(base_line['query_id'])
# ベースライン
base_resulut = dcg_10(base_line, query_id_list)
# 改善版ver1
imp_result = dcg_10(improved_ver, query_id_list)
# 改善版ver2
imp2_result = dcg_10(improved_ver2, query_id_list)

In [20]:
from scipy.stats import ttest_rel

# ボンフェローニ補正を適用した有意水準の計算
alpha = 0.05  # もとの有意水準
num_comparisons = 3  # ランキング間の比較数
alpha_corrected = alpha / num_comparisons

baseline_ndcg_mean = sum(base_resulut)/ len(base_resulut)
imp_ndcg_mean = sum(imp_result)/ len(imp_result)
imp2_ndcg_mean = sum(imp2_result)/ len(imp2_result)

x = [ i-baseline_ndcg_mean for i in base_resulut]
y = [ i-imp_ndcg_mean for i in imp_result]
z = [ i-imp2_ndcg_mean for i in imp2_result]

# 対応する t 検定を実行して差の有意性を評価
statistic_baseline_vs_improved, p_value_baseline_vs_improved = ttest_rel(base_resulut, imp_result)
statistic_improved_vs_further, p_value_improved_vs_further = ttest_rel(imp_result, imp2_result)
statistic_baseline_vs_further, p_value_baseline_vs_further = ttest_rel(base_resulut, imp2_result)

# 補正後の有意水準と比較して、差の有意性を判定
if p_value_baseline_vs_improved < alpha_corrected:
    print("ベースラインと改善版の間の差は有意です。")
else:
    print("ベースラインと改善版の間の差は有意ではありません。")

if p_value_improved_vs_further < alpha_corrected:
    print("改善版と再改善版の間の差は有意です。")
else:
    print("改善版と再改善版の間の差は有意ではありません。")

if p_value_baseline_vs_further < alpha_corrected:
    print("ベースラインと再改善版の間の差は有意です。")
else:
    print("ベースラインと再改善版の間の差は有意ではありません。")


ベースラインと改善版の間の差は有意です。
改善版と再改善版の間の差は有意です。
ベースラインと再改善版の間の差は有意です。


### 9. 正規性の検定

In [31]:
import scipy.stats as st

_, pa = st.shapiro(imp2_result)
_, pb = st.shapiro(imp_result)

print('シャピロ・ウィルク検定')
print(f'標本A p値 = {pa:.3f}')
print(f'標本B p値 = {pb:.3f}')

シャピロ・ウィルク検定
標本A p値 = 0.000
標本B p値 = 0.000


有意水準が1%よりも小さいことから、どちらの分布も正規性がない