In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/11/07 16:36
# @Author  : Wang Yujia
# @File    : PT_gen_oneforall.ipynb
# @Description : calculating U&P with inferred param

# 0. What for
1. 用infer的结果来算U和P
2. 使用的params是common parameter
3. 设置threshold = K = 300
    - T < 300的padding（补零）
    - T > 300的求和到最后一个值（注意：2.1节的代码目前只是把P截断到前LEN项，并没有把尾部概率加到最后一个值上，对应的归一化代码也处于注释状态）

# 1. Preparations
## 1.1 全局设置

In [2]:
# ---- Input locations -------------------------------------------------------
# Small dataset: raw auction records and the table of unique auction settings.
data_small_np_path, settings_small_NN_path = (
    r"../../data/small_auctions_np.csv",
    r"../../data/small_settings_NN.csv",
)

# Large dataset (same two tables).
# NOTE(review): these are absolute, machine-specific Windows paths.
data_large_np_path, settings_large_NN_path = (
    r"E:\DATA\large_dta\large_auctions_np.csv",
    r"E:\DATA\large_dta\large_settings_NN.csv",
)

# Candidate parameter sets produced by the inference step.
params_opitim_oneforall_path = "../../data/SA_PT/params_opitim_oneforall.csv"

# ---- Output location -------------------------------------------------------
# The result file name is data_path_root + filename_head + str(LEN) + filename_tail.
# Use "GT_2_large_LEN=" as filename_head when processing the large dataset.
data_path_root = "../../data/SA_PT/results/"
filename_head = "GT_2_small_LEN="
filename_tail = ".csv"

# ---- Model settings --------------------------------------------------------
# Columns that together identify one auction setting.
unique_setting_NN = ["desc", "bidincrement", "bidfee", "retail", "flg_fixedprice"]
# Number of duration slots kept in every generated distribution.
LEN = 300

import numpy as np
import csv
import pandas as pd
from visdom import Visdom
from SA_for_PT_funcs_delta_eq1 import *
from tqdm import tqdm
import json


## 1.2 data 读取
1. data_key不变
2. 并且提取成功infer的参数结果`params_all`

In [15]:
# Load the three inputs used by this notebook.
# To process the large dataset instead, read from data_large_np_path /
# settings_large_NN_path; to process both, concatenate the small and large
# frames with pd.concat(..., axis=0, ignore_index=True). The two sources have
# different columns, so a concatenated frame contains NaN; that is acceptable
# as long as the columns used below are populated.

# Inferred parameter candidates.
params_all = pd.read_csv(params_opitim_oneforall_path, encoding="utf-8")

# One row per unique auction setting.
data_key = pd.read_csv(settings_small_NN_path, encoding="utf-8")

# Raw auction records.
data = pd.read_csv(data_small_np_path, encoding="utf-8")

# Number of distinct settings (rows of `data_key`).
N_uniq_auction = len(data_key.index)
print("For PT model, there are *{}* settings waiting to be inferred.".format(N_uniq_auction))
data_key.head()

For PT model, there are *1226* settings waiting to be inferred.


Unnamed: 0,desc,bidincrement,bidfee,retail,flg_fixedprice
0,Sony Ericsson S500i Unlocked Mysterious Green,0.15,0.75,499.99,0
1,PSP Slim & Lite Sony Piano Black,0.15,0.75,169.99,0
2,iPod Touch Apple 8GB with Software Upgrade,0.15,0.75,299.99,0
3,LG KU990 Viewty Unlocked Black,0.0,0.75,899.99,1
4,Logitech Cordless Wave Keyboard and Mouse,0.15,0.75,89.99,0


## 1.3 functions about 'key'

In [16]:
# unique_setting_NN = ['desc','bidincrement','bidfee','retail','flg_fixedprice']
def select_data_fromkey(i):
    """Return the rows of `data` that belong to the i-th auction setting.

    A row matches when it equals row `i` of `data_key` on every column listed
    in `unique_setting_NN`. Columns are looked up by name, so the result does
    not depend on the column order of `data_key` (e.g. an extra leading index
    column left over from a CSV round-trip).

    Parameters
    ----------
    i : int
        Positional row number in `data_key`.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, keeping their original index.
    """
    matches = [data[col] == data_key[col].iloc[i] for col in unique_setting_NN]
    mask = matches[0]
    for extra in matches[1:]:
        mask = mask & extra
    return data[mask].copy()

## 1.4 get alpha, lambda
1. 得到avg_loss最小值对应的alpha和lambda

In [17]:
# Pick the parameter set with the smallest average loss.
# `idxmin` yields exactly one row label (the first one on ties, NaN losses are
# skipped), so `alpha` and `labda` are always scalars. The previous boolean
# mask could select several rows on ties and relied on the deprecated implicit
# conversion of a one-element Series to a float.
best_idx = params_all['avg_loss'].idxmin()
params = params_all.loc[[best_idx]]            # kept as a DataFrame, as before
alpha = np.float64(params['alpha'].iloc[0])
delta = 1                                      # hard-coded; not read from the parameter file
labda = np.float64(params['labda'].iloc[0])
print(f"The alpha = {alpha}, delta = 1, lambda = {labda}")

# Theoretical duration limit per setting: floor((retail - bidfee) / bidincrement).
# bidincrement == 0 (fixed-price auction) has no limit and is stored as +inf;
# casting that infinity to int, as done previously, produced a meaningless
# integer. The column is therefore float-valued.
is_fixed_price = data_key['bidincrement'] == 0
duration_limit = np.floor((data_key['retail'] - data_key['bidfee']) / data_key['bidincrement'])
data_key['T'] = np.where(is_fixed_price, np.inf, duration_limit)

The alpha = -0.013581112, delta = 1, lambda = 3.312402533


# 2. U & P
## 2.1 generate

1. 对不同的auction settings做generate
2. generate过程: u-->p，然后把P存到dict里
3. U 由 `f_Equi`得到
4. 注意P最后删去了第一个值P[0]，因此现在P[i]表示duration=i+1的概率

In [18]:
# Column labels for the generated distributions: "0", "1", ..., str(LEN - 1).
# (The setting columns 'bidincrement', 'bidfee', 'retail' are not part of the
# output frame.)
col_names = [str(slot) for slot in range(LEN)]

# Empty frame that only carries the column layout.
P_df = pd.DataFrame(columns=col_names)

P_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299


In [19]:
# Solve for U and P for every auction setting and collect the P rows.

def _bid_probabilities(v, d, b, alpha, labda, length):
    """Return the list U[0 .. length+1] of per-round bid probabilities.

    U[t] is the probability that someone offers a bid in round t. U[0] is 1;
    for 1 <= t < T the value is f_Equi(t, v, d, b, alpha, labda); from the
    duration limit T onwards it is 0.

    Parameters
    ----------
    v, d, b : float
        Retail value, bid increment and bid fee of the setting.
    alpha, labda : float
        Inferred model parameters forwarded to `f_Equi`.
    length : int
        Number of duration slots; the list has ``length + 2`` entries.

    Raises
    ------
    AssertionError
        If any U[t] falls outside [0, 1].
    """
    # d == 0 marks a fixed-price auction, which has no duration limit.
    T = np.inf if d == 0 else np.floor((v - b) / d)

    U = [0] * (length + 2)
    U[0] = 1
    for t in range(1, len(U)):
        # Beyond the theoretical limit T nobody bids any more.
        U[t] = f_Equi(t, v, d, b, alpha, labda) if t < T else 0
        assert U[t]>=0, "U[t]<0! when t ={},and b = {},v = {}, d = {}".format(t,b,v,d)
        assert U[t]<=1, "U[t]>1! when t ={},and b = {},v = {}, d = {}".format(t,b,v,d)
    return U


def _duration_distribution(U, length):
    """Return an array P of size `length` with P[k] = Prob(duration == k + 1).

    Prob(duration == t) = U[1] * U[2] * ... * U[t] * (1 - U[t + 1]).
    Duration 0 has probability 0 and is not stored. No renormalisation is
    applied, so probability mass beyond `length` rounds is simply dropped.
    """
    running = np.ones(length + 1)              # running[t] = U[1] * ... * U[t]
    P = np.zeros(length)
    for t in range(1, length + 1):
        running[t] = running[t - 1] * U[t]
        P[t - 1] = (1 - U[t + 1]) * running[t]
    return P


# v, d, b are taken straight from `data_key`: selecting the auctions of a
# setting by equality on these columns and reading the unique value back gives
# exactly these numbers, so the per-setting filtering of `data` is unnecessary.
P_rows = []
settings_iter = zip(data_key['retail'], data_key['bidincrement'], data_key['bidfee'])
for v, d, b in tqdm(settings_iter, total=len(data_key)):
    v, d, b = float(v), float(d), float(b)

    U = _bid_probabilities(v, d, b, alpha, labda, LEN)
    P = _duration_distribution(U, LEN)
    assert len(P)==LEN,"P has wrong length (should be LEN)"
    P_rows.append(P)

# Build the frame once: growing it with pd.concat inside the loop is quadratic
# and appended duplicate rows whenever this cell was re-run.
P_df = pd.DataFrame(P_rows, columns=col_names, dtype=float)
print("Done")


100%|██████████| 1226/1226 [00:08<00:00, 151.36it/s]

Done





# 3. save

In [20]:
# Output file: <data_path_root><filename_head><LEN><filename_tail>.
filename_P = "".join([data_path_root, filename_head, str(LEN), filename_tail])

# Write the distributions with a header row and without the row index.
P_df.to_csv(filename_P, index=False, header=True, encoding="utf-8")

print(filename_P)
print("DONE")

../../data/SA_PT/results/GT_2_small_LEN=300.csv
DONE


# 4. 读取json并且还原viz环境

In [11]:

# Re-draw the line plots stored in the JSON file into the "P_oneforall" env.
viz_path = "../data/vis/P.json"
with open(viz_path, "r", encoding="utf-8") as f:
    pre_data = json.load(f)

# No earlier cell of this notebook creates a Visdom client, so on a fresh
# kernel `viz` may be undefined. Create one only if it is missing.
# NOTE(review): `viz` might also be provided by the star import from
# SA_for_PT_funcs_delta_eq1 -- confirm; the guard keeps that client if so.
if "viz" not in globals():
    viz = Visdom(env="P_oneforall")

# Each entry under 'jsons' describes one window; only its first trace is drawn.
for window in pre_data['jsons'].values():
    trace = window['content']["data"][0]
    viz.line(
        X=np.array(trace["x"]),
        Y=np.array(trace["y"]),
        env = "P_oneforall",
        win = window['id'],
        opts= dict(title=window['title']),
    )