# 0. Intro
1. 试图整合fixed和ascending两种拍卖
2. 直接输出LEN=300的data

# 1. Global settings
## 1.1 data path

In [34]:
# ---- Input data ----------------------------------------------------------
# Small dataset (uncomment these two lines to use it instead of the large one)
# data_np_path = r'../../data/small_auctions_np.csv'
# settings_NN_path = r"../../data/small_settings_NN.csv"

# Large dataset
settings_NN_path = r'E:\DATA\large_dta\large_settings_NN.csv'
data_np_path = r'E:\DATA\large_dta\large_auctions_np.csv'

# ---- Threshold -----------------------------------------------------------
# Number of probability columns written per setting.
LEN = 300

# ---- Output --------------------------------------------------------------
# The result file is data_path_root + filename_head + str(LEN) + filename_tail.
# (original note: target data is from method-2)
filename_tail = ".csv"
filename_head = "GT_1_large_LEN="
data_path_root = "../../data/info_asymm/results/"

# ---- Columns that together identify a unique auction setting -------------
unique_setting_GT = ['bidincrement', 'bidfee', 'retail', 'flg_fixedprice']
# The NN variant additionally keys on the product description.
unique_setting_NN = ['desc'] + unique_setting_GT

import numpy as np
#import cupy as np
import pandas as pd
import csv
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from visdom import Visdom

## 1.2 read

In [30]:
# Load the auction records and the table of settings (one row per setting).
data = pd.read_csv(data_np_path, encoding="utf-8")
data_key = pd.read_csv(settings_NN_path, encoding="utf-8")

# Every row of data_key is one setting that the loop below will process.
n_settings = len(data_key)
print("For GT-1, there are *{}* settings waiting to be inferred.".format(n_settings))

For GT-1, there are *80* settings waiting to be inferred.


# 2. U&P
1. We have $n \leq T_i$, where $T_i$ is the upper bound on the auction duration under setting $i$.
2. `U[i][0]`初始化为1，方便后续`P`的计算
3. `U[i][j]`表示在setting i下：
> The probability that somebody makes the jth bid (given that j − 1 previous bids have been made)
4. `P`作为一个**dict**，它的key是`features_GT`,每一个key对应一个大小为(T+1)的list.
5. 由于threshold的存在，`P[key_i]`的大小设置为`K+1`，其中`P[key_i][K]`记录的是所有 t > K 的 `P[key_i][t]` 之和
    - 如果threshold> T_i，则用0去padding

In [31]:
# Column labels are the strings "0", "1", ..., str(LEN - 1).
col_names = [str(idx) for idx in range(LEN)]

# Start from an empty frame that has exactly these columns.
P_df = pd.DataFrame(columns=col_names)

P_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299


In [32]:
def duration_pmf(v, d, b, length):
    """Return the first `length` duration probabilities for one auction setting.

    Parameters
    ----------
    v : float
        Retail price, used as the valuation.
    d : float
        Bid increment. ``d == 0`` is treated as a fixed-price auction.
    b : float
        Bid fee.
    length : int
        Number of probabilities to return.

    Returns
    -------
    numpy.ndarray
        Array ``P`` of shape ``(length,)`` where ``P[t-1]`` is
        ``U[1] * U[2] * ... * U[t] * (1 - U[t+1])`` for ``t = 1..length``.
        The values are NOT renormalised, so they may sum to less than 1
        when the distribution is truncated at ``length``.

    Raises
    ------
    AssertionError
        If any ``U[t]`` turns out negative.
    """
    # d == 0 suggests a fixed-price auction, which has no finite duration cap.
    if d == 0:
        T_i = np.inf
    else:
        T_i = np.floor((v - b) / d)

    # U[t]: probability that somebody places the t-th bid given that t-1 bids
    # were made. U[0] is never used; U[1] = 1 guarantees at least one round.
    U = np.zeros(length + 2)
    U[0] = U[1] = 1.0
    for t in range(2, length + 2):
        if t < T_i:
            # Below the theoretical cap T_i the probability is computable.
            U[t] = 1.0 - (b / (v - d * (t - 1)))
        # At or beyond T_i the entry stays 0.0.
        assert U[t] >= 0, "U[t]<0 when t ={},and b = {},v = {}, d = {}".format(t, b, v, d)

    # reach[t-1] = U[1] * ... * U[t], the running product over rounds 1..t.
    reach = np.cumprod(U[1:length + 1])
    # Multiply by (1 - U[t+1]): nobody places bid t+1.
    P = (1.0 - U[2:length + 2]) * reach
    assert len(P) == length, "P has wrong length (should be LEN)"
    return P


# Collect one probability row per unique setting and build the frame once at
# the end. This avoids quadratic row-by-row pd.concat and makes the cell
# idempotent: re-running it no longer appends duplicate rows to P_df.
pmf_rows = []
for i in tqdm(range(data_key.shape[0])):
    v = float(data_key.loc[i, 'retail'])            # retail price = valuation
    d = float(data_key.loc[i, 'bidincrement'])      # bid increment
    b = float(data_key.loc[i, 'bidfee'])            # bid fee
    pmf_rows.append(duration_pmf(v, d, b, LEN))

P_df = pd.DataFrame(pmf_rows, columns=col_names)

print("Done")

  0%|          | 0/80 [00:00<?, ?it/s]

Done


# 3. save

In [35]:
# The output file name is root + head + LEN + tail, so it encodes the threshold.
filename_P = "".join([data_path_root, filename_head, str(LEN), filename_tail])

# Write the frame with a header row and without the row index.
P_df.to_csv(filename_P, header=True, index=False, encoding="utf-8")
print(filename_P)
print("DONE")

../../data/info_asymm/results/GT_1_large_LEN=300.csv
DONE
