In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2022/10/09 16:19
# @Author  : Wang Yujia
# @File    : SA_for_PT_model.ipynb
# @Description : Parameter estimation for PT_model using Simulated Annealing

# 0. What for

# 1. Preparations
1. infer参数一是需要data，二是需要把p表示出来才能写出来loss func
2. data来自`data_selected_path`

## 1.1 全局设置
1. 除了表示uniq auction的features，还引入了
    - 'cnt_uniq':表示paper里的Loss function公式里的A

In [66]:
data_selected_path = "../data/info_asymm/datawithnp_asc_symmetry_2_selected.csv"

# for PT
alpha = 1
delta = 1
labda = 2.25

# GT需要的features
features_GT = ['product_id','bidincrement','bidfee','retail']
features_GT_infer = ['cnt_uniq']

from sko.SA import SA
import numpy as np
import pandas as pd
import sympy
from functools import reduce
import seaborn as sns
from tqdm.notebook import tqdm

## 1.2 data 读取
1. 读取data以做SA
2. 提取出来`data_key`，以及其他计算需要的features

In [46]:
data = pd.read_csv(data_selected_path, encoding="utf-8")
data_key = data[features_GT].copy()
data_key.drop_duplicates(inplace=True)

b = np.array(data.bidfee)          # bid fee (cent to dollar)
d = np.array(data.bidincrement)    # bid increment (cent to dollar)
v = np.array(data.retail)               # valuation
# 需要计算`N_uniq_auction`组setting下的结果
N_uniq_auction= data_key.shape[0]

print("For PT model, there are *{}* settings waiting to be inferred.".format(N_uniq_auction))

For PT model, there are *1303* settings waiting to be inferred.


## 1.3 functions about 'key'



In [62]:
# get key from i in 'data_key'
def get_key_from_index(i):
   key_i = data_key.iloc[i,:]
   return key_i

#features_GT = ['product_id','bidincrement','bidfee','retail']
def select_data_fromkey(key_i_str):
    #idx =
    return data[(data['product_id'] == key_i_str[0]) & (data['bidincrement'] == key_i_str[1]) & (data['bidfee'] == key_i_str[2]) & (data['retail'] == key_i_str[3])].copy()

# 2. PT model
## 2.1 prob. weighting func
1. 根据Eq(5)

In [61]:
def OMEGA(p,delta):
    tmp = p**delta
    return tmp*((tmp + (1-p)**delta)**(-1/delta))


## 2.2 C_{t-1}
1. 根据5.1.2

In [60]:
def C(t,b):
    return 0.2*t*b

## 2.3 value functions
1. 根据Eq(7)-(9)
2. 注意这里把(-labda)(1-sympy.E**(alpha*x))/alpha的`labda`拿到外面去了，方便写

In [59]:
def valuation(x,alpha):
    if x>=0:
        return (1-sympy.E**(-alpha*x))/alpha
    else :
        # shoule be : (-labda)(1-sympy.E**(alpha*x))/alpha
        return (1-sympy.E**(alpha*x))/alpha

# The value of placing a bid in period t
# It's a winning bid, the auction stops
def v_bid_win(t,v,d,b,alpha):
        return valuation((v-d*t-C(t-1,b)-b),alpha)

# The value of placing a bid in period t
# It's not a winning bid, the auction continues
def v_bid_notwin(t,b,alpha):
    return valuation((-C(t-1,b)-b),alpha)

# The value of not bidding in period t/ forgone bidding fees
def v_notbid(t,b,alpha):
    return valuation(-C(t-1,b),alpha)


## 2.4 Equi. condition
1. 根据Eq(6)
2. 注意分辨怎么代入上面的公式

In [80]:
def f_Equi(t,v,d,b,alpha,labda,delta):
    u = sympy.Symbol('u')

    func_1 = (labda*valuation(-C(t-1,b),alpha) - labda*OMEGA(u,delta)*valuation((-C(t-1,b)-b),alpha) + OMEGA(1-u,delta)*valuation((v-d*t-C(t-1,b)-b),alpha))
    func_2 = (-valuation(-C(t-1,b),alpha) + OMEGA(u,delta)*valuation((-C(t-1,b)-b),alpha) + (1-OMEGA(u,delta))*valuation(-(v-d*t-C(t-1,b)-b),alpha))

    tmp = v - d*t-C(t-1,b) - b

    if(tmp > 0):
        return sympy.nsolve(func_1,(0,1),solver='bisect', verify=False)
    else:
        return sympy.nsolve(func_2,(0,1),solver='bisect', verify=False)

# 3. SA
## 3.1 define loss function
1. loss function: likelihood for auctions with same `features_GT`
2.

In [None]:
def loss_func(P):
    return reduce(lambda x, y: x*y, P)

## 3.1 do SA
1. 要对每一个setting做一次infer == 对每一个setting执行一次SA。
    - 可以并行吗？YES
2. 具体的：对每个setting `i`
    - 每一个setting `i` 可以提取出来一个`data_i`，代表所有auction
    - 每一个`data_i`中的`cnt_uniq`值`A`是相同的，表示setting `i` 进行的拍卖总次数
    - `N`表示duration，因此paper公式里的$T_a$即`N[a]`
    - 因此有`A = sum(data_i['cnt_n_2'])`，其中的'cnt_n_2'表示了该行对应的`duration=N`发生的次数
    - 按照上文，求解`U[i]_t` which is a array inwith shape of (max(N)),也就是求解paper里的`p_t`
    - 求ll时，记得

In [81]:
key_i = get_key_from_index(0)
print(key_i)
data_i = select_data_fromkey(key_i)
data_i.reset_index(drop=True,inplace=True)

#### solve for U
T = data_i['N']
max_T = max(T)
min_T = min(T)
v = float(data_i['retail'].unique())
d = float(data_i['bidincrement'].unique())
b = float(data_i['bidfee'].unique())

# solve for u from Equi. condt.
U_tmp = [0]*(max_T+1)
U_tmp[0] = 1
for t in tqdm(range(1,max_T+1),desc="solve for u"):
    U_tmp[t] = f_Equi(t,v,d,b,alpha,labda,delta)

# calculate Loss with u
# There are A different auctions
A = data_i['cnt_uniq'].unique()
# # the a_th auction ends after T[a] bids
# for a in range(0,A):
#     sum_of_minlen = np.sum(np.log(U_tmp[0:min_T]))
#     ll =

product_id      10009881.00
bidincrement           0.15
bidfee                 0.75
retail               169.99
Name: 0, dtype: float64


solve for u:   0%|          | 0/831 [00:00<?, ?it/s]

KeyError: 'pop from an empty set'

In [None]:
# Perform SA respectively for all settings
for i in range(0,N_uniq_auction):
    # get i_th data_key
    key_i = get_key_from_index(i)
    # extract data with same `key_i` into a table
    data_i = select_data_fromkey(key_i)
    data_i.reset_index(drop=True,inplace=True)

    # for a certain auction(like 'data_i'), 'cnt_uniq' should be all the same
    A = data_i['cnt_uniq'].unique()
    T = data_i['N']
    assert(A == sum(data_i['cnt_n_2']),"'cnt_uniq' does not match with sum of 'cnt_n_2'!")
    max_T = max(T)

    # solve u
    # for every auction under setting `i`
    for a in range(0,A):
        T[a]