4 regimes - Weathers Portfolio Data Process 

original data

regime 1: high GDP, high CPI

regime 2: high GDP, low CPI

regime 3: low GDP, high CPI

regime 4: low GDP, low CPI

In [11]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [12]:
# Load the raw regime labels. The source file codes the regimes as 1-4:
#   1 = high GDP / high CPI (HGHC)    2 = high GDP / low CPI (HGLC)
#   3 = low GDP / high CPI (LGHC)     4 = low GDP / low CPI (LGLC)
regime_labels = {1: 'HGHC', 2: 'HGLC', 3: 'LGHC', 4: 'LGLC'}
df0 = pd.read_csv('weathers_state.csv').assign(
    # Readable label next to the numeric code; unknown codes become NaN.
    state_name=lambda frame: frame['state'].map(regime_labels)
)
df0

Unnamed: 0,Date,state,state_name
0,19930201,4,LGLC
1,19930301,4,LGLC
2,19930401,1,HGHC
3,19930501,2,HGLC
4,19930601,4,LGLC
...,...,...,...
310,20181201,2,HGLC
311,20190101,2,HGLC
312,20190201,4,LGLC
313,20190301,1,HGHC


In [13]:
# Re-index the regimes as 0, 1, 2, 3 so that the exponential rate lambda
# assigned to each regime in the sampling cells below increases with the index:
#   HGLC -> 0, HGHC -> 1, LGLC -> 2, LGHC -> 3
# (The original note says this matches the indexing of the original synthetic
# data and quotes rates 1/25, 1/5, 1, 5; the sampling cells below define their
# own scale maps.)
#
# The index is derived from `state_name`, which this cell never modifies, so
# the cell can be re-run safely. The previous in-place
# `df0['state'].replace({2: 0, 4: 2})` was not idempotent: a second run turned
# the already-remapped LGLC code 2 into 0 and silently merged it with HGLC.
# Rows whose `state_name` is missing (raw code outside 1-4) become NaN here.
df0['state'] = df0['state_name'].map({'HGLC': 0, 'HGHC': 1, 'LGLC': 2, 'LGHC': 3})
df0

Unnamed: 0,Date,state,state_name
0,19930201,2,LGLC
1,19930301,2,LGLC
2,19930401,1,HGHC
3,19930501,0,HGLC
4,19930601,2,LGLC
...,...,...,...
310,20181201,0,HGLC
311,20190101,0,HGLC
312,20190201,2,LGLC
313,20190301,1,HGHC


In [14]:
# Sample used below: "given" window 2009-05 .. 2017-04 (96 rows) followed by
# the "upcoming" window 2017-05 .. 2019-04 (24 rows), 96 + 24 = 120.
# Date holds YYYYMMDD values, so the bounds are plain numeric comparisons.
in_window = df0['Date'].ge(20090501) & df0['Date'].lt(20190501)
df_new = df0.loc[in_window].reset_index(drop=True)
df_new

Unnamed: 0,Date,state,state_name
0,20090501,2,LGLC
1,20090601,2,LGLC
2,20090701,0,HGLC
3,20090801,1,HGHC
4,20090901,1,HGHC
...,...,...,...
115,20181201,0,HGLC
116,20190101,0,HGLC
117,20190201,2,LGLC
118,20190301,1,HGHC


In [18]:
# How often each regime index (0-3) occurs over the whole df_new sample;
# a regime that never occurs is reported as 0.
counts = df_new['state'].value_counts().sort_index()
print("\n".join(f"State {k}: {counts.get(k, 0)} times" for k in range(4)))

State 0: 36 times
State 1: 28 times
State 2: 23 times
State 3: 33 times


In [19]:
# Regime counts over the first 96 rows of df_new.
# (The printed header reads "occurrences of each state in the first 96".)
counts_first96 = df_new['state'].head(96).value_counts().sort_index()
print("👉 前96个状态出现次数：")
print("\n".join(f"State {k}: {counts_first96.get(k, 0)} times" for k in range(4)))

👉 前96个状态出现次数：
State 0: 29 times
State 1: 21 times
State 2: 19 times
State 3: 27 times


In [20]:
# Regime counts over the last 24 rows of df_new.
# (The printed header reads "occurrences of each state in the last 24".)
counts_last24 = df_new['state'].tail(24).value_counts().sort_index()
print("\n👉 后24个状态出现次数：")
print("\n".join(f"State {k}: {counts_last24.get(k, 0)} times" for k in range(4)))


👉 后24个状态出现次数：
State 0: 7 times
State 1: 7 times
State 2: 4 times
State 3: 6 times


In [10]:
# Draw one exponential sample xi per row of df_new; the scale (= 1 / rate) is
# selected by that row's regime:
#   regime 0: rate 1/30, regime 1: rate 1/18, regime 2: rate 1/12, regime 3: rate 1
# so the rate lambda increases with the regime index.
S = df_new['state'].to_numpy()
rng = np.random.default_rng(seed=50)  # fixed seed keeps the draws reproducible
scale_map = {0: 30, 1: 18, 2: 12, 3: 1}

# One frozen distribution per regime; samples are still drawn one at a time,
# in row order, from the shared generator.
regime_dist = {regime: stats.expon(scale=scale) for regime, scale in scale_map.items()}
xi = np.array([regime_dist[s].rvs(random_state=rng) for s in S], dtype=float)

df_new['xi'] = xi
# Per-regime sample mean of xi, for comparison with the scales above.
df_new.groupby('state')['xi'].mean()

state
0    31.887967
1    13.074849
2     9.858685
3     0.727808
Name: xi, dtype: float64

In [4]:
# Alternative sample: "given" window 2000-01 .. 2007-12 (96 rows) followed by
# the "upcoming" window 2008-01 .. 2009-12 (24 rows), 96 + 24 = 120.
# Date holds YYYYMMDD values, so the bounds are plain numeric comparisons.
in_window = df0['Date'].ge(20000101) & df0['Date'].lt(20100101)
df_used = df0.loc[in_window].reset_index(drop=True)
df_used

Unnamed: 0,Date,state,state_name
0,20000101,3,LGHC
1,20000201,3,LGHC
2,20000301,1,HGHC
3,20000401,0,HGLC
4,20000501,3,LGHC
...,...,...,...
115,20090801,1,HGHC
116,20090901,1,HGHC
117,20091001,1,HGHC
118,20091101,3,LGHC


In [5]:
# Regime sequence of df_used as a NumPy array, one entry per row.
S = df_used.loc[:, 'state'].to_numpy()
S

array([3, 3, 1, 0, 3, 1, 2, 0, 3, 2, 2, 2, 1, 2, 2, 3, 1, 2, 0, 0, 2, 0,
       2, 0, 0, 2, 1, 3, 2, 0, 1, 3, 0, 3, 1, 3, 3, 1, 3, 0, 0, 1, 2, 1,
       1, 0, 0, 3, 3, 0, 1, 3, 1, 3, 0, 2, 2, 1, 3, 0, 0, 3, 3, 1, 2, 0,
       3, 1, 3, 2, 0, 0, 3, 0, 0, 3, 1, 3, 2, 0, 0, 2, 1, 3, 2, 1, 3, 0,
       3, 1, 2, 0, 3, 3, 3, 0, 1, 2, 0, 2, 1, 1, 3, 2, 2, 2, 2, 2, 0, 1,
       2, 2, 2, 2, 0, 1, 1, 1, 3, 1])

In [6]:
# How often each regime index (0-3) occurs over the whole df_used sample;
# a regime that never occurs is reported as 0.
counts = df_used['state'].value_counts().sort_index()
print("\n".join(f"State {k}: {counts.get(k, 0)} times" for k in range(4)))

State 0: 30 times
State 1: 28 times
State 2: 31 times
State 3: 31 times


In [8]:
# Regime counts over the first 96 rows of df_used.
# (The printed header reads "occurrences of each state in the first 96".)
counts_first96 = df_used['state'].head(96).value_counts().sort_index()
print("👉 前96个状态出现次数：")
print("\n".join(f"State {k}: {counts_first96.get(k, 0)} times" for k in range(4)))

👉 前96个状态出现次数：
State 0: 27 times
State 1: 20 times
State 2: 20 times
State 3: 29 times


In [9]:
# Regime counts over the last 24 rows of df_used.
# (The printed header reads "occurrences of each state in the last 24".)
counts_last24 = df_used['state'].tail(24).value_counts().sort_index()
print("\n👉 后24个状态出现次数：")
print("\n".join(f"State {k}: {counts_last24.get(k, 0)} times" for k in range(4)))


👉 后24个状态出现次数：
State 0: 3 times
State 1: 8 times
State 2: 11 times
State 3: 2 times


In [6]:
# Fixed seed so the simulated draws are reproducible.
rng = np.random.default_rng(seed=40)

# Scale (= 1 / rate) of the exponential distribution for each regime:
#   regime 0: rate 1/27, regime 1: rate 1/9, regime 2: rate 1/3, regime 3: rate 1
# i.e. the rate lambda increases with the regime index.
scale_map = {0: 27, 1: 9, 2: 3, 3: 1}

# One frozen distribution per regime; one sample is drawn per entry of S,
# in order, from the shared generator.
regime_dist = {regime: stats.expon(scale=scale) for regime, scale in scale_map.items()}
xi = np.array([regime_dist[s].rvs(random_state=rng) for s in S], dtype=float)

In [7]:
# Attach the simulated draws to df_used as column `xi` (one value per row).
df_used = df_used.assign(xi=xi)
df_used

Unnamed: 0,Date,state,state_name,xi
0,20000101,3,LGHC,1.044443
1,20000201,3,LGHC,1.081783
2,20000301,1,HGHC,12.228168
3,20000401,0,HGLC,3.049605
4,20000501,3,LGHC,3.577926
...,...,...,...,...
115,20090801,1,HGHC,3.642819
116,20090901,1,HGHC,37.734735
117,20091001,1,HGHC,6.669465
118,20091101,3,LGHC,1.127751


In [8]:
# Preview the first 50 rows (regime and simulated xi side by side).
df_used.iloc[:50]

Unnamed: 0,Date,state,state_name,xi
0,20000101,3,LGHC,1.044443
1,20000201,3,LGHC,1.081783
2,20000301,1,HGHC,12.228168
3,20000401,0,HGLC,3.049605
4,20000501,3,LGHC,3.577926
5,20000601,1,HGHC,57.62153
6,20000701,2,LGLC,5.207583
7,20000801,0,HGLC,10.934506
8,20000901,3,LGHC,0.414132
9,20001001,2,LGLC,4.766138


In [9]:
# Per-regime sample mean of xi, for comparison with the scales in scale_map.
df_used.groupby('state')['xi'].agg('mean')

state
0    27.699707
1    10.426432
2     2.996756
3     0.933353
Name: xi, dtype: float64

In [18]:
# demand: decreasing order
# rate: increasing order
# satisfy the demand