In [140]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
import joblib
import datetime

In [141]:
def get_bondPremRatio(conv_price,current_price):
    """Return the premium ratio, in percent, implied by a conversion value.

    The conversion value is obtained from ``get_conv_value`` and compared
    against the constant 100 (presumably the bond's face value -- TODO confirm).

    Parameters
    ----------
    conv_price : conversion price, forwarded to ``get_conv_value``.
    current_price : current price, forwarded to ``get_conv_value``.

    Returns
    -------
    ``(100 / conversion_value - 1) * 100``.
    """
    conversion_value = get_conv_value(conv_price,current_price)
    par_over_value = 100/conversion_value
    return (par_over_value-1)*100

def convert_permratio_inner(bondPremRatio):
    """Turn a premium ratio given in percent into the matching inner value.

    Parameters
    ----------
    bondPremRatio : premium ratio in percent (e.g. 25 means 25%).

    Returns
    -------
    ``100 / (bondPremRatio / 100 + 1)``; a ratio of 0 therefore maps to 100.
    """
    premium_factor = bondPremRatio/100+1
    return 100/premium_factor

# Conversion value
def get_conv_value(conv_price,current_price):
    """Return the conversion value: ``100 / conv_price * current_price``.

    Parameters
    ----------
    conv_price : conversion price; must be non-zero.
    current_price : current price that the 100/conv_price factor is applied to.
    """
    units_per_hundred = 100/conv_price
    return units_per_hundred*current_price

In [142]:
# ---- Load the raw bond table ------------------------------------------------
# NOTE(review): absolute, machine-specific path; consider a configurable data
# directory so the notebook runs on other machines.
file = '/home/xda/hub/convertible_bond/data/new_bond_v1.xlsx'
data = pd.read_excel(file,index_col=0)

# ---- Encode categorical columns as integers ---------------------------------
# Rating labels -> integer codes (a larger code corresponds to a later entry in
# this table). Labels missing from the table become NaN.
rate_map = {
    'A':0,
    'A+':1,
    'AA-':2,
    'AA':3,
    'AA+':4,
    'AAA':5
}

data['rate'] = data['rate'].map(rate_map)

# Industry names -> integer codes. Names missing from the table become NaN.
industry_map = {
    '化工': 0,
    '医药生物': 1,
    '机械设备': 2,
    '电子': 3,
    '电气设备': 4,
    '轻工制造': 5,
    '汽车': 6,
    '有色金属': 7,
    '计算机': 8,
    '建筑装饰': 9,
    '公用事业': 10,
    '农林牧渔': 11,
    '银行': 12,
    '纺织服装': 13,
    '食品饮料': 14,
    '国防军工': 15,
    '通信': 16,
    '非银金融': 17,
    '交通运输': 18,
    '家用电器': 19,
    '建筑材料': 20,
    '采掘': 21,
    '钢铁': 22,
    '传媒': 23,
    '商业贸易': 24,
    '休闲服务': 25,
}
data['industry'] = data['industry'].map(industry_map)

# ---- Derived feature --------------------------------------------------------
# innerValue replaces bondPremRatio as a model input (the ratio itself is
# dropped from the training frame below).
data['innerValue'] = data['bondPremRatio'].map(convert_permratio_inner)

# ---- Build the training frame, feature matrix and target --------------------
# drop() returns a new frame, so `data` keeps every original column.
train_data = data.drop(columns=['secShortNameBond','bondPremRatio'])

target = train_data['closePriceBond']

# Missing put-trigger terms are filled with 0. This uses a non-inplace
# DataFrame.fillna on purpose: `frame['col'].fillna(..., inplace=True)` acts on
# a temporary column object and does not update the frame under pandas
# Copy-on-Write, which would leave the NaNs in the feature matrix.
data_source = (
    train_data
    .drop(columns=['closePriceBond','firstDate','code'])
    .fillna({'put_trigger_date':0,'put_trigger_rate':0})
)

In [143]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# between runs.
X_train,X_test,y_train,y_test = train_test_split(
    data_source,
    target,
    test_size=0.2,
    random_state=4,
)

In [146]:
def evaluate(y_test,y_test_pre):
    """Print a regression report comparing predictions with the true values.

    Parameters
    ----------
    y_test : true target values.
    y_test_pre : predicted values, aligned element-wise with ``y_test``.

    Returns
    -------
    None. Everything is written to stdout: five scikit-learn metrics rounded
    to two decimals, then the mean absolute error and an "accuracy" defined as
    100 minus the mean absolute percentage error.
    """
    import sklearn.metrics as sm

    # (label, scorer) pairs, reported in this order.
    scorers = (
        ('mean absolute error=', sm.mean_absolute_error),
        ('mean squared error=', sm.mean_squared_error),
        ('median absolute error=', sm.median_absolute_error),
        ('explained variance score=', sm.explained_variance_score),
        ('R2 score=', sm.r2_score),
    )
    for label, scorer in scorers:
        print(label, round(scorer(y_test, y_test_pre), 2))

    # Element-wise absolute error; dividing by y_test assumes it has no zeros.
    abs_err = abs(y_test_pre - y_test)
    mean_pct_err = 100 * np.mean(abs_err / y_test)

    print('Model Performance')
    # NOTE(review): the "degrees" label is kept for output compatibility; the
    # value is in the same unit as y_test.
    print('Average Error: {:0.4f} degrees.'.format(np.mean(abs_err)))
    print('Accuracy = {:0.2f}%.'.format(100 - mean_pct_err))

In [155]:
# Hyper-parameter search space. The full grid has 6*8*7*5*3 = 5040
# combinations, so a randomized search samples 100 of them instead of trying
# every one.
param_grid = {
    'max_depth': [4,5,6,7,8,9],
    'max_features': [2,3,4,5,6,7,8,9],
    'min_samples_leaf': [2,3, 4, 5,6,7,8],
    'min_samples_split': [2,4,8, 10, 12],
    'n_estimators': [50,70,80]
}

# Base estimator. It is seeded so that every candidate forest -- and therefore
# the selected parameters and the reported scores -- is reproducible; seeding
# only the search would fix which candidates are sampled but not how each
# forest is grown.
rf = RandomForestRegressor(random_state=42)

# 100 sampled candidates x 5-fold cross-validation, using all CPU cores.
random_search = RandomizedSearchCV(estimator = rf, param_distributions = param_grid, n_iter = 100, cv = 5, verbose=2, random_state=42, n_jobs = -1)
random_search.fit(X_train,y_train)
print(random_search.best_params_)

# With the default refit=True the search object predicts with the best
# candidate refitted on all of X_train.
y_pred_grid = random_search.predict(X_test)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
{'n_estimators': 70, 'min_samples_split': 2, 'min_samples_leaf': 6, 'max_features': 4, 'max_depth': 8}


In [148]:
# Report hold-out metrics for the predictions stored in y_pred_grid.
evaluate(y_test=y_test, y_test_pre=y_pred_grid)

mean absolute error= 9.51
mean squared error= 131.91
median absolute error= 8.05
explained variance score= 0.08
R2 score= 0.05
Model Performance
Average Error: 9.5080 degrees.
Accuracy = 91.38%.


In [156]:
# Train a standalone forest with the best parameters found by the search.
# The seed makes the fitted model (and the metrics printed below) repeatable;
# without it each run grows a different forest and reports different scores.
rf_model = RandomForestRegressor(random_state=42, **random_search.best_params_)
rf_model.fit(X_train,y_train)

# Score the refitted model on the held-out split.
y_pred_grid = rf_model.predict(X_test)
evaluate(y_test,y_pred_grid)

mean absolute error= 9.69
mean squared error= 136.16
median absolute error= 8.51
explained variance score= 0.05
R2 score= 0.02
Model Performance
Average Error: 9.6895 degrees.
Accuracy = 91.21%.


In [149]:
# Persist the fitted model under a file name stamped with today's date
# (YYYYMMDD). joblib.dump returns the list of written files, which the
# notebook displays as the cell result.
today_fmt = format(datetime.datetime.now(), '%Y%m%d')
joblib.dump(rf_model, filename=f"train_model_{today_fmt}.m")

['train_model_20210722.m']

In [112]:
# Reload a previously saved model by its date stamp and score it on the
# held-out split.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
date='20210722'
model_file = f"train_model_{date}.m"
rf_model = joblib.load(model_file)
y_pred_grid = rf_model.predict(X_test)
evaluate(y_test=y_test, y_test_pre=y_pred_grid)

mean absolute error= 9.56
mean squared error= 134.43
median absolute error= 8.58
explained variance score= 0.06
R2 score= 0.03
Model Performance
Average Error: 9.5600 degrees.
Accuracy = 91.33%.


In [157]:
# Feature importances of the fitted forest, as fractions.
raw_importances = rf_model.feature_importances_

# Rank on the unrounded values. Ranking after rounding to whole percent made
# features that tie after rounding (e.g. several at 0) come out in an
# arbitrary order.
indices = np.argsort(raw_importances)[::-1]

# Displayed values are whole percentages, as before.
importances = [round(value * 100, 0) for value in raw_importances]

for rank, idx in enumerate(indices, start=1):
    print("%2d) %-*s %.2f" % (rank, 30, X_train.columns[idx], importances[idx]))

 1) innerValue                     37.00
 2) industry                       24.00
 3) remainSize                     21.00
 4) coupon                         9.00
 5) rate                           5.00
 6) put_trigger_rate               2.00
 7) put_trigger_date               1.00
 8) call_trigger_rate              0.00
 9) call_trigger_date              0.00


In [128]:
# Check the container type of the fitted model's feature importances.
type(rf_model.feature_importances_)

numpy.ndarray

In [158]:
# rf_model = None

# Hand-built sample to price with the model.
zg_price = 8.29       # passed as current_price below
convert_price = 8.35  # passed as conv_price below

innerValue = get_conv_value(convert_price, zg_price)
print(f'转股价值{innerValue}')

# Keys are listed in the same order as the feature columns shown for X_test.
input_data = dict(
    call_trigger_date=30,
    call_trigger_rate=130,  # forced redemption (call) trigger
    coupon=0.2,
    industry=industry_map.get('交通运输'),
    put_trigger_date=30,
    put_trigger_rate=70,  # triggerRate: put trigger as a percentage of face value (%)
    rate=rate_map.get('AAA'),
    # NOTE(review): a sample row of X_test shown in this notebook has
    # remainSize = 12.0, which suggests a different unit than the raw amount
    # used here -- confirm the unit the model was trained on.
    remainSize=30.00*10**9,
    innerValue=innerValue,
)



转股价值99.2814371257485


In [159]:
# Load the saved model for the given date stamp and price the hand-built
# sample.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
date='20210722'
model_file = f"train_model_{date}.m"
rf_model = joblib.load(model_file)
sample_frame = pd.DataFrame([input_data])
y_predict_real = rf_model.predict(sample_frame)
print(y_predict_real[0])

111.10628703361485


In [160]:
# Do not load a model from disk; use the rf_model already in memory.
df_real = pd.DataFrame(data=[input_data])
print(df_real)
y_predict_real = rf_model.predict(df_real)
first_prediction = y_predict_real[0]
print(first_prediction)

   call_trigger_date  call_trigger_rate  coupon  industry  put_trigger_date  \
0                 30                130     0.2        18                30   

   put_trigger_rate  rate    remainSize  innerValue  
0                70     5  3.000000e+10   99.281437  
111.10628703361485


In [99]:
# Display the single-row frame that is passed to the model.
df_real

Unnamed: 0,call_trigger_date,call_trigger_rate,coupon,industry,put_trigger_date,put_trigger_rate,rate,remainSize,innerValue
0,30,130,0.2,18,30,70,0,30000000000.0,99.281437


In [79]:
# Inspect the first row of the held-out feature matrix.
X_test.iloc[0]

call_trigger_date     30.000000
call_trigger_rate    130.000000
coupon                 0.400000
industry               2.000000
put_trigger_date      30.000000
put_trigger_rate      70.000000
rate                   3.000000
remainSize            12.000000
innerValue            96.185846
Name: 221, dtype: float64