In [None]:
import numpy as np
import pandas as pd
#pd.set_option('display.max_rows', None)
import time
from datetime import datetime
from scipy.stats import norm
from scipy.integrate import quad
from sko.PSO import PSO
from sko.GA import GA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from hyperopt import hp
from hyperopt import fmin, tpe
import xgboost as xgb
from joblib import Parallel,delayed

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the pre-split stacking datasets; the first spreadsheet column becomes the index.
train_data = pd.read_excel('./stacking_train_data.xlsx', index_col=0)
test_data = pd.read_excel('./stacking_test_data.xlsx', index_col=0)

# The regression target is the 'price' column.
train_y, test_y = train_data['price'], test_data['price']

# Features are every column after the first one
# (presumably the first column is 'price' -- verify against the spreadsheets).
train_x = train_data.loc[:, train_data.columns[1:]]
test_x = test_data.loc[:, test_data.columns[1:]]

In [None]:
# Row counts: train, test, and their total (one value per line).
print(len(train_data), len(test_data), len(train_data) + len(test_data), sep='\n')

In [None]:
def my_heston_call(S,K,T,r,params,phi_max=100):
    """Price a European call option under the Heston stochastic-volatility model.

    The price is ``S*P1 - K*exp(-r*T)*P2``; each probability ``Pj`` is obtained
    by numerically inverting the corresponding characteristic function over
    ``phi`` in ``(0, phi_max)``.

    The characteristic function is evaluated in the numerically stable form
    that uses ``exp(-d*T)`` and ``c = 1/g`` ("little Heston trap" formulation).
    It is algebraically identical to the textbook ``exp(+d*T)`` / ``g`` form,
    but it does not overflow to NaN when ``sigma*phi*T`` is large and it keeps
    the complex logarithm on its principal branch for long maturities.

    Parameters
    ----------
    S : float
        Spot price of the underlying (must be > 0, its log is taken).
    K : float
        Strike price (must be > 0, its log is taken).
    T : float
        Time to maturity.
    r : float
        Risk-free rate.
    params : sequence of 5 floats
        ``(v0, kappa, theta, sigma, rho)``, unpacked in exactly this order.
        ``sigma`` must be non-zero because the formula divides by ``sigma**2``.
    phi_max : float, optional
        Upper truncation limit of the Fourier integral. Defaults to 100, the
        value that was previously hard-coded.

    Returns
    -------
    float
        The model call price.
    """
    v0, kappa, theta, sigma, rho = params
    a = kappa*theta
    log_spot = np.log(S)
    log_strike = np.log(K)

    def characteristic_function(phi, type_j):
        # type_j == 1 -> characteristic function behind P1, otherwise behind P2.
        if type_j == 1:
            u = 0.5
            b = kappa - rho*sigma
        else:
            u = -0.5
            b = kappa

        beta = b - rho*sigma*phi*1j
        # np.sqrt returns the principal root (real part >= 0), so exp(-d*T) is bounded.
        d = np.sqrt(beta**2 - sigma**2*(2*u*phi*1j - phi**2))
        c = (beta - d)/(beta + d)          # reciprocal of the textbook "g"
        decay = np.exp(-d*T)
        D = r*phi*1j*T + (a/sigma**2)*((beta - d)*T - 2*np.log((1 - c*decay)/(1 - c)))
        E = ((beta - d)/sigma**2)*(1 - decay)/(1 - c*decay)
        return np.exp(D + E*v0 + 1j*phi*log_spot)

    def probability(type_j):
        # Real part of the inversion integrand. It is singular at phi == 0
        # (division by 1j*phi); the quadrature is relied upon not to sample
        # the endpoint itself.
        def integrand(phi):
            value = np.exp(-1j*phi*log_strike)*characteristic_function(phi, type_j)/(1j*phi)
            return value.real
        return 0.5 + (1/np.pi)*quad(integrand, 0, phi_max)[0]

    p1 = probability(1)
    p2 = probability(2)
    return S*p1 - np.exp(-r*T)*K*p2


def my_func_train(params):
    """Mean squared error of Heston call prices against the training targets.

    Reads the module-level ``train_x`` (columns 'S', 'K', 'T', 'r') and
    ``train_y``; ``params`` is forwarded unchanged to ``my_heston_call``.
    """
    columns = (train_x['S'].values, train_x['K'].values,
               train_x['T'].values, train_x['r'].values)
    # One model price per training row, in row order.
    model_prices = np.array([
        my_heston_call(spot, strike, maturity, rate, params)
        for spot, strike, maturity, rate in zip(*columns)
    ])
    return mean_squared_error(model_prices, train_y.values)


def my_func_test(params):
    """Mean squared error of Heston call prices against the test targets.

    Reads the module-level ``test_x`` (columns 'S', 'K', 'T', 'r') and
    ``test_y``; ``params`` is forwarded unchanged to ``my_heston_call``.
    """
    columns = (test_x['S'].values, test_x['K'].values,
               test_x['T'].values, test_x['r'].values)
    # One model price per test row, in row order.
    model_prices = np.array([
        my_heston_call(spot, strike, maturity, rate, params)
        for spot, strike, maturity, rate in zip(*columns)
    ])
    return mean_squared_error(model_prices, test_y.values)

In [None]:
# Hyperopt search space: one uniform prior per Heston parameter, listed in the
# order in which my_heston_call unpacks them (v0, kappa, theta, sigma, rho).
# NOTE(review): rho is only searched over [0, 1]; a correlation can also be
# negative -- confirm that this restriction is intentional.
# NOTE(review): sigma may be drawn arbitrarily close to 0, and the pricing
# formula divides by sigma**2 -- confirm that this is acceptable.
space = [
    hp.uniform('v0', 0, 2),
    hp.uniform('kappa', 0, 20),
    hp.uniform('theta', 0, 2),
    hp.uniform('sigma', 0, 2),
    hp.uniform('rho', 0, 1),
]

def heston_data(data,params):
    """Return a NumPy array with one Heston call price per row of ``data``.

    ``data`` must expose the columns 'S', 'K', 'T' and 'r'; ``params`` is
    forwarded unchanged to ``my_heston_call`` for every row.
    """
    rows = zip(data['S'].values, data['K'].values, data['T'].values, data['r'].values)
    return np.array([
        my_heston_call(spot, strike, maturity, rate, params)
        for spot, strike, maturity, rate in rows
    ])

# up: unused placeholder argument (heston_model never reads it)
def heston_model(up):
    """Calibrate Heston parameters on one bootstrap resample of ``train_data``.

    ``up`` is never read. A sample of 80% of the training rows is drawn with
    replacement, and hyperopt's TPE search (100 evaluations over ``space``)
    minimises the MSE between model prices and the sampled 'price' column.
    Returns whatever ``fmin`` returns for the best trial.
    """
    bootstrap = train_data.sample(frac=0.8, replace=True, axis=0)
    bootstrap_x = bootstrap[bootstrap.columns[1:]]
    bootstrap_y = bootstrap['price']

    def bootstrap_mse(params):
        # Objective for fmin: pricing error on this resample only.
        return mean_squared_error(heston_data(bootstrap_x, params), bootstrap_y.values)

    return fmin(bootstrap_mse, space, algo=tpe.suggest, max_evals=100)

def xgb_model(params_list):
    """Stack the calibrated Heston models with an XGBoost regressor.

    Every row of ``params_list`` is one Heston parameter set. Its prices over
    the concatenated train + test rows become one feature column
    ('params0', 'params1', ...). The regressor is fitted on the rows that came
    from ``train_data`` and evaluated on the rows that came from ``test_data``.

    The split point and the number of feature columns are derived from
    ``len(train_data)`` and ``len(params_list)`` instead of being hard-coded,
    so the function stays correct when the dataset size or the number of
    calibrations changes.

    Parameters
    ----------
    params_list : pandas.DataFrame
        One row per calibration; row values are passed to ``heston_data``.

    Returns
    -------
    tuple
        ``(train_mse, test_mse)``.

    Raises
    ------
    ValueError
        If ``params_list`` has no rows (there would be no features to fit).
    """
    n_models = len(params_list)
    if n_models == 0:
        raise ValueError('params_list must contain at least one parameter set')

    n_train = len(train_data)
    all_data = pd.concat([train_data, test_data])

    # One feature column per parameter set, built in a single pass; rows are
    # positional (train rows first, then test rows).
    sh_data = pd.DataFrame({
        'params' + str(i): heston_data(all_data, params_list.iloc[i].values)
        for i in range(n_models)
    })
    # Positional copy of the target so it lines up with sh_data's fresh index.
    target = pd.Series(all_data['price'].values, name='price')

    xgb_train_x, xgb_test_x = sh_data.iloc[:n_train], sh_data.iloc[n_train:]
    xgb_train_y, xgb_test_y = target.iloc[:n_train], target.iloc[n_train:]

    # Named differently from the function so the local does not shadow it.
    regressor = xgb.XGBRegressor()
    regressor.fit(xgb_train_x, xgb_train_y)

    train_mse = mean_squared_error(regressor.predict(xgb_train_x), xgb_train_y)
    test_mse = mean_squared_error(regressor.predict(xgb_test_x), xgb_test_y)
    return train_mse, test_mse

In [None]:
# Per-run accumulators: elapsed time and test-set error of each experiment round.
time_list, test_se = [], []

In [None]:
### NOTE: remember to change this (translated from the original comment)
# Repeat the whole experiment 5 times, recording the elapsed time and the test error of each round.
for j in range(5):
    print('model:%s'%j)
    start=time.time()
    # Run 10 calibrations in parallel on all cores; heston_model does not use the argument i.
    params_list=Parallel(n_jobs=-1)(delayed(heston_model)(i) for i in range(10))
    # One row per calibration, columns in the order my_heston_call unpacks them.
    params_list=pd.DataFrame(params_list)[['v0','kappa','theta','sigma','rho']]
    # train_error is returned but not recorded anywhere in this loop.
    train_error,test_error=xgb_model(params_list)
    end=time.time()
    time_list.append((end-start)/3600)  # elapsed seconds converted to hours
    test_se.append(test_error)

In [None]:
# Summary table: one row per experiment round (elapsed time in hours, test error).
sbo_result = pd.DataFrame({'time': time_list, 'test se': test_se})