# 导入模块并改变工作目录

In [2]:
import pandas as pd
import numpy  as np
from sklearn import svm
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.decomposition import PCA
from prettytable import PrettyTable 
import talib
import time
% matplotlib inline

In [3]:
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *
init_notebook_mode() 

In [4]:
os.chdir('J:\Lifelong Learning\QUANT\SVM')

# 一、导入数据

定义数据导入并格式化的函数

In [5]:
def import_data(file):
    data = pd.read_csv(file, index_col=0, squeeze=True)
    data.index = pd.to_datetime(data.index)
    return data

#### 沪深300基本交易数据

In [6]:
asset = 'HS300'

In [7]:
data = import_data('%s.csv' % asset)  # 数据来源于wind

In [8]:
data.head()

Unnamed: 0_level_0,open,high,low,close,volume,value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-04,994.769,994.769,980.658,982.794,741286894,4431977000.0
2005-01-05,981.577,997.323,979.877,992.564,711910898,4529208000.0
2005-01-06,993.331,993.788,980.33,983.174,628802905,3921015000.0
2005-01-07,983.045,995.711,979.812,983.958,729869409,4737469000.0
2005-01-10,983.76,993.959,979.789,993.879,579169799,3762933000.0


In [9]:
x = data.index
y = data['close']
iplot({'data':[Scatter(x=x, y=y, mode='line', name=asset)],'layout':Layout(title=asset)},link_text='')

#### 标普指数、道琼指数、人民币兑美元汇率中间价、美元指数

In [10]:
international_data = import_data('international_data.csv')  # 数据来源于wind

In [None]:
international_data.head()

#### cpi与pmi月度数据

In [11]:
cpi = import_data('cpi.csv')
pmi = import_data('pmi.csv')  # 数据来源于国家统计局

注：cpi与pmi均是以去年同月为100计算的

In [None]:
cpi.head()

#### 货币供应量：M0，M1，M2

In [12]:
money_supply =import_data('money_supply.csv')  # 数据来源于国家统计局

数据中包含M0，M1，M2的数据以及同比增长率

In [None]:
money_supply.head()

#### 固定资产投资同比增长率，月度

In [13]:
fixed_asset_inv = import_data('fixed_asset_inv.csv')  # 数据来源于国家统计局

这里，固定资产投资为累计量，并且为了避免1月份春节给数据准确度造成影响，1/2月一起统计，所以1月份数据是缺失的。为了填补1月分数据的缺失，我们假定1/2月投资量是一样的。然后计算出每月新增固定资产投资，最后计算出每月的同比增长率。

In [14]:
Jan = fixed_asset_inv.isnull()
Feb = fixed_asset_inv.isnull().shift().fillna(False)
fixed_asset_inv[Jan] = list(fixed_asset_inv[Feb] / 2)
new_inv = fixed_asset_inv.diff()
new_inv[Jan] = list(fixed_asset_inv[Feb] / 2)
new_inv_pct = new_inv.pct_change(12).dropna()

In [None]:
new_inv_pct.head()

#### 工业增加值同比增长率，月度

这里，工业增加值也没有1月份的数据，1/2月一起统计的。由于数据本身已是同比增长率了，所以我们直接用2月份的同比增长率填充1月份的数据。

In [15]:
value_added = import_data('ind_value_added_change.csv')  # 数据来源于国家统计局
value_added.fillna(method='bfill', inplace=True)

In [None]:
value_added.head()

# 二、SVM训练并预测

#### 关于数据标准化

这里有一点需要注意，关于标准化的相关问题。在《SVM商品期货择时策略.pdf》文章中，作者提到了同一资产最高价与最低价标准化后，最低价反而比最高价大的情况（如下图）。这种情况的存在不足为奇，标准化后这两个数据谁高谁低并没有一个统一的答案，但这却反映了另外一个问题：就是各个特征标准化后，他们之间具有的大小关系（或者其他关系）被忽略掉了。就拿最高价和最低价为例，就每一个时点而言，最高价肯定不小于最低价，这种关系的变化可能是未来的价格走势的某种信号：最高价与最低价相等可能以为着未来价格震荡（瞎说的），而最高价远大于最低价，则可能意味着未来价格会上涨（也是瞎说的）。标准化后，最高价与最低价共同传达出的信号被剔除了，相当于我们舍弃掉了一个有用的信号。  
如何解决这个问题呢？ 
1. 特征之间具有关系且存在有用信号的，不对其做标准化处理。但是，如果存在特征没有进行标准化，其他特征的标准化就没有任何意义；另外，如何判断两个特征之间是否具有关系，该关系是否存在有用的信号？
2. 另一种更为可行的方法是，加入具有有用关系的特征的组合特征。比如最高价与最低价的比值，最高价与最低价的差值。但这个方法也有一个问题，就是如何构建特征之间的组合特征？

In [16]:
temp = (data - data.mean()) / data.std()
iplot({'data':[Scatter(x=temp.index, y=temp['high'], mode='line', name='high'), 
               Scatter(x=temp.index, y=temp['low'], mode='line', name='low')],
       'layout':Layout(title=asset)},link_text='')

#### 定义分析所用函数

- 滚动训练svm模型，并预测下一期涨跌

In [217]:
def SvmPredict(char_values, pol, window, z, decom, kernel_function, C, output, mode='rolling'):
    print('========'+ kernel_function + '=========', file=output)
    print('========'+ kernel_function + '=========')
    prediction = []
    pca_components = []
    for t in char_values.index[window:]:
        clf = svm.SVC(kernel=kernel_function, C=C)
        if mode == 'rolling':
            x = char_values[:t].ix[-(window+1):-1]
            y = pol[:t].ix[-(window+1):-1]
        elif mode == 'all':  # 此模式下用之前所有数据来训练模型
            x = char_values[:t].ix[:-1]
            y = pol[:t].ix[:-1]
        new_feature = char_values.ix[t]
        if z:
            x = (x - x.mean()) / x.std()
            new_feature = (new_feature - x.mean()) / x.std()
            if decom:
                pca = PCA(n_components=0.95)
                x = pca.fit_transform(x)
                pca_components.append(pca.n_components_)
                new_feature = pca.transform([list(new_feature)])
        else:
            new_feature = [list(new_feature)]
        clf.fit(x, y)
        prediction.append(clf.predict(new_feature))
    pca_components = pd.Series(pca_components, index=char_values.index[window:], name='pca_components')
    prediction = pd.DataFrame(prediction, index=char_values.index[window:], columns=['prediction'])
    return prediction, pca_components

SVM的代码做法参考：  
[scikit-learn官方文档](http://scikit-learn.org/stable/modules/svm.html)  
[sklearn.svm.SVC](http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)  
PCA的代码做法参考：  
[scikit-learn中PCA的使用方法](http://doc.okbase.net/u012162613/archive/120946.html)  
[sklearn.decomposition.PCA](http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html)

- 结果输出函数

In [18]:
def table_print(df, output):
    x = PrettyTable([df.index.name] + list(df.columns))
    x.align[df.index.name] = "l"
    for i in df.index:
        x.add_row([i] + list(df.ix[i]))
    print(x, file=output)
    print(x)

- 预测胜率统计函数

In [31]:
def analysis(pol, output, prediction, kernel_function):
    prediction['real'] = pol[window:]
    bingo = prediction['prediction'] == prediction['real']
    wrong = prediction['prediction'] != prediction['real']
    
    print('预测的准确率为：' + str(bingo.sum() / prediction.shape[0]), file=output)
    print('预测的准确率为：' + str(bingo.sum() / prediction.shape[0]))
    print('上涨次数占比为：' + str(prediction['real'].value_counts()[1] / prediction.shape[0]), file=output)
    print('上涨次数占比为：' + str(prediction['real'].value_counts()[1] / prediction.shape[0]))
    
    pre_rise = prediction['prediction'] == 1
    pre_fall = prediction['prediction'] == -1
    rea_rise = prediction['real'] == 1
    rea_fall = prediction['real'] == -1
    
    a = np.logical_and(rea_rise, pre_rise).sum()
    b = np.logical_and(rea_rise, pre_fall).sum()
    c = np.logical_and(rea_fall, pre_rise).sum()
    d = np.logical_and(rea_fall, pre_fall).sum()

    summary = pd.DataFrame([[a,c],[b,d]], columns=['real_rise', 'real_fall'], index=['predict_rise', 'predict_fall'])
    summary['sum'] = summary.sum(axis=1)

    temp = pd.DataFrame(summary.sum(), columns=['sum'])
    summary = pd.concat([summary, temp.T], axis=0)
    summary.index.name = kernel_function
    ratio = summary/summary.ix['sum']
    ratio.columns = ['rise', 'fall', 'total']
    summary = pd.concat([summary, ratio], axis=1)
    table_print(summary, output)
    return summary

- 绘图函数

In [20]:
def combine_plot(close, prediction, kernel, window):
    signals = prediction['prediction'].shift(1)
    close = close[signals.index]
    close_rise = close[signals==1]
    close_rise.name = 'close_rise'
    close_fall = close[signals==-1]
    close_fall.name = 'close_fall'
    close = pd.concat([close, close_rise, close_fall], axis=1)
    x = close.index
    iplot({'data':[Scatter(x=x, y=close['close'], mode='line', name='HS300'), 
                   Scatter(x=x, y=close['close_rise'], mode='markers', name='predict to rise'), 
                   Scatter(x=x, y=close['close_fall'], mode='markers', name='predict to fall')],
           'layout':Layout(title=kernel + '  ' + str(window))},link_text='')

- 回测函数

In [203]:
def backTest(signals, open_close, start_value, fee_rate, margin):
    date = [i.strftime('%Y-%m-%d') for i in signals.index]
    position = dict()
    position[date[0]] = 0
    deal = dict()
    deal[date[0]] = 0
    Volume = dict()
    Volume[date[0]] = 0
    net_value = dict()
    net_value[date[0]] = start_value
    stop = False
    for t in date[:-1]:
        next_day = date[date.index(t) + 1]
        signal = signals.ix[t]
        if net_value[t] <= 0:
            stop = True
            stop_date = t
            stop_date1 = date[date.index(t) - 1]
            break
        if (position[t] == 0) and (signal == -1):
            position[next_day] = -1
            deal[next_day] = -1
            volume = int(net_value[t] / (open_close['open'][next_day] * (margin + fee_rate)) / 100.) * 100
            Volume[next_day] = volume
            net_value[next_day] = net_value[t] - volume * open_close['open'][next_day] * fee_rate
        elif (position[t] == 0) and (signal == 1):
            position[next_day] = 1
            deal[next_day] = 1
            volume = int(net_value[t] / (open_close['open'][next_day] * (margin + fee_rate)) / 100.) * 100
            Volume[next_day] = volume
            net_value[next_day] = net_value[t] - volume * open_close['open'][next_day] * fee_rate
        elif (position[t] == -1) and (signal == -1):
            position[next_day] = -1
            deal[next_day] = 0
            net_value[next_day] = net_value[t] + Volume[t] * position[t] * (open_close['open'][next_day] - open_close['open'][t])
            Volume[next_day] = Volume[t]
        elif (position[t] == -1) and (signal == 1):
            position[next_day] = 1
            deal[next_day] = 1
            value = (net_value[t] + 
                     Volume[t] * position[t] * (open_close['open'][next_day] - open_close['open'][t]) - 
                     Volume[t] * open_close['open'][next_day] * fee_rate)
            volume = int(value / (open_close['open'][next_day] * (margin + fee_rate)) / 100.) * 100
            Volume[next_day] = volume
            net_value[next_day] = value - volume * open_close['open'][next_day] * fee_rate
        elif (position[t] == 1) and (signal == -1):
            position[next_day] = -1
            deal[next_day] = -1
            value = (net_value[t] + 
                     Volume[t] * position[t] * (open_close['open'][next_day] - open_close['open'][t]) - 
                     Volume[t] * open_close['open'][next_day] * fee_rate)
            volume = int(value / (open_close['open'][next_day] * (margin + fee_rate)) / 100) * 100
            Volume[next_day] = volume
            net_value[next_day] = value - volume * open_close['open'][next_day] * fee_rate
        elif (position[t] == 1) and (signal == 1):
            position[next_day] =1
            deal[next_day] = 0
            net_value[next_day] = net_value[t] + Volume[t] * position[t] * (open_close['open'][next_day] - open_close['open'][t])
            Volume[next_day] = Volume[t]
    if stop:
        del position[stop_date], Volume[stop_date], net_value[stop_date]
        del position[stop_date1], Volume[stop_date1], net_value[stop_date1]
        del deal[stop_date]
        del deal[stop_date1]
    a = pd.Series(position)
    a.index = pd.to_datetime(a.index)
    b = pd.Series(deal)
    b.index = pd.to_datetime(b.index)
    c = pd.Series(Volume)
    c.index = pd.to_datetime(c.index)
    return a, b, c

- 净值计算函数

In [104]:
def cum_gain(deal, Volume, position, open_close, start_value, fee_rate, margin):
    '''
    open_close 为日度的数据
    deal, Volume, position 可以是日度、周度、月度
    需要先把非日度数据调整为日度，再进行净值计算
    '''
    date = [i.strftime('%Y-%m-%d') for i in open_close.index]
    deal = deal.shift(-1).reindex(open_close.index, fill_value=0).shift(1)
    Volume = Volume.reindex(open_close.index, method='bfill')
    position = position.reindex(open_close.index, method='bfill')
    net_value = dict()
    net_value[date[0]] = start_value
    for t in date[1:]:
        yesd = date[date.index(t) - 1]
        if deal[t] == 0:
            net_value[t] = net_value[yesd] + Volume[yesd] * position[yesd] * (open_close['open'][t] - open_close['open'][yesd])
        elif (deal[t] == 1) and (position[yesd] == 0):
            net_value[t] = net_value[yesd] - Volume[t] * open_close['open'][t] * fee_rate
        elif (deal[t] == 1) and (position[yesd] == -1):
            value = (net_value[yesd] + 
                     Volume[yesd] * position[yesd] * (open_close['open'][t] - open_close['open'][yesd]) - 
                     Volume[yesd] * open_close['open'][t] * fee_rate)
            net_value[t] = value - Volume[t] * open_close['open'][t] * fee_rate
        elif (deal[t] == -1) and (position[yesd] == 0):
            net_value[t] = net_value[yesd] - Volume[t] * open_close['open'][t] * fee_rate
        elif (deal[t] == -1) and (position[yesd] == 1):
            value = (net_value[yesd] + 
                     Volume[yesd] * position[yesd] * (open_close['open'][t] - open_close['open'][yesd]) - 
                     Volume[yesd] * open_close['open'][t] * fee_rate)
            net_value[t] = value - Volume[t] * open_close['open'][t] * fee_rate
    temp = pd.Series(net_value)
    temp.index = pd.to_datetime(temp.index)
    return temp

- 回撤函数

In [22]:
def draw_back(cum_gain, mode):
    if mode == 'A':  # 回撤数值，绝对量
        cum_max = cum_gain.cummax()
        draw_back_amount = cum_max - cum_gain
        return draw_back_amount
    elif mode == 'R':  # 回撤比例，相对值
        cum_max = cum_gain.cummax()
        draw_back_portion = (cum_max - cum_gain) / cum_max
        return draw_back_portion

- 回测结果汇总函数

In [175]:
def analysis_2(cum_gain, output):
    day_num = len(cum_gain)  # 回测时长（天）
    
    net_gain = int(cum_gain.ix[-1] - cum_gain.ix[0])  # 净利润
    annualized_return = (cum_gain.ix[-1] / cum_gain.ix[0] - 1) * 250. / float(day_num) * 100  # 年化收益率
    compound_annualized_return = ((cum_gain[-1] / cum_gain[0]) ** (250 / day_num) - 1) * 100  # 复合年化收益率
    max_draw_back = draw_back(cum_gain, mode='R').max() * 100  # 最大回撤
    annualized_std = cum_gain.pct_change().std() * 250 ** 0.5 * 100  # 年化波动率
    sharpe_ratio = float(compound_annualized_return / annualized_std)  # 夏普比率
    start_value = int(cum_gain.ix[0])  # 期初权益
    end_value = int(cum_gain.ix[-1])  # 期末权益
    start_time = cum_gain.index[0].strftime('%Y-%m-%d')  # 起始时间
    end_time = cum_gain.index[-1].strftime('%Y-%m-%d')  # 终止时间
    return_analysis = pd.DataFrame([net_gain,'%d%s' % (int(compound_annualized_return), '%'),'%d%s' %(int(max_draw_back),'%'),
                                    '%d%s'%(int(annualized_std),'%'),round(sharpe_ratio,2),start_value,end_value,
                                    start_time,end_time,'%d天'%day_num])
    return_analysis.index = ['净利润','复合年化收益率','最大回撤','波动率','夏普比率','期初权益','期末权益',
                             '起始时间','终止时间','回测时长']
    return_analysis.columns = ['收益率分析']
    table_print(return_analysis.T, output)
    return return_analysis.T

# 尝试一

#### 计算相关特征值

In [230]:
try_id = '尝试一'
cycle = 'D'

In [231]:
feature_set = 'group3'
predict_window = 'W'

In [232]:
if (feature_set == 'group1') or (feature_set == 'group2') or (feature_set == 'group3'):
    feature = data.copy(deep=True)
    # 基本指标

if (feature_set == 'group2') or (feature_set == 'group3'):
    feature['returns'] = feature['close'].pct_change()
    feature['5returns'] = feature['close'].pct_change(5)
    feature['15returns'] = feature['close'].pct_change(15)
    feature['ma1'] = feature['close'].rolling(window=10).mean()
    feature['ma2'] = feature['close'].rolling(window=20).mean()
    feature['var'] = feature['close'].rolling(window=20).std()
    feature['max_high'] = feature['high'].rolling(window=10).max()
    feature['min_low'] = feature['low'].rolling(window=10).min()
    feature['ma_volume'] = feature['volume'].rolling(window=10).mean()
    feature['ma_value'] = feature['value'].rolling(window=10).mean()
    # 衍生指标
    
if feature_set == 'group3':
    feature['rVol'] = feature['volume'] / feature['ma_volume']
    feature['rVal'] = feature['value'] / feature['ma_value']
    feature['rClose'] = feature['close'] / feature['ma1']
    feature['rHigh'] = feature['high'] / feature['ma1']
    feature['rLow'] = feature['low'] / feature['ma1']
    
    a, b, c = talib.MACD(np.array(feature['close']))
    feature['MACD'] = pd.Series(a, index=feature.index)
    feature['MACDsignal'] = pd.Series(b, index=feature.index)
    feature['MACDhist'] = pd.Series(c, index=feature.index)

    sar = talib.SAR(np.array(feature['high']), np.array(feature['low']))
    feature['SAR'] = pd.Series(sar, index=feature.index)

    mom = talib.MOM(np.array(feature['close']))
    feature['MOM'] = pd.Series(mom, index=feature.index)

    rsi = talib.MOM(np.array(feature['close']))
    feature['RSI'] = pd.Series(rsi, index=feature.index)

    x, y, z = talib.BBANDS(np.array(feature['close']))
    feature['BBdist'] = (pd.Series(x, index=feature.index) - feature['close']) / feature['var']
    
    # 技术指标
if predict_window == 'D':
    feature['follow_return'] = feature['close'].pct_change().shift(-1)
elif predict_window == 'W':
    feature['follow_return'] = feature['close'].pct_change(5).shift(-5)

feature.dropna(axis=0, how='any', inplace=True)
pol = feature['follow_return'].copy(deep=True)
pol[pol > 0] = 1
pol[pol <= 0] = -1
del feature['follow_return']

In [233]:
feature.head()

Unnamed: 0_level_0,open,high,low,close,volume,value,returns,5returns,15returns,ma1,...,rClose,rHigh,rLow,MACD,MACDsignal,MACDhist,SAR,MOM,RSI,BBdist
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-03-01,1039.351,1042.737,1031.168,1035.934,996209016,6288791000.0,-0.003899,-0.010327,0.068845,1033.471,...,1.002383,1.008966,0.997772,15.990289,11.630691,4.359598,1023.260732,19.076,19.076,0.479483
2005-03-02,1036.368,1045.76,1021.004,1021.32,1422513553,8971308000.0,-0.014107,-0.021672,0.069581,1033.2446,...,0.988459,1.012113,0.988153,14.409325,12.186417,2.222908,1059.483,-2.264,-2.264,1.161834
2005-03-03,1019.876,1028.402,1014.752,1027.71,920308564,5582812000.0,0.006257,-0.016978,0.075066,1033.955,...,0.99396,0.994629,0.981428,13.516213,12.452376,1.063836,1058.71342,7.104,7.104,0.816764
2005-03-04,1027.927,1031.847,1022.176,1023.667,817364167,5383868000.0,-0.003934,-0.022062,0.016639,1035.7162,...,0.988366,0.996264,0.986927,12.339932,12.429888,-0.089956,1056.954963,17.612,17.612,0.684946
2005-03-07,1024.48,1031.344,1024.48,1029.871,793294979,5316473000.0,0.006061,-0.009729,0.036906,1036.14,...,0.99395,0.995371,0.988747,11.772624,12.298435,-0.525811,1055.266845,4.238,4.238,0.281951


In [234]:
close = data['close']

#### 滚动训练预测

In [235]:
start_value = 1000000
fee_rate = 0.001
margin = 1

In [236]:
z = True  # 是否标准化特征数据
decom = True  # 是否进行pca降维
window_list = [60, 120]  # 设定训练数据时间长度（周或者日）
kernel_list = ['rbf', 'linear', 'sigmoid']  # 核函数
C = 10  # [1, 5, 10]

In [237]:
output = open('分析对象%s_%s_数据频率%s_预测长度%s_标准化%s_PCA%s.txt' % (asset,try_id, cycle,predict_window, str(z), str(decom)), 'w+')
for window in window_list:
    print('\n训练数据长度：%d' % window, file=output)
    print('\n训练数据长度：%d' % window)
    for f in kernel_list:
        prediction, pca_components = SvmPredict(feature, pol, window, z, decom, f, C, output)
        iplot({'data':[Scatter(x=pca_components.index, y=pca_components, mode='line', name='PCA 95% 特征数量')],
               'layout': Layout(title='PCA 95% 主成分数量')})
        signals = prediction['prediction'].copy(deep=True)
        _ = analysis(pol, output, prediction, f)
        combine_plot(close, prediction, f, window)
        open_close = feature.ix[signals.index][['open', 'close']]
        position, deal, Volume = backTest(signals, open_close, start_value, fee_rate, margin)
        open_close = data[deal.index[0]:deal.index[-1]][['open', 'close']]
        net_value = cum_gain(deal, Volume, position, open_close, start_value, fee_rate, margin)
        _ = analysis_2(net_value, output)
        iplot({'data':[Scatter(x=net_value.index, y=net_value, mode='line', name='cum_gain')],
               'layout':Layout(title='累积净值 \n' + f + ' ' + str(window))},link_text='')
output.close()


训练数据长度：60


预测的准确率为：0.571278825996
上涨次数占比为：0.556953179595
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| rbf          | real_rise | real_fall |  sum   |      rise      |      fall      |     total      |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| predict_rise |   1006.0  |   639.0   | 1645.0 | 0.631116687578 | 0.503943217666 | 0.574772886094 |
| predict_fall |   588.0   |   629.0   | 1217.0 | 0.368883312422 | 0.496056782334 | 0.425227113906 |
| sum          |   1594.0  |   1268.0  | 2862.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+


+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润  | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | 80986432 |      46%       |   21%    |  27%   |   1.73   | 1000000  | 81986432 | 2005-05-31 | 2017-03-06 |  2862天  |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+




预测的准确率为：0.477987421384
上涨次数占比为：0.556953179595
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| linear       | real_rise | real_fall |  sum   |      rise      |      fall      |     total      |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| predict_rise |   726.0   |   626.0   | 1352.0 | 0.455457967378 | 0.493690851735 | 0.472396925227 |
| predict_fall |   868.0   |   642.0   | 1510.0 | 0.544542032622 | 0.506309148265 | 0.527603074773 |
| sum          |   1594.0  |   1268.0  | 2862.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+


+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | -764457 |      -11%      |   89%    |  23%   |   -0.5   | 1000000  |  235542  | 2005-05-31 | 2017-03-06 |  2862天  |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+




预测的准确率为：0.566736547869
上涨次数占比为：0.556953179595
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| sigmoid      | real_rise | real_fall |  sum   |      rise      |      fall      |     total      |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| predict_rise |   1041.0  |   687.0   | 1728.0 | 0.653074027604 | 0.541798107256 | 0.603773584906 |
| predict_fall |   553.0   |   581.0   | 1134.0 | 0.346925972396 | 0.458201892744 | 0.396226415094 |
| sum          |   1594.0  |   1268.0  | 2862.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+


+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润  | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | 68004329 |      44%       |   32%    |  27%   |   1.62   | 1000000  | 69004329 | 2005-05-31 | 2017-03-06 |  2862天  |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+



训练数据长度：120


预测的准确率为：0.563526052819
上涨次数占比为：0.555317630264
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| rbf          | real_rise | real_fall |  sum   |      rise      |      fall      |     total      |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| predict_rise |   1002.0  |   669.0   | 1671.0 | 0.643958868895 | 0.536918138042 | 0.596359743041 |
| predict_fall |   554.0   |   577.0   | 1131.0 | 0.356041131105 | 0.463081861958 | 0.403640256959 |
| sum          |   1556.0  |   1246.0  | 2802.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+


+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润  | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | 23604721 |      33%       |   29%    |  27%   |   1.18   | 1000000  | 24604721 | 2005-08-23 | 2017-03-06 |  2802天  |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+




预测的准确率为：0.485724482512
上涨次数占比为：0.555317630264
+--------------+-----------+-----------+--------+---------------+----------------+----------------+
| linear       | real_rise | real_fall |  sum   |      rise     |      fall      |     total      |
+--------------+-----------+-----------+--------+---------------+----------------+----------------+
| predict_rise |   821.0   |   706.0   | 1527.0 | 0.52763496144 | 0.566613162119 | 0.544967880086 |
| predict_fall |   735.0   |   540.0   | 1275.0 | 0.47236503856 | 0.433386837881 | 0.455032119914 |
| sum          |   1556.0  |   1246.0  | 2802.0 |      1.0      |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+---------------+----------------+----------------+


+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | -790877 |      -13%      |   83%    |  18%   |   -0.7   | 1000000  |  209122  | 2005-08-23 | 2017-03-06 |  2802天  |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+




预测的准确率为：0.573875802998
上涨次数占比为：0.555317630264
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| sigmoid      | real_rise | real_fall |  sum   |      rise      |      fall      |     total      |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+
| predict_rise |   1069.0  |   707.0   | 1776.0 | 0.687017994859 | 0.567415730337 | 0.633832976445 |
| predict_fall |   487.0   |   539.0   | 1026.0 | 0.312982005141 | 0.432584269663 | 0.366167023555 |
| sum          |   1556.0  |   1246.0  | 2802.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+--------+----------------+----------------+----------------+


+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润  | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | 25336158 |      33%       |   42%    |  27%   |   1.22   | 1000000  | 26336158 | 2005-08-23 | 2017-03-06 |  2802天  |
+------------+----------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+


## 尝试二

#### 计算相关特征值

In [166]:
try_id = '尝试二'
cycle = 'W'

In [167]:
feature_set = 'group3'
predict_window = 'M'

In [168]:
if (feature_set == 'group1') or (feature_set == 'group2') or (feature_set == 'group3'):
    resample_data = data.resample('W')
    
    feature = pd.DataFrame()
    feature['open'] = resample_data['open'].first().dropna()  # 周开盘价
    feature['close'] = resample_data['close'].last().dropna()  # 周收盘价
    feature['high'] = resample_data['high'].max().dropna()  # 周最高价
    feature['low'] = resample_data['low'].min().dropna()  # 周最低价
    feature['volume'] = resample_data['volume'].sum().dropna()  # 周成交量
    feature['value'] = resample_data['value'].sum().dropna()  # 周成交额
    # 基本指标

if (feature_set == 'group2') or (feature_set == 'group3'):
    feature['returns'] = feature['close'].pct_change()
    feature['5returns'] = feature['close'].pct_change(5)
    feature['15returns'] = feature['close'].pct_change(15)
    feature['ma1'] = feature['close'].rolling(window=10).mean()
    feature['ma2'] = feature['close'].rolling(window=20).mean()
    feature['var'] = feature['close'].rolling(window=20).std()
    feature['max_high'] = feature['high'].rolling(window=10).max()
    feature['min_low'] = feature['low'].rolling(window=10).min()
    feature['ma_volume'] = feature['volume'].rolling(window=10).mean()
    feature['ma_value'] = feature['value'].rolling(window=10).mean()
    # 衍生指标
    
if feature_set == 'group3':
    feature['rVol'] = feature['volume'] / feature['ma_volume']
    feature['rVal'] = feature['value'] / feature['ma_value']
    feature['rClose'] = feature['close'] / feature['ma1']
    feature['rHigh'] = feature['high'] / feature['ma1']
    feature['rLow'] = feature['low'] / feature['ma1']
    
    a, b, c = talib.MACD(np.array(feature['close']))
    feature['MACD'] = pd.Series(a, index=feature.index)
    feature['MACDsignal'] = pd.Series(b, index=feature.index)
    feature['MACDhist'] = pd.Series(c, index=feature.index)

    sar = talib.SAR(np.array(feature['high']), np.array(feature['low']))
    feature['SAR'] = pd.Series(sar, index=feature.index)

    mom = talib.MOM(np.array(feature['close']))
    feature['MOM'] = pd.Series(mom, index=feature.index)

    rsi = talib.MOM(np.array(feature['close']))
    feature['RSI'] = pd.Series(rsi, index=feature.index)

    x, y, z = talib.BBANDS(np.array(feature['close']))
    feature['BBdist'] = (pd.Series(x, index=feature.index) - feature['close']) / feature['var']
    
    # 技术指标
if predict_window == 'W':
    feature['follow_return'] = feature['close'].pct_change().shift(-1)
elif predict_window == 'M':
    feature['follow_return'] = feature['close'].pct_change(4).shift(-4)

feature.dropna(axis=0, how='any', inplace=True)
pol = feature['follow_return'].copy(deep=True)
pol[pol > 0] = 1
pol[pol <= 0] = -1
del feature['follow_return']

In [169]:
close = feature['close']

#### 滚动训练预测

In [170]:
start_value = 1000000
fee_rate = 0.001
margin = 1

In [173]:
z = True  # 是否标准化特征数据
decom = True  # 是否进行pca降维
window_list = [50, 100]  # 设定训练数据时间长度（周）
kernel_list = ['rbf', 'linear', 'sigmoid']  # 核函数
C = 10  # [1, 5, 10]

In [174]:
output = open('分析对象%s_%s_数据频率%s_预测长度%s_标准化%s_PCA%s.txt' % (asset, try_id, cycle,predict_window, str(z), str(decom)), 'w+')
for window in window_list:
    print('\n训练数据长度：%d' % window, file=output)
    print('\n训练数据长度：%d' % window)
    for f in kernel_list:
        prediction, pca_components = SvmPredict(feature, pol, window, z, decom, f, C, output)
        iplot({'data':[Scatter(x=pca_components.index, y=pca_components, mode='line', name='PCA 95% 特征数量')],
               'layout': Layout(title='PCA 95% 主成分数量')})
        signals = prediction['prediction'].copy(deep=True)
        _ = analysis(pol, output, prediction, f)
        combine_plot(close, prediction, f, window)
        open_close = feature.ix[signals.index][['open', 'close']]
        position, deal, Volume = backTest(signals, open_close, start_value, fee_rate, margin)
        open_close = data[deal.index[0]:deal.index[-1]][['open', 'close']]
        net_value = cum_gain(deal, Volume, position, open_close, start_value, fee_rate, margin)
        _ = analysis_2(net_value, output)
        iplot({'data':[Scatter(x=net_value.index, y=net_value, mode='line', name='cum_gain')],
               'layout':Layout(title='累积净值 \n' + f + ' ' + str(window))},link_text='')
output.close()


训练数据长度：50
预测的准确率为：0.544090056285
上涨次数占比为：0.555347091932
+--------------+-----------+-----------+-------+----------------+----------------+---------------+
| rbf          | real_rise | real_fall |  sum  |      rise      |      fall      |     total     |
+--------------+-----------+-----------+-------+----------------+----------------+---------------+
| predict_rise |   168.0   |   115.0   | 283.0 | 0.567567567568 | 0.485232067511 | 0.53095684803 |
| predict_fall |   128.0   |   122.0   | 250.0 | 0.432432432432 | 0.514767932489 | 0.46904315197 |
| sum          |   296.0   |   237.0   | 533.0 |      1.0       |      1.0       |      1.0      |
+--------------+-----------+-----------+-------+----------------+----------------+---------------+


+------------+----------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    |  净利润  | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+----------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 10993503 |      27%       |   44%    |  28%   |   0.99   | 1000000  | 11993503 | 2006-09-18 00:00:00 | 2017-02-17 00:00:00 |  2531天  |
+------------+----------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+


预测的准确率为：0.562851782364
上涨次数占比为：0.555347091932
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| linear       | real_rise | real_fall |  sum  |      rise      |      fall      |     total      |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| predict_rise |   141.0   |    78.0   | 219.0 | 0.476351351351 | 0.329113924051 | 0.410881801126 |
| predict_fall |   155.0   |   159.0   | 314.0 | 0.523648648649 | 0.670886075949 | 0.589118198874 |
| sum          |   296.0   |   237.0   | 533.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+


+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    | 净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 496048 |       4%       |   71%    |  24%   |   0.17   | 1000000  | 1496048  | 2006-09-18 00:00:00 | 2017-02-17 00:00:00 |  2531天  |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+


预测的准确率为：0.542213883677
上涨次数占比为：0.555347091932
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| sigmoid      | real_rise | real_fall |  sum  |      rise      |      fall      |     total      |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| predict_rise |   174.0   |   122.0   | 296.0 | 0.587837837838 | 0.514767932489 | 0.555347091932 |
| predict_fall |   122.0   |   115.0   | 237.0 | 0.412162162162 | 0.485232067511 | 0.444652908068 |
| sum          |   296.0   |   237.0   | 533.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+


+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    |  净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 4362840 |      18%       |   44%    |  28%   |   0.64   | 1000000  | 5362840  | 2006-09-18 00:00:00 | 2017-02-17 00:00:00 |  2531天  |
+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+



训练数据长度：100
预测的准确率为：0.47619047619
上涨次数占比为：0.51966873706
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| rbf          | real_rise | real_fall |  sum  |      rise      |      fall      |     total      |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| predict_rise |   121.0   |   123.0   | 244.0 | 0.482071713147 | 0.530172413793 | 0.505175983437 |
| predict_fall |   130.0   |   109.0   | 239.0 | 0.517928286853 | 0.469827586207 | 0.494824016563 |
| sum          |   251.0   |   232.0   | 483.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+


+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    | 净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 174971 |       1%       |   47%    |  22%   |   0.08   | 1000000  | 1174971  | 2007-09-17 00:00:00 | 2017-02-17 00:00:00 |  2289天  |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+


预测的准确率为：0.544513457557
上涨次数占比为：0.51966873706
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| linear       | real_rise | real_fall |  sum  |      rise      |      fall      |     total      |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+
| predict_rise |    97.0   |    66.0   | 163.0 | 0.386454183267 | 0.284482758621 | 0.337474120083 |
| predict_fall |   154.0   |   166.0   | 320.0 | 0.613545816733 | 0.715517241379 | 0.662525879917 |
| sum          |   251.0   |   232.0   | 483.0 |      1.0       |      1.0       |      1.0       |
+--------------+-----------+-----------+-------+----------------+----------------+----------------+


+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    |  净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 4330955 |      20%       |   36%    |  26%   |   0.74   | 1000000  | 5330955  | 2007-09-17 00:00:00 | 2017-02-17 00:00:00 |  2289天  |
+------------+---------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+


预测的准确率为：0.538302277433
上涨次数占比为：0.51966873706
+--------------+-----------+-----------+-------+----------------+---------------+----------------+
| sigmoid      | real_rise | real_fall |  sum  |      rise      |      fall     |     total      |
+--------------+-----------+-----------+-------+----------------+---------------+----------------+
| predict_rise |   177.0   |   149.0   | 326.0 | 0.705179282869 | 0.64224137931 | 0.674948240166 |
| predict_fall |    74.0   |    83.0   | 157.0 | 0.294820717131 | 0.35775862069 | 0.325051759834 |
| sum          |   251.0   |   232.0   | 483.0 |      1.0       |      1.0      |      1.0       |
+--------------+-----------+-----------+-------+----------------+---------------+----------------+


+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
|    None    | 净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |       起始时间      |       终止时间      | 回测时长 |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+
| 收益率分析 | 459995 |       4%       |   45%    |  21%   |   0.2    | 1000000  | 1459995  | 2007-09-17 00:00:00 | 2017-02-17 00:00:00 |  2289天  |
+------------+--------+----------------+----------+--------+----------+----------+----------+---------------------+---------------------+----------+


## 尝试三

#### 计算相关特征值

In [208]:
try_id = '尝试三'
cycle = 'M'

In [209]:
feature_set = 'group3'
predict_window = 'M'

In [210]:
if (feature_set == 'group1') or (feature_set == 'group2') or (feature_set == 'group3'):
    resample_data = data.resample('M')
    
    feature = pd.DataFrame()
    feature['open'] = resample_data['open'].first().dropna()  # 月开盘价
    feature['close'] = resample_data['close'].last().dropna()  # 月收盘价
    feature['high'] = resample_data['high'].max().dropna()  # 月最高价
    feature['low'] = resample_data['low'].min().dropna()  # 月最低价
    feature['volume'] = resample_data['volume'].sum().dropna()  # 月成交量
    feature['value'] = resample_data['value'].sum().dropna()  # 月成交额
    # 基本指标

if (feature_set == 'group2') or (feature_set == 'group3'):
    feature['returns'] = feature['close'].pct_change()
    feature['5returns'] = feature['close'].pct_change(6)
    feature['15returns'] = feature['close'].pct_change(12)
    feature['ma1'] = feature['close'].rolling(window=12).mean()
    feature['ma2'] = feature['close'].rolling(window=24).mean()
    feature['var'] = feature['close'].rolling(window=24).std()
    feature['max_high'] = feature['high'].rolling(window=12).max()
    feature['min_low'] = feature['low'].rolling(window=12).min()
    feature['ma_volume'] = feature['volume'].rolling(window=12).mean()
    feature['ma_value'] = feature['value'].rolling(window=12).mean()
    # 衍生指标
    
if feature_set == 'group3':
    feature['rVol'] = feature['volume'] / feature['ma_volume']
    feature['rVal'] = feature['value'] / feature['ma_value']
    feature['rClose'] = feature['close'] / feature['ma1']
    feature['rHigh'] = feature['high'] / feature['ma1']
    feature['rLow'] = feature['low'] / feature['ma1']
    
    a, b, c = talib.MACD(np.array(feature['close']))
    feature['MACD'] = pd.Series(a, index=feature.index)
    feature['MACDsignal'] = pd.Series(b, index=feature.index)
    feature['MACDhist'] = pd.Series(c, index=feature.index)

    sar = talib.SAR(np.array(feature['high']), np.array(feature['low']))
    feature['SAR'] = pd.Series(sar, index=feature.index)

    mom = talib.MOM(np.array(feature['close']))
    feature['MOM'] = pd.Series(mom, index=feature.index)

    rsi = talib.MOM(np.array(feature['close']))
    feature['RSI'] = pd.Series(rsi, index=feature.index)

    x, y, z = talib.BBANDS(np.array(feature['close']))
    feature['BBdist'] = (pd.Series(x, index=feature.index) - feature['close']) / feature['var']
    # 技术指标
    feature = pd.concat([feature, money_supply.shift(1), international_data.resample(cycle).last()], axis=1)
    feature['CPI'] = cpi.shift(1)
    feature['PMI'] = pmi.shift(1)
    feature['value_added'] = value_added.shift(1)
    feature['inv'] = fixed_asset_inv.shift(1)
    
if predict_window == 'M':
    feature['follow_return'] = feature['close'].pct_change().shift(-1)
elif predict_window == 'M':
    feature['follow_return'] = feature['close'].pct_change(4).shift(-4)

feature.dropna(axis=0, how='any', inplace=True)
pol = feature['follow_return'].copy(deep=True)
pol[pol > 0] = 1
pol[pol <= 0] = -1
del feature['follow_return']

In [211]:
close = feature['close']

In [212]:
start_value = 1000000
fee_rate = 0.001
margin = 1

In [214]:
z = True  # 是否标准化特征数据
decom = True  # 是否进行pca降维
window_list = [24]  # 设定训练数据时间长度（周）
kernel_list = ['rbf']  # 核函数
C = 10  # [1, 5, 10]

In [215]:
output = open('分析对象%s_%s_数据频率%s_预测长度%s_标准化%s_PCA%s.txt' % (asset,try_id, cycle,predict_window, str(z), str(decom)), 'w+')
for window in window_list:
    print('\n训练数据长度：%d' % window, file=output)
    print('\n训练数据长度：%d' % window)
    for f in kernel_list:
        prediction, pca_components = SvmPredict(feature, pol, window, z, decom, f, C, output)
        iplot({'data':[Scatter(x=pca_components.index, y=pca_components, mode='line', name='PCA 95% 特征数量')],
               'layout': Layout(title='PCA 95% 主成分数量')})
        signals = prediction['prediction'].copy(deep=True)
        _ = analysis(pol, output, prediction, f)
        combine_plot(close, prediction, f, window)
        open_close = feature.ix[signals.index][['open', 'close']]
        position, deal, Volume = backTest(signals, open_close, start_value, fee_rate, margin)
        open_close = data[deal.index[0]:deal.index[-1]][['open', 'close']]
        net_value = cum_gain(deal, Volume, position, open_close, start_value, fee_rate, margin)
        _ = analysis_2(net_value, output)
        iplot({'data':[Scatter(x=net_value.index, y=net_value, mode='line', name='cum_gain')],
               'layout':Layout(title='累积净值 \n' + f + ' ' + str(window))},link_text='')
output.close()


训练数据长度：24
预测的准确率为：0.48275862069
上涨次数占比为：0.494252873563
+--------------+-----------+-----------+------+---------------+------+---------------+
| rbf          | real_rise | real_fall | sum  |      rise     | fall |     total     |
+--------------+-----------+-----------+------+---------------+------+---------------+
| predict_rise |    20.0   |    22.0   | 42.0 | 0.46511627907 | 0.5  | 0.48275862069 |
| predict_fall |    23.0   |    22.0   | 45.0 | 0.53488372093 | 0.5  | 0.51724137931 |
| sum          |    43.0   |    44.0   | 87.0 |      1.0      | 1.0  |      1.0      |
+--------------+-----------+-----------+------+---------------+------+---------------+


+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
|    None    |  净利润 | 复合年化收益率 | 最大回撤 | 波动率 | 夏普比率 | 期初权益 | 期末权益 |  起始时间  |  终止时间  | 回测时长 |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
| 收益率分析 | -839012 |      -23%      |   87%    |  29%   |  -0.78   | 1000000  |  160987  | 2009-11-02 | 2016-12-30 |  1744天  |
+------------+---------+----------------+----------+--------+----------+----------+----------+------------+------------+----------+
