# 投资组合专题

In [1]:
import sys
import pandas as pd
import numpy as np

# 自行编写的包
sys.path.append('/home/ubuntu/notebooks/pycharm_projects/Investor-Sentiment')
sys.path.append('/usr/local/stata17/utilities')

# Stata
from pystata import config

config.init('mp')


  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      17.0
___/   /   /___/   /   /___/       MP—Parallel Edition

 Statistics and Data Science       Copyright 1985-2021 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: Single-user 8-core , expiring  1 Jan 2025
Serial number: 501709301094
  Licensed to: Colin's Stata 17 MP
               Love you

Notes:
      1. Unicode is supported; see help unicode_advice.
      2. More than 2 billion observations are allowed; see help obs_advice.
      3. Maximum number of variables is set to 5,000; see help set_maxvar.


In [2]:
df_data = pd.read_csv('/data/DataSets/investor_sentiment/FINAL_DATA_2014_2021.csv')
df_data

Unnamed: 0.1,Unnamed: 0,trade_date,ts_code,CON_YEAR,CON_OR,CON_NP,CON_EPS,CON_NA,CON_PB,CON_PS,...,NIPO_R,RIPO_R,SR_R,l1_TURN_R,lnPD_R,SENT_INDEX_R,SENT_INDEX_EX,SENT_INDEX_DEX,SENT_INDEX_R_EX,SENT_INDEX_R_DEX
0,7,20140102,000985.SH,2014,3.034892e+09,27421.678403,0.6857,2.017385e+09,1.3334,0.8863,...,-0.993200,-0.429099,15.745780,-0.003943,-0.208500,-1.191905,-0.571980,-0.588495,-0.435412,-0.451046
1,18,20140103,000985.SH,2014,3.033876e+09,27421.601209,0.6857,2.017584e+09,1.3213,0.8787,...,-0.993200,-0.429099,15.745780,-0.004333,-0.208500,-1.182604,-0.598652,-0.588830,-0.502679,-0.452930
2,29,20140106,000985.SH,2014,3.033345e+09,27412.012060,0.6854,2.027389e+09,1.2895,0.8619,...,-0.993200,-0.429099,15.745780,-0.003813,-0.208500,-1.175354,-0.623006,-0.589509,-0.563137,-0.455863
3,40,20140107,000985.SH,2014,3.028546e+09,27384.608096,0.6846,2.027386e+09,1.2917,0.8647,...,-0.993200,-0.429099,15.745780,-0.003643,-0.208500,-1.172984,-0.644795,-0.590505,-0.617575,-0.459753
4,51,20140108,000985.SH,2014,3.028892e+09,27355.547003,0.6838,2.028490e+09,1.2935,0.8663,...,-0.993200,-0.429099,15.745780,-0.004513,-0.208500,-1.185114,-0.664723,-0.591815,-0.666961,-0.464538
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1939,21350,20211227,000985.SH,2021,6.389869e+09,53799.927579,0.7131,5.236956e+09,1.7835,1.4617,...,-0.044895,-0.374305,-2.832969,0.001832,0.097983,0.390557,1.170102,1.302731,0.960139,1.050876
1940,21361,20211228,000985.SH,2021,6.391559e+09,53788.880275,0.7129,5.237495e+09,1.7933,1.4695,...,2.955105,-0.346122,-2.832969,0.000582,0.097983,1.186737,1.127746,1.298598,0.909492,1.046500
1941,21372,20211229,000985.SH,2021,6.389955e+09,53725.674838,0.7120,5.237052e+09,1.7779,1.4571,...,-1.044895,-0.453585,-2.832969,0.000302,0.097983,0.013389,1.149856,1.293518,0.934144,1.040757
1942,21383,20211230,000985.SH,2021,6.390729e+09,53695.625032,0.7116,5.236812e+09,1.7905,1.4672,...,1.955105,-0.193822,-2.832969,0.000072,0.097983,0.972127,1.079838,1.288670,0.852272,1.035358


## MA-多空策略

In [48]:
def ma_strategy(df, factor_list, window_list):
    """
    用均值策略作为买入卖出信号
    :param df: 因子数据
    :param factor_list: 待选决策因子
    :param window_list: 待选决策窗口
    :return:
    """
    out_list = []
    # 循环因子和窗口
    for w in window_list:
        for f in factor_list:
            # MA均值计算
            df[f'{f}_ma_{w}'] = (df['img_neg'].rolling(w).mean())

            # 形成投资信号
            df[f'{f}_sell_signal_ma_{w}'] = df[f] >= df[f'{f}_ma_{w}']
            df[f'{f}_sell_signal_ma_{w}'] = df[f'{f}_sell_signal_ma_{w}'].shift(1)  # 向前移动一天,使用历史信息

            # 卖空指数操作
            df[f'{f}_return_ma_{w}'] = np.where(df[f'{f}_sell_signal_ma_{w}'], -1 * (df[f'{f}_sell_signal_ma_{w}'] * df['close_chg']),
                                                df['close_chg'])

            # 计算累积市值
            df[f'{f}_mv_ma_{w}'] = ((df[f'{f}_return_ma_{w}'] + 100) / 100).cumprod(axis=0)

            # 计算最大回撤
            df[f'{f}_mdd_ma_{w}'] = df[f'{f}_return_ma_{w}'].cummin(axis=0)

            # 输出值列表
            out_list += [f'{f}_mv_ma_{w}', f'{f}_mdd_ma_{w}']

    # 去掉空行(MA行)
    df.dropna(axis=0, inplace=True)

    # 对比基准
    df['mv_shareindex'] = ((df['close_chg'] + 100) / 100).cumprod(axis=0)
    df['mdd_shareindex'] = df['close_chg'].cummin(axis=0)

    return df[['trade_date', 'ts_code', 'mv_shareindex', 'mdd_shareindex'] + out_list]


#ma策略
ma_strategy(df_data, ['img_neg', 'tex_neg', 'SENT_INDEX', 'SENT_INDEX_R'], [5, 10, 15, 20])


Unnamed: 0,trade_date,ts_code,mv_shareindex,mdd_shareindex,img_neg_mv_ma_5,img_neg_mdd_ma_5,tex_neg_mv_ma_5,tex_neg_mdd_ma_5,SENT_INDEX_mv_ma_5,SENT_INDEX_mdd_ma_5,...,SENT_INDEX_R_mv_ma_15,SENT_INDEX_R_mdd_ma_15,img_neg_mv_ma_20,img_neg_mdd_ma_20,tex_neg_mv_ma_20,tex_neg_mdd_ma_20,SENT_INDEX_mv_ma_20,SENT_INDEX_mdd_ma_20,SENT_INDEX_R_mv_ma_20,SENT_INDEX_R_mdd_ma_20
1206,20181217,000985.SH,0.998586,-0.141405,0.998215,-3.119619,0.946975,-3.119619,0.970662,-3.119619,...,0.970662,-3.119619,0.970662,-3.119619,0.970662,-3.119619,0.970662,-3.119619,0.970662,-3.119619
1207,20181218,000985.SH,0.990405,-0.819247,0.990038,-3.119619,0.954733,-3.119619,0.96271,-3.119619,...,0.96271,-3.119619,0.96271,-3.119619,0.978614,-3.119619,0.96271,-3.119619,0.96271,-3.119619
1208,20181219,000985.SH,0.978085,-1.243940,1.002353,-3.119619,0.942857,-3.119619,0.950734,-3.119619,...,0.950734,-3.119619,0.974685,-3.119619,0.966441,-3.119619,0.950734,-3.119619,0.950734,-3.119619
1209,20181220,000985.SH,0.976226,-1.243940,1.000447,-3.119619,0.941064,-3.119619,0.948927,-3.119619,...,0.948927,-3.119619,0.972832,-3.119619,0.964603,-3.119619,0.948927,-3.119619,0.948927,-3.119619
1210,20181221,000985.SH,0.966645,-1.243940,0.990629,-3.119619,0.931828,-3.119619,0.939614,-3.119619,...,0.939614,-3.119619,0.98238,-3.119619,0.955136,-3.119619,0.939614,-3.119619,0.939614,-3.119619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1939,20211227,000985.SH,1.641510,-8.183736,1.629029,-6.666832,0.818046,-8.183736,1.155046,-6.666832,...,1.345123,-6.666832,1.130841,-6.666832,0.765333,-8.183736,1.23279,-6.666832,1.354292,-6.666832
1940,20211228,000985.SH,1.653273,-8.183736,1.617356,-6.666832,0.823907,-8.183736,1.146769,-6.666832,...,1.335485,-6.666832,1.122737,-6.666832,0.770817,-8.183736,1.223956,-6.666832,1.344587,-6.666832
1941,20211229,000985.SH,1.638211,-8.183736,1.602622,-6.666832,0.831413,-8.183736,1.157216,-6.666832,...,1.347651,-6.666832,1.112509,-6.666832,0.777839,-8.183736,1.235106,-6.666832,1.356836,-6.666832
1942,20211230,000985.SH,1.652119,-8.183736,1.589016,-6.666832,0.824355,-8.183736,1.147391,-6.666832,...,1.359092,-6.666832,1.103064,-6.666832,0.771236,-8.183736,1.224621,-6.666832,1.368355,-6.666832
