# 金融市场情绪专题
PCA

In [1]:
import sys
import os
import cudf
import cupy
import pandas as pd

# ------------------------------ dataset path ------------------------------#
# /usr/local/miniconda3/envs/Rapids/bin/python
# NOTE(review): the line above looks like the interpreter of the conda env used to run
# this notebook — confirm; it has no effect on the code.
# Directory prefix (with trailing slash) that every loader below prepends to its file name.
DATASETS_PATH = '/data/DataSets/BW_INDEX_CSMAR/BW_5_CSMAR/'

### 股利溢价 lnPD
SgnYear [统计年度] - YYYY
Dnum [分红公司数目] - 指当年发生了现金分红的上市公司数量
NDnum [未分红公司数目] - 指当年没有现金分红的上市公司数量
DMB [分红公司平均市值账面比] - 计算公式为：所有分红的公司的市值账面比的平均数
NDMB [未分红公司平均市值账面比] - 计算公式为：所有未分红的公司的市值账面比的平均数
PDND [股利溢价] - 计算公式为：分红公司平均市值账面比-未分红公司平均市值账面比
LogPDND [对数股利溢价] - 计算公式为：股利溢价的对数

In [3]:
def cal_pd():
    """
    Load the log dividend premium proxy (lnPD).

    Reads ``QX_DIVIDENDPREMIUM.parquet`` from ``DATASETS_PATH`` with cudf and
    exposes its ``LogPDND`` column under the name ``lnPD``.

    :return: single-column cudf DataFrame ``['lnPD']`` carrying whatever index
             is stored in the parquet file (the displayed output shows SgnYear)
    """
    premium = cudf.read_parquet(f'{DATASETS_PATH}QX_DIVIDENDPREMIUM.parquet')
    premium = premium.rename(columns={'LogPDND': 'lnPD'})
    return premium[['lnPD']]


cal_pd()

Unnamed: 0_level_0,lnPD
SgnYear,Unnamed: 1_level_1
1991,-0.9005
1992,-0.272
1993,-0.0592
1994,-0.1019
1995,-0.0237
1996,-0.0648
1997,-0.0639
1998,-0.0479
1999,-0.0426
2000,-0.0986


### 股票融资占股票债券融资比例 SR
SgnYear [统计年度] - YYYY
IPO [A股首次发行金额] - 当年IPO的A股融资额
APublic [A股公开增发金额] -
APrivate [A股非公开增发金额] -
Rs [A股配股金额] -
Refinance [A股再筹资金额] - 通过增发配股等方式融资额
Ashares [A股股票融资总额] - 计算公式为：IPO+再融资之和
Bond [债券市场筹资总额] - 计算公式为：债券市场各类债券发行的总额
SRate [股票融资占股票债券融资比例(%)] - 计算公式为：股票融资*100/(股票融资+债券融资)

In [4]:
def cal_sr():
    """
    Load the equity share in new issues proxy (SR).

    Reads ``QX_STOCKRATE.parquet`` from ``DATASETS_PATH`` with cudf and exposes
    its ``SRate`` column under the name ``SR``.

    :return: single-column cudf DataFrame ``['SR']`` carrying whatever index is
             stored in the parquet file (the displayed output shows SgnYear)
    """
    stock_rate = cudf.read_parquet(f'{DATASETS_PATH}QX_STOCKRATE.parquet')
    stock_rate = stock_rate.rename(columns={'SRate': 'SR'})
    return stock_rate[['SR']]


cal_sr()

Unnamed: 0_level_0,SR
SgnYear,Unnamed: 1_level_1
1990,5.9
1991,2.39
1992,27.94
1993,55.02
1994,33.01
1995,11.96
1996,12.99
1997,49.09
1998,88.27
1999,67.59


### 基金折溢价率 CEFD
Symbol [基金代码] -
TradingDate [交易日期] -
CategoryID [基金类别] - S0601=股票型基金;S0602=债券型基金;S0603=货币型基金;S0604=混合型基金;S0605=FOF;S0606=股指期货型基金;S0699=其他
FundTypeID [基金运作方式] - S0501=契约型开放式;S0502=契约型封闭式
IsETF [是否ETF] - 1=是；2=否
IsLOF [是否LOF] - 1=是；2=否
IsIndexFund [是否指数基金] - 1=是；2=否
IsInnovative [是否创新型基金] - 1=是；2=否
NAV [基金份额净值] -
AccumulativeNAV [基金份额累计净值] -
ClosePrice [收盘价] -
CovertRate [溢价率(%)] - 计算公式为：（收盘价-基金份额净值）*100/基金份额净值
TurnoverRate [换手率] -

In [5]:

def cal_cefd():
    """
    Build the daily closed-end fund premium proxy (CEFD).

    Keeps rows with FundTypeID == 'S0502' and drops LOF, innovative and index
    funds (flag value 2 on IsLOF / IsInnovative / IsIndexFund), then takes the
    plain mean of CovertRate over the remaining funds on each TradingDate.

    :return: cudf DataFrame with one row per TradingDate and a single column 'CEFD'
    """
    # The row filter is evaluated by pandas, so the frame travels cudf -> pandas -> cudf.
    funds_host = cudf.read_parquet(f'{DATASETS_PATH}QX_FUNDDISCOUNTPREMIUM.parquet').sort_index().to_pandas()
    kept_host = funds_host.query(" FundTypeID=='S0502'& IsLOF==2 & IsInnovative==2 & IsIndexFund==2")
    funds = cudf.from_pandas(kept_host)

    # Per-day mean of CovertRate, broadcast back onto every fund row of that day.
    day_mean = funds.groupby(level=['TradingDate'])['CovertRate'].transform('mean')
    funds['DayAvgCR'] = day_mean

    # Collapse to a time series: one row per trading day.
    daily = funds.groupby(level=['TradingDate']).first()
    daily = daily.rename(columns={'DayAvgCR': 'CEFD'})

    return daily[['CEFD']]


cal_cefd()

Unnamed: 0_level_0,CEFD
TradingDate,Unnamed: 1_level_1
19980430,101.500000
19980630,60.272500
19980731,41.500000
19980831,28.418000
19980930,31.054000
...,...
20221216,4.303333
20221219,1.501667
20221220,0.239091
20221221,0.583000


### 市场换手率 TURN
TradingDate [交易日期] -
MarketType [市场类型] - 1=上证A股市场；2=上证B股市场；4=深证A股市场；8=深证B股市场；6=主板；5=沪深A股市场；7=中小板；10=沪深B股市场；15=沪深AB股市场；16=创业板；20=深证A股和创业板；21=沪深A股和创业板；31=沪深AB股和创业板；32=科创板；33=上证A股和科创板；37=沪深A股和科创板；47=沪深AB股和科创板；53=沪深A股和创业板和科创板；63=沪深AB股和创业板和科创板；64=北证A股市场；69=沪深京A股市场；79=沪深京AB股市场；85=沪深京A股和创业板；95=沪深京AB股和创业板；101=沪深京A股和科创板；111=沪深京AB股和科创板；117=沪深京A股和创业板和科创板；127=沪深京AB股和创业板和科创板；
Parameter [参数值] - 字段说明见说明书“附录 参数说明”
Unit [参数单位] -
TurnoverRate1 [换手率(总股本)] - 计算公式为：交易量/总股本
TurnoverRate2 [换手率(流通股本)] - 计算公式为：交易量/流通股本

In [6]:
def transform():
    """
    Convert the raw turnover CSV exports into a single parquet file.

    Concatenates ``QX_TRM.csv`` and ``QX_TRM1.csv``, turns the dash-separated
    TradingDate strings into uint32 integers (dashes removed), indexes the
    result by (TradingDate, MarketType), sorts it and writes it to
    ``QX_TRM.parquet`` under ``DATASETS_PATH``.

    :return: the indexed, sorted pandas DataFrame that was written
    """
    first_part = pd.read_csv(f'{DATASETS_PATH}QX_TRM.csv')
    second_part = pd.read_csv(f'{DATASETS_PATH}QX_TRM1.csv')
    turnover = pd.concat([first_part, second_part])

    # Strip the dashes from the date strings before the integer cast.
    date_digits = turnover['TradingDate'].str.replace('-', '')
    turnover['TradingDate'] = date_digits.astype('uint32')

    turnover = turnover.set_index(['TradingDate', 'MarketType'])
    turnover = turnover.sort_index()
    turnover.to_parquet(f'{DATASETS_PATH}QX_TRM.parquet')
    return turnover


def cal_turn(window=240 * 5):
    """
    Load the market turnover proxy (TURN) and its deviation from a trailing mean.

    Reads ``QX_TRM.parquet`` from ``DATASETS_PATH``, keeps the rows with
    MarketType == 5 and Parameter == 1, and sorts them by index. ``TURN`` is
    the ``TurnoverRate2`` column; ``TURN_MA`` is ``TurnoverRate2`` minus its
    rolling mean over the last ``window`` rows, so it is null until a full
    window is available.

    :param window: number of rows in the trailing window. Defaults to
                   ``240 * 5``, the value previously hard-coded here
                   (presumably about five years of ~240 trading days — confirm).
    :return: cudf DataFrame indexed by TradingDate with columns 'TURN' and 'TURN_MA'
    """
    # NOTE(review): the meaning of Parameter == 1 is defined in the vendor manual appendix,
    # which is not part of this notebook — confirm it selects the intended series.
    df = cudf.read_parquet(f'{DATASETS_PATH}QX_TRM.parquet').query("MarketType==5&Parameter==1").sort_index()

    # Deviation of the turnover rate from its trailing moving average.
    trailing_mean = df['TurnoverRate2'].rolling(window).mean()
    df['TURN_MA'] = df['TurnoverRate2'] - trailing_mean

    # Move MarketType out of the index so only TradingDate remains as the index.
    return (
            df.reset_index(['MarketType']).rename(columns={'TurnoverRate2': 'TURN'})
            [['TURN', 'TURN_MA']]
    )


cal_turn()

Unnamed: 0_level_0,TURN,TURN_MA
TradingDate,Unnamed: 1_level_1,Unnamed: 2_level_1
19901219,0.00062,
19901220,0.00012,
19901221,0.00003,
19901224,0.00004,
19901225,0.00001,
...,...,...
20221215,0.00468,-0.001257842
20221216,0.00498,-0.00095675
20221219,0.00538,-0.000556983
20221220,0.00422,-0.001715833


### IPO股票信息 RIPO,NIPO
Symbol [股票代码] -
ListedDate [上市日期] -
ListedYear [上市年份] -
ABSymbol [AB股交叉码] - 指公司同时发行了A和B股的情况
HSymbol [H股交叉码] - 指的是公司同时发行了H股的情况
EstablishDate [公司成立日期] -
IpoDate [首次招股日期] -
CompanyListedDate [公司上市日期] -
IsIPO [是否首次发行] - A=是；B=否
TotalShares [实际发行总量] -
IssuePrice [发行价格] -
CurrencyCode [发行价格币种] -
ClosePrice [上市首日收盘价] -
ReturnRate [上市首日回报率] -
TurnoverRate1 [上市首日换手率(总股本)] -
TurnoverRate2 [上市首日换手率(流通股本)] -
PE [上市首日市盈率] -
PB [上市首日市净率] -

In [7]:
def cal_ipo():
    """
    Build the daily IPO proxies: mean first-day return (RIPO) and IPO count (NIPO).

    Reads ``QX_IPO.parquet`` from ``DATASETS_PATH`` and aggregates the
    ``ReturnRate`` column per ListedDate. NIPO is the 'count' aggregate of
    ReturnRate; the displayed output shows 0 for a date whose RIPO is missing.

    :return: cudf DataFrame with one row per ListedDate and columns 'RIPO', 'NIPO'
    """
    listings = cudf.read_parquet(f'{DATASETS_PATH}QX_IPO.parquet')

    # Per-day mean and count of first-day returns, broadcast onto every listing row.
    mean_return = listings.groupby(level=['ListedDate'])['ReturnRate'].transform('mean')
    listings['DayAvgRR'] = mean_return
    listing_count = listings.groupby(level=['ListedDate'])['ReturnRate'].transform('count')
    listings['DayTotalN'] = listing_count

    # Collapse to one row per listing date.
    daily = listings.groupby(level=['ListedDate']).first()
    daily = daily.rename(columns={'DayAvgRR': 'RIPO', 'DayTotalN': 'NIPO'})

    return daily[['RIPO', 'NIPO']]


cal_ipo()

Unnamed: 0_level_0,RIPO,NIPO
ListedDate,Unnamed: 1_level_1,Unnamed: 2_level_1
19901210,0.803,1
19901219,48.89698229,7
19910114,14.98,1
19910129,13.58,1
19910403,0.225,1
...,...,...
20221216,0.1284755,2
20221221,0.156567,1
20221222,0.157636,2
20221223,,0


## 合成

In [8]:
def merge_bw():
    """
    Assemble all sentiment proxies into one daily pandas DataFrame.

    The CEFD series defines the rows; the IPO and turnover series are
    left-joined on the date index, and the annual SR and lnPD series are
    left-joined on the calendar year of each trading date. All remaining
    missing values are replaced with 0.

    :return: pandas DataFrame indexed by TradingDate, sorted by date
    """
    # Daily proxies, joined on the integer date index.
    cefd = cal_cefd().to_pandas()
    ipo = cal_ipo().to_pandas()
    turn = cal_turn().to_pandas()
    daily = cefd.join(ipo, how='left').join(turn, how='left').reset_index()

    # Annual proxies are matched on the year part of the yyyymmdd integer date.
    daily['TradingYear'] = (daily['TradingDate'] // 10000).astype('int64')
    by_year = daily.set_index('TradingYear')
    by_year = by_year.join(cal_sr().to_pandas(), how='left')
    by_year = by_year.join(cal_pd().to_pandas(), how='left')
    # NOTE(review): in the displayed output the year column comes back named 'index'
    # rather than 'TradingYear' after this reset_index — confirm whether that is intended.
    merged = by_year.reset_index().set_index('TradingDate').sort_index()

    # Null handling, mainly meant for the IPO columns (days without listings).
    # NOTE(review): the fill applies to every column; the displayed output shows SR and
    # lnPD equal to 0 for 2022, so missing annual values are zero-filled too — confirm.
    merged = merged.fillna(0)
    return merged


df_pca = merge_bw()
df_pca


Unnamed: 0_level_0,index,CEFD,RIPO,NIPO,TURN,TURN_MA,SR,lnPD
TradingDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
19980430,1998,101.500000,0.000000,0.0,0.01515,-0.001191,88.27,-0.0479
19980630,1998,60.272500,1.081585,1.0,0.00732,-0.008927,88.27,-0.0479
19980731,1998,41.500000,0.000000,0.0,0.00712,-0.009170,88.27,-0.0479
19980831,1998,28.418000,0.000000,0.0,0.00798,-0.008155,88.27,-0.0479
19980930,1998,31.054000,0.000000,0.0,0.00848,-0.007518,88.27,-0.0479
...,...,...,...,...,...,...,...,...
20221216,2022,4.303333,0.128475,2.0,0.00498,-0.000957,0.00,0.0000
20221219,2022,1.501667,0.000000,0.0,0.00538,-0.000557,0.00,0.0000
20221220,2022,0.239091,0.000000,0.0,0.00422,-0.001716,0.00,0.0000
20221221,2022,0.583000,0.156567,1.0,0.00363,-0.002305,0.00,0.0000
