# 计算点度中心性因子

## 导入模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
from tqdm.notebook import tqdm
import os

## 读入数据

### 读入调仓日

In [2]:
# Sample window for the factor computation; both end points are inclusive.
start_date = '2019-08-01'
end_date = '2025-01-01'

# Load the rebalancing-date table and keep only the dates inside the window.
df_adj_date = feather.read_dataframe('../data/adj_date_daily.feather')
idx_date = df_adj_date['adj_date'].between(start_date, end_date)
adj_date = df_adj_date.loc[idx_date, 'adj_date'].to_list()

### 读入公司列表

In [3]:
# Company (issue) list used by N_connect_calc, which looks rows up with
# issues.loc[date, 'issue'] -- so the frame is presumably indexed by date
# with an 'issue' column; TODO confirm against the data file.
issues = feather.read_dataframe('../data/issues_daily.feather')

## 计算点度中心性

In [4]:
def N_connect_calc(connect_method, dirname, *args, **kwargs):
    """Compute a connection-count factor for every date in ``adj_date``.

    For each date, the 'num' and 'size' correlation matrices are read from
    ``../data/corr_daily/{year}/corr_{kind}_{YYYYMMDD}.feather`` and passed
    to ``connect_method``. The per-issue counts of all dates are stacked and
    written to ``N_connect_num.feather`` and ``N_connect_size.feather``
    inside ``dirname``. Finally, the per-date fraction of issues with zero
    connections is plotted for both kinds.

    Reads the module-level ``adj_date`` (dates to process) and ``issues``
    (looked up with ``issues.loc[date, 'issue']``).

    Parameters
    ----------
    connect_method : callable
        Called as ``connect_method(corr, *args, **kwargs)`` with the
        correlation DataFrame of one date. Its result is stored next to that
        date's issue list, so it must have one entry per issue.
    dirname : str
        Output directory; created if it does not exist.
    *args, **kwargs
        Forwarded unchanged to ``connect_method``.

    Raises
    ------
    ValueError
        If ``adj_date`` is empty, since there would be nothing to write.
    """
    os.makedirs(dirname, exist_ok=True)
    if len(adj_date) == 0:
        raise ValueError('adj_date is empty; nothing to compute')

    kinds = ('num', 'size')
    # Per-date frames are collected and concatenated once at the end.
    # Concatenating inside the loop re-copies all earlier rows on every
    # iteration, which is quadratic in the number of dates.
    frames = {kind: [] for kind in kinds}
    # Fraction of issues whose connection count is zero, one value per date.
    remove_ratio = {kind: [] for kind in kinds}

    for ad in tqdm(adj_date):
        year = ad.year
        ad_str = ad.strftime('%Y%m%d')
        issues_date = issues.loc[ad, 'issue'].to_numpy()

        for kind in kinds:
            corr = feather.read_dataframe(
                f'../data/corr_daily/{year}/corr_{kind}_{ad_str}.feather'
            )
            n_connect = connect_method(corr, *args, **kwargs)
            remove_ratio[kind].append((n_connect == 0).sum() / len(issues_date))
            frames[kind].append(
                pd.DataFrame({'issue': issues_date, 'date': ad, 'N_connect': n_connect})
            )

    for kind in kinds:
        # No ignore_index: the stacked frame (including its index) is the
        # same as the one produced by concatenating date by date.
        feather.write_dataframe(
            pd.concat(frames[kind]),
            os.path.join(dirname, f'N_connect_{kind}.feather'),
        )

    for kind in kinds:
        plt.plot(adj_date, remove_ratio[kind], label=kind)
    plt.legend()
    plt.show()

## 方法0: 稀疏化处理后取单向连接

In [5]:
def connect_0(corr, axis=1):
    """Count one-directional links after sparsifying the correlation matrix.

    The diagonal is zeroed, and an entry counts as a link only when it is
    strictly greater than the median of the remaining positive entries.

    axis=1 counts, for each focal stock, the stocks linked to it (row sums).
    axis=0 counts, for each stock, the focal stocks linked to it (column sums).
    """
    weights = np.array(corr.values)  # private copy; the caller's frame is untouched
    np.fill_diagonal(weights, 0)

    cutoff = np.median(weights[weights > 0])
    # The cutoff is a median of positive values, so "above the cutoff" is
    # the same set as "still positive after zeroing everything <= cutoff".
    linked = weights > cutoff
    return linked.astype(int).sum(axis=axis)

In [None]:
# Method 0 with axis=1: per connect_0's docstring, count the linked stocks of each focal stock.
N_connect_calc(connect_0, '../data/N_connect_daily/N_connect_0_1/', axis=1)

  0%|          | 0/1315 [00:00<?, ?it/s]

In [None]:
# Method 0 with axis=0: per connect_0's docstring, count the focal stocks linked to each stock.
N_connect_calc(connect_0, '../data/N_connect_daily/N_connect_0_0/', axis=0)

## 方法1: 稀疏化处理后建立双向连接

In [None]:
def connect_1(corr, method='both'):
    """Count links per row after sparsifying and symmetrising the links.

    The diagonal is zeroed, and an entry counts as a link only when it is
    strictly greater than the median of the remaining positive entries.

    method='any'  keeps a link between two stocks if either direction has one.
    method='both' keeps a link only if both directions have one.
    Any other value leaves the one-directional links as they are.
    """
    weights = np.array(corr.values)  # private copy; the caller's frame is untouched
    np.fill_diagonal(weights, 0)

    cutoff = np.median(weights[weights > 0])
    linked = weights > cutoff

    if method == 'both':
        linked = linked & linked.T  # a link is required in both directions
    elif method == 'any':
        linked = linked | linked.T  # a link in one direction is enough

    return linked.astype(int).sum(axis=1)

In [None]:
# Method 1, 'both': a link is kept only when it exists in both directions.
N_connect_calc(connect_1, '../data/N_connect_daily/N_connect_1_both/', method='both')

In [None]:
# Method 1, 'any': a link is kept when it exists in at least one direction.
N_connect_calc(connect_1, '../data/N_connect_daily/N_connect_1_any/', method='any')

## 方法2: 先对双向相关度取平均值 / 最大值再稀疏化处理

In [None]:
def connect_2(corr, method='mean'):
    """Count links per row after symmetrising the weights, then sparsifying.

    The diagonal is zeroed first. Each entry is then combined with its
    transposed counterpart according to ``method`` ('mean', 'max' or 'min');
    any other value leaves the weights as they are. An entry counts as a
    link only when it is strictly greater than the median of the positive
    combined weights.
    """
    weights = np.array(corr.values)  # private copy; the caller's frame is untouched
    np.fill_diagonal(weights, 0)

    # How to merge the two directions of each pair into one weight.
    combine = {
        'mean': lambda m: np.mean([m, m.T], axis=0),
        'max': lambda m: np.maximum(m, m.T),
        'min': lambda m: np.minimum(m, m.T),
    }.get(method)
    if combine is not None:
        weights = combine(weights)

    cutoff = np.median(weights[weights > 0])
    return (weights > cutoff).astype(int).sum(axis=1)

In [None]:
# Method 2, 'mean': average the two directions of each pair before thresholding.
N_connect_calc(connect_2, '../data/N_connect_daily/N_connect_2_mean/', method='mean')

In [None]:
# Method 2, 'max': take the larger of the two directions of each pair before thresholding.
N_connect_calc(connect_2, '../data/N_connect_daily/N_connect_2_max/', method='max')

In [None]:
# Method 2, 'min': take the smaller of the two directions of each pair before thresholding.
N_connect_calc(connect_2, '../data/N_connect_daily/N_connect_2_min/', method='min')

## 等权复合频率 / 幅度因子

In [None]:
# Output directories of the N_connect_calc runs, one per method variant.
dirnames = [
    '../data/N_connect_daily/N_connect_0_0/',
    '../data/N_connect_daily/N_connect_0_1/',
    '../data/N_connect_daily/N_connect_1_any/',
    '../data/N_connect_daily/N_connect_1_both/',
    '../data/N_connect_daily/N_connect_2_max/',
    '../data/N_connect_daily/N_connect_2_min/',
    '../data/N_connect_daily/N_connect_2_mean/',
]

for dirname in dirnames:
    num = feather.read_dataframe(dirname + '/N_connect_num.feather')
    size = feather.read_dataframe(dirname + '/N_connect_size.feather')
    # Equal-weight composite: the 'num' and 'size' counts are simply added;
    # 'date' and 'issue' are taken from the 'num' table.
    N_connect = pd.DataFrame({
        'date': num['date'],
        'issue': num['issue'],
        'N_connect': num['N_connect'].add(size['N_connect']),
    })
    feather.write_dataframe(N_connect, dirname + '/N_connect.feather')