In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

def process_sign_sequence(df, column_name, output_column=None):
    """Re-sign each value according to whether the NEXT row has the same sign.

    Every value of ``df[column_name]`` is compared with the value one row
    below it (``shift(-1)``):

    * same sign      -> the magnitude is stored as a positive number
    * opposite sign  -> the magnitude is stored as a negative number
    * otherwise (either value is zero or NaN, which includes the last row,
      because it has no successor) -> the value is copied unchanged

    Signs are compared through ``np.sign`` instead of multiplying the raw
    values, so two very small magnitudes whose product would underflow to
    0.0 are still classified correctly.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds ``column_name``. It is modified in place.
    column_name : str
        Name of the numeric column to process.
    output_column : str, optional
        Name of the column that receives the result. Defaults to
        ``f"{column_name}_processed"``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``output_column`` added or overwritten.
    """
    if output_column is None:
        output_column = f"{column_name}_processed"

    values = df[column_name]

    # Product of the signs of the current and the next value: +1 (same sign),
    # -1 (opposite sign), 0 (a zero is involved) or NaN (a NaN is involved).
    # Comparisons against NaN are False, so no explicit isna() guard is needed.
    sign_product = np.sign(values) * np.sign(values.shift(-1))
    same_sign = sign_product > 0
    diff_sign = sign_product < 0

    # Taken before the output column is written, so the result is also
    # correct when output_column == column_name.
    magnitude = values.abs()

    # Start from a copy so that rows matched by neither mask keep their value.
    df[output_column] = values.copy()

    # Same sign as the next value: store the magnitude as a positive number.
    df.loc[same_sign, output_column] = magnitude[same_sign]

    # Opposite sign to the next value: store the magnitude as a negative number.
    df.loc[diff_sign, output_column] = -magnitude[diff_sign]

    return df


file_name = 'RB99_120m_2010.1.1_2025.1.31.csv'         # raw input data file
bars = pd.read_csv(file_name).set_index("datetime")

# Horizons (in bars) over which the log returns are measured.
return_lags = (1, 2, 3, 4, 5)
curve_colors = ('darkred', 'green', 'blue', 'orange', 'magenta')

# k-period log returns of the close price. The leading NaN rows are kept
# here: a .dropna() before the assignment would be undone immediately,
# because assigning a Series to a column realigns it to bars' index.
# Incomplete rows are removed once, by bars.dropna() below.
log_close = np.log(bars['close'])
for lag in return_lags:
    bars[f'returns_{lag}'] = log_close.diff(periods=lag)

# Adds one 'returns_<k>_processed' column per horizon (bars is modified in place).
for lag in return_lags:
    process_sign_sequence(bars, f'returns_{lag}')

# Drops every row that has a NaN in ANY column of the frame, not only in the
# return columns. NOTE(review): confirm the CSV has no other sparse columns.
bars = bars.dropna()

# Z-score every processed return series so the curves share one scale.
standard_1, standard_2, standard_3, standard_4, standard_5 = (
    (bars[column] - bars[column].mean()) / bars[column].std()
    for column in (f'returns_{lag}_processed' for lag in return_lags)
)
standardized = (standard_1, standard_2, standard_3, standard_4, standard_5)

# Title and output file share the same stem; "-1" marks the shift(-1) variant.
figure_stem = file_name + '_' + str(len(bars)) + "-1"

fig, ax = plt.subplots(figsize=(16, 12))

for lag, series, color in zip(return_lags, standardized, curve_colors):
    sns.kdeplot(series, label=str(lag), color=color, ax=ax)

# Standard-normal reference curve. A seeded generator keeps the saved
# figure identical from run to run.
rng = np.random.default_rng(0)
sns.kdeplot(rng.normal(size=1000000), label="Normal", color='black', linestyle="--", ax=ax)

ax.set_xticks(range(-5, 6))
ax.legend(loc=8, ncol=5)
ax.set_title(figure_stem, loc='center', fontsize=20, fontweight="bold", fontname="Times New Roman")
ax.set_xlim(-5, 5)
ax.grid(True)

fig.savefig(figure_stem + ".jpg")
plt.close(fig)

In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt

def process_sign_sequence(df, column_name, output_column=None):
    """Re-sign each value according to whether the PREVIOUS row has the same sign.

    Every value of ``df[column_name]`` is compared with the value one row
    above it (``shift(1)``):

    * same sign      -> the magnitude is stored as a positive number
    * opposite sign  -> the magnitude is stored as a negative number
    * otherwise (either value is zero or NaN, which includes the first row,
      because it has no predecessor) -> the value is copied unchanged

    Signs are compared through ``np.sign`` instead of multiplying the raw
    values, so two very small magnitudes whose product would underflow to
    0.0 are still classified correctly.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds ``column_name``. It is modified in place.
    column_name : str
        Name of the numeric column to process.
    output_column : str, optional
        Name of the column that receives the result. Defaults to
        ``f"{column_name}_processed"``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``output_column`` added or overwritten.
    """
    if output_column is None:
        output_column = f"{column_name}_processed"

    values = df[column_name]

    # Product of the signs of the current and the previous value: +1 (same
    # sign), -1 (opposite sign), 0 (a zero is involved) or NaN (a NaN is
    # involved). Comparisons against NaN are False, so no isna() guard is needed.
    sign_product = np.sign(values) * np.sign(values.shift(1))
    same_sign = sign_product > 0
    diff_sign = sign_product < 0

    # Taken before the output column is written, so the result is also
    # correct when output_column == column_name.
    magnitude = values.abs()

    # Start from a copy so that rows matched by neither mask keep their value.
    df[output_column] = values.copy()

    # Same sign as the previous value: store the magnitude as a positive number.
    df.loc[same_sign, output_column] = magnitude[same_sign]

    # Opposite sign to the previous value: store the magnitude as a negative number.
    df.loc[diff_sign, output_column] = -magnitude[diff_sign]

    return df


file_name = 'RB99_120m_2010.1.1_2025.1.31.csv'         # raw input data file
bars = pd.read_csv(file_name).set_index("datetime")

# Horizons (in bars) over which the log returns are measured.
return_lags = (1, 2, 3, 4, 5)
curve_colors = ('darkred', 'green', 'blue', 'orange', 'magenta')

# k-period log returns of the close price. The leading NaN rows are kept
# here: a .dropna() before the assignment would be undone immediately,
# because assigning a Series to a column realigns it to bars' index.
# Incomplete rows are removed once, by bars.dropna() below.
log_close = np.log(bars['close'])
for lag in return_lags:
    bars[f'returns_{lag}'] = log_close.diff(periods=lag)

# Adds one 'returns_<k>_processed' column per horizon (bars is modified in place).
for lag in return_lags:
    process_sign_sequence(bars, f'returns_{lag}')

# Drops every row that has a NaN in ANY column of the frame, not only in the
# return columns. NOTE(review): confirm the CSV has no other sparse columns.
bars = bars.dropna()

# Z-score every processed return series so the curves share one scale.
standard_1, standard_2, standard_3, standard_4, standard_5 = (
    (bars[column] - bars[column].mean()) / bars[column].std()
    for column in (f'returns_{lag}_processed' for lag in return_lags)
)
standardized = (standard_1, standard_2, standard_3, standard_4, standard_5)

# Title and output file share the same stem; "+1" marks the shift(1) variant.
figure_stem = file_name + '_' + str(len(bars)) + "+1"

fig, ax = plt.subplots(figsize=(16, 12))

for lag, series, color in zip(return_lags, standardized, curve_colors):
    sns.kdeplot(series, label=str(lag), color=color, ax=ax)

# Standard-normal reference curve. A seeded generator keeps the saved
# figure identical from run to run.
rng = np.random.default_rng(0)
sns.kdeplot(rng.normal(size=1000000), label="Normal", color='black', linestyle="--", ax=ax)

ax.set_xticks(range(-5, 6))
ax.legend(loc=8, ncol=5)
ax.set_title(figure_stem, loc='center', fontsize=20, fontweight="bold", fontname="Times New Roman")
ax.set_xlim(-5, 5)
ax.grid(True)

fig.savefig(figure_stem + ".jpg")
plt.close(fig)