In [1]:
import datetime
from math import inf, nan
import sys
import pandas as pd
import talib

window_size = 5

# 读取数据，
def read_data(file_path):
    data = pd.read_csv(file_path)
    return data

def build_sma(data):
    data['close_sma'] = talib.SMA(data['close'], timeperiod=window_size)
    return data

def build_ema(data):
    data['close_ema'] = talib.EMA(data['close'], timeperiod=window_size)
    return data

def build_rsi(data):
    data['close_rsi'] = talib.RSI(data['close'], timeperiod=window_size)
    return data

def build_macd(data):
    MACD, MACD_Signal, MACD_Hist =  talib.MACD(data['close'], fastperiod=5, slowperiod=10, signalperiod=4)
    data['close_macd_hist'] = MACD_Hist
    return data


def build_bb_bands(data):
    upper_band,middle_band,lower_band = talib.BBANDS(data['close'], timeperiod=window_size)
    # data['close_bb_upper_band'] = upper_band
    # data['close_bb_middle_band'] = middle_band
    # data['close_bb_lower_band'] = lower_band
    data['close_bb_position'] = ((data['close'] - lower_band) / (upper_band - lower_band))
    # 替换无穷大和负无穷大值
    data['close_bb_position'].replace([inf, -inf], 0.5, inplace=True)
    return data

def build_atr(data):
    data['atr'] = talib.ATR(data['high'], data['low'], data['close'], timeperiod=window_size)
    return data

def build_stochastic(data):
    data['slowk'], data['slowd'] = talib.STOCH(data['high'], data['low'], data['close'], fastk_period=window_size, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    return data

def build_vwap(data):
    # VWAP计算公式: (典型价格 * 成交量)的累积和 / 成交量的累积和
       # 计算典型价格
    data['typical_price'] = (data['high'] + data['low'] + data['close']) / 3
    
    # 计算典型价格乘以成交量
    data['tp_vol'] = data['typical_price'] * data['volume']
    
    # 使用滚动窗口计算累积和
    data['rolling_tp_vol'] = data['tp_vol'].rolling(window=window_size, min_periods=1).sum()
    data['rolling_vol'] = data['volume'].rolling(window=window_size, min_periods=1).sum()
    
    # 计算VWAP
    data['vwap'] = data['rolling_tp_vol'] / data['rolling_vol']
    
   
    # 最终还是要把数据整理为百分比，数据归一化，这里把vwap 与当前的close价格做比较
    data['vwap_position'] = (data['close'] - data['vwap']) / data['vwap']

     # 删除临时列
    data.drop(columns=['typical_price', 'tp_vol', 'rolling_tp_vol', 'rolling_vol','vwap'], inplace=True)


    
    return data

def build_williams_r(data):
    data['williams_r'] = talib.WILLR(data['high'], data['low'], data['close'], timeperiod=window_size)
    return data




In [2]:

def go_clean_data(file_path):
    data_all = read_data(file_path)
    data_all = build_sma(data_all)
    data_all = build_ema(data_all)
    data_all = build_bb_bands(data_all)
    data_all = build_rsi(data_all)
    data_all = build_macd(data_all)
    data_all = build_atr(data_all)
    data_all = build_stochastic(data_all)
    data_all = build_vwap(data_all)
    data_all = build_williams_r(data_all)
    return data_all

file_name ="/Users/zengyan/Excelsior/ai-trader/temp/doge_5m_0701_0705"

file_path = f'{file_name}.csv'
data_cleaned = go_clean_data(file_path)
data_cleaned.head(50)
data_cleaned.to_csv(f'{file_name}_1_featured.csv', index=False)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['close_bb_position'].replace([inf, -inf], 0.5, inplace=True)
