## Build Back Testing
Follow the buy and sell decisions from both the retail investors or the institution investors as per fund flow data provided, build two trading strategies Smart Retail Flow FT5009 and Smart Insti Flow FT5009. That is, you buy and sell the same stocks as retail investors or institutional investors from the fund flow report.

The strategy Smart Retail Flow FT5009 strictly follows the weekly buy sell decisions of retail investors and the strategy Smart Insti Flow FT5009 strictly follows the weekly buy sell decisions of institution investors.
- Prepare the return of the stocks (1%)
- Identify the position of stocks to buy and sell (2%)

In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# 生成模拟日期范围：从2019年1月到2024年10月的每周日期
start_date = datetime(2019, 1, 1)
end_date = datetime(2024, 10, 1)
dates = pd.date_range(start=start_date, end=end_date, freq='W-MON')

# 股票名称和代码
stock_names = [f"STK{i+1:03d}" for i in range(100)]

# 生成模拟数据
data = {
    'Date': [],
    'Stock Name': [],
    'Stock Code': [],
    'Open': [],
    'Close': [],
    'Adj Close': []
}

# 随机生成股票价格数据
np.random.seed(42)
for date in dates:
    for stock_name in stock_names:
        stock_code = stock_name  # 股票名称和股票代码一致
        open_price = np.random.uniform(50, 150)
        close_price = open_price * np.random.uniform(0.95, 1.05)
        adj_close = close_price  # 假设调整后的收盘价与收盘价相同
        data['Date'].append(date)
        data['Stock Name'].append(stock_name)
        data['Stock Code'].append(stock_code)
        data['Open'].append(open_price)
        data['Close'].append(close_price)
        data['Adj Close'].append(adj_close)

# 创建 DataFrame
df = pd.DataFrame(data)

# 保存为 CSV 文件
stock_prices_path = './data/simulated_stock_prices.csv'
df.to_csv(stock_prices_path, index=False) 
print(f"Simulated stock price data saved to {stock_prices_path}")
# 生成 Top 10 买入/卖出股票数据
top_stocks_data = []
for date in dates:
    for investor_type in ["Retail", "Institution"]:
        for action in ["buy", "sell"]:
            for i in range(1, 11):  # Top 10 stocks
                stock_code = f"STK{random.randint(1, 100):03d}"
                amount = round(random.uniform(1, 50), 2) * (-1 if action == "sell" else 1)  # 买入为正，卖出为负
                top_stocks_data.append([date, investor_type, action, stock_code, amount])

# 转换为DataFrame
top_stocks_df = pd.DataFrame(top_stocks_data, columns=["Date", "Investor Type", "Action", "Stock Code", "Amount"])

# 保存为CSV文件
top_stocks_path = './data/simulated_top_stocks_data.csv'
top_stocks_df.to_csv(top_stocks_path, index=False)

print(f"Simulated top stocks data saved to {top_stocks_path}")

close or open 

In [14]:
import pandas as pd
import numpy as np

top_stocks_path = './data/cleaned_data.csv'
stock_prices_path = './data/stock_info.csv'
# 读取模拟的资金流数据
top_stocks_data = pd.read_csv(top_stocks_path)
top_stocks_data['Date'] = pd.to_datetime(top_stocks_data['Date']).dt.strftime('%Y-%m-%d')
stock_prices_data = pd.read_csv(stock_prices_path)
# stock_prices_data['Open Return'] = stock_prices_data.groupby('Stock Code')['Open'].pct_change()

# 假设初始资金和相关参数
initial_capital = 1_000_000  # 初始资金1M
min_trade_units = 100        # 最小购买单位 100股
holding_period_limit = 4     # 最长持仓4周
borrow_cost_rate = 0.001     # 借贷利率 0.1%
weekly_capital_interval = 4  # 每四周重新计算交易资金

# 创建两个策略组合
portfolio_retail = {}
portfolio_institution = {}

# 设置回测的日期和股票代码信息
dates = top_stocks_data['Date'].unique()

# 初始化组合
portfolio_retail[dates[0]] = {
    'cash': initial_capital,
    'short_sell_cash': 0, # 卖空的现金是锁定的，即每周的交易资金为cash的1/4，不带卖空的现金
    'position_units': {},
    'holding_weeks': {},
    'net_value': 0,
}
portfolio_institution[dates[0]] = {
    'cash': initial_capital,
    'short_sell_cash': 0,
    'position_units': {},
    'holding_weeks': {},
    'net_value': 0,
}

# 逐周执行买卖决策
for i, date in enumerate(dates):
    print(f"第{i+1}周：{date}")

    # 获取零售和机构的买卖信号
    retail_signals = top_stocks_data[(top_stocks_data['Date'] == date) & (top_stocks_data['Investor Type'] == 'retail')]
    insti_signals = top_stocks_data[(top_stocks_data['Date'] == date) & (top_stocks_data['Investor Type'] == 'institution')]

    # 处理每个策略的买卖信号
    for strategy, signals, portfolio in zip(
            ['Smart Retail Flow FT5009', 'Smart Insti Flow FT5009'],
            [retail_signals, insti_signals],
            [portfolio_retail, portfolio_institution]):
        
        # signals中的Stock Code在stock_prices_data中可能不存在，需要过滤掉
        signals = signals[signals['Stock Code'].isin(stock_prices_data['Stock Code'])]

        # 获取当前日期的组合
        if i > 0:
            # 复制上周的组合状态
            portfolio[date] = portfolio[dates[i-1]].copy()
        
        current_portfolio = portfolio[date]
        
        current_portfolio['net_value'] = 0  # 重置净值
        # print('Strategy:', strategy)
        # if strategy == 'Smart Retail Flow FT5009':
        #     print('Retail Portfolio:', current_portfolio)
        # print('这一轮刚开始: ',current_portfolio)


        # 先花钱将卖空仓位的股票买回，并计算借贷成本（这一步可能造成short_sell_cash<0，因为强制一周时间就仓位归0，此时股价可能比short sell时更贵，低价卖出高价买入造成亏损，再加上借贷成本就更亏了）
        for stock_code, units in list(current_portfolio['position_units'].items()):
            if units < 0:  # 只有卖空仓位才计算借贷成本
                if i > 0:  # 确保有上一周的数据
                    prev_date = dates[i - 1]
                    prev_open_price = stock_prices_data[(stock_prices_data['Date'] == prev_date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                else:
                    prev_open_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                # print(date,stock_code)
                open_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                avg_price = (prev_open_price + open_price) / 2
                borrow_cost = abs(units) * avg_price * borrow_cost_rate
                current_portfolio['cash'] -= borrow_cost
                current_portfolio['short_sell_cash'] -= abs(units) * open_price
                del current_portfolio['position_units'][stock_code]  # 清零卖空持仓

        # 如果short sell亏损，则将亏损的short sell用现金补上
        if current_portfolio['short_sell_cash'] < 0:
            current_portfolio['cash'] += current_portfolio['short_sell_cash']
            current_portfolio['short_sell_cash'] = 0


        # 每周交易资金为总资金的1/4，每四周重新计算
        if i % weekly_capital_interval == 0:
            weekly_capital = current_portfolio['cash'] / weekly_capital_interval


        # 当现金不足时，卖出仓位最小的持仓，直到有足够的现金（>weekly_capital)，保证每周交易资金充足
        while current_portfolio['cash'] < weekly_capital:
            # 如果没有持仓，直接跳出
            if len(current_portfolio['position_units']) == 0:
                break
            # 卖出持仓最小的股票
            stock_code = min(current_portfolio['position_units'], key=current_portfolio['position_units'].get)
            units = current_portfolio['position_units'][stock_code]
            stock_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
            sell_units = min(min_trade_units, abs(units))
            current_portfolio['cash'] += sell_units * stock_price
            current_portfolio['position_units'][stock_code] -= sell_units
            if current_portfolio['position_units'][stock_code] == 0:
                del current_portfolio['position_units'][stock_code]
                del current_portfolio['holding_weeks'][stock_code]
            else:
                # 不必更新持有时间，因为这只是为了弥补现金不足 
                pass


        # 分别计算买入和卖出的信号强度
        buy_signals = signals[signals['Action'] == 'buy']
        sell_signals = signals[signals['Action'] == 'sell']
        buy_signal_strength = buy_signals['Amount'].abs().sum()
        sell_signal_strength = sell_signals['Amount'].abs().sum()

        # 处理买入信号
        for _, signal in buy_signals.iterrows():
            stock_code = signal['Stock Code']
            amount = abs(signal['Amount'])
            signal_strength = amount / buy_signal_strength  # 买入信号强度

            trade_value = weekly_capital * signal_strength
            # 获取股票价格
            stock_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
            units = int(trade_value / stock_price / min_trade_units) * min_trade_units

            # 扩大持仓并记录持有时间
            if units > 0:
                total_cost = units * stock_price
                current_portfolio['cash'] -= total_cost
                current_portfolio['position_units'][stock_code] = current_portfolio['position_units'].get(stock_code, 0) + units
                current_portfolio['holding_weeks'][stock_code] = 1  # 重置持有时间
            # print('买了',units,'个: ', stock_code, current_portfolio)

        # 处理卖出信号
        for _, signal in sell_signals.iterrows():
            stock_code = signal['Stock Code']
            amount = abs(signal['Amount'])
            signal_strength = amount / sell_signal_strength  # 卖出信号强度

            trade_value = weekly_capital * signal_strength
            # 获取股票价格
            stock_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
            units = int(trade_value / stock_price / min_trade_units) * min_trade_units

            if units > 0:
                # 检查是否有持仓
                held_units = current_portfolio['position_units'].get(stock_code, 0)
                if held_units > 0:
                    # 先卖出持仓部分
                    sell_units = min(units, held_units)
                    current_portfolio['cash'] += sell_units * stock_price
                    current_portfolio['position_units'][stock_code] -= sell_units
                    if current_portfolio['position_units'][stock_code] == 0:
                        del current_portfolio['position_units'][stock_code]
                        del current_portfolio['holding_weeks'][stock_code]
                    else:
                        current_portfolio['holding_weeks'][stock_code] += 1
                   
                    # 剩余部分按short sell处理
                    if sell_units < units:
                        short_units = units - sell_units
                        current_portfolio['short_sell_cash'] += short_units * stock_price  # short sell收益计入现金
                        current_portfolio['position_units'][stock_code] = current_portfolio['position_units'].get(stock_code, 0) - short_units

                else:
                    # 没有持仓时执行short sell
                    current_portfolio['short_sell_cash'] += units * stock_price
                    current_portfolio['position_units'][stock_code] = current_portfolio['position_units'].get(stock_code, 0) - units
            # print('卖了',units,'个: ', stock_code, current_portfolio)

        # 更新持仓时间
        for stock_code in current_portfolio['position_units']:
            if stock_code not in buy_signals['Stock Code'].values and stock_code not in sell_signals['Stock Code'].values:
                current_portfolio['holding_weeks'][stock_code] += 1

        # 清理超过持仓期限的股票
        for stock_code, weeks in list(current_portfolio['holding_weeks'].items()):
            if weeks >= holding_period_limit:
                # 超过持仓期限，强制卖出剩余持仓
                units = current_portfolio['position_units'][stock_code]
                stock_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                current_portfolio['cash'] += units * stock_price  # 卖出持仓

                del current_portfolio['position_units'][stock_code]
                del current_portfolio['holding_weeks'][stock_code]


        # 假设现在把所有的股票都换成现金，那么净值计算如下
        for stock_code, units in list(current_portfolio['position_units'].items()):
            stock_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
            if units < 0:
                if i > 0:  # 确保有上一周的数据
                    prev_date = dates[i - 1]
                    prev_open_price = stock_prices_data[(stock_prices_data['Date'] == prev_date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                else:
                    prev_open_price = stock_prices_data[(stock_prices_data['Date'] == date) & (stock_prices_data['Stock Code'] == stock_code)]['Open'].values[0]
                avg_price = (prev_open_price + stock_price) / 2
                current_portfolio['net_value'] += units * avg_price * borrow_cost_rate

            current_portfolio['net_value'] += units * stock_price

        current_portfolio['net_value'] += (current_portfolio['cash'] + current_portfolio['short_sell_cash'])
        if strategy == 'Smart Retail Flow FT5009':
            print('Retail Portfolio:', current_portfolio)
        else:
            print('Institution Portfolio:', current_portfolio)
        # print('这一轮结束: ',current_portfolio)

print("Retail Portfolio Final Cash:", portfolio_retail[dates[-1]])
print("Institution Portfolio Final Cash:", portfolio_institution[dates[-1]])

第1周：2019-01-07
Retail Portfolio: {'cash': 750457.0916162223, 'short_sell_cash': 246620.87121394955, 'position_units': {'C38U': 12600, '5CP': 55400, 'CC3': 11400, 'H02': 1200, '544': 27300, 'BDX': 449900, 'CJLU': 22300, 'BN4': -5100, 'O39': -6900, 'D05': -3500, 'U11': -1900, 'Z74': -8000, 'C09': -2500, 'V03': -1400, 'Y92': -4825600, 'G13': -15600}, 'holding_weeks': {'C38U': 1, '5CP': 1, 'CC3': 1, 'H02': 1, '544': 1, 'BDX': 1, 'CJLU': 1}, 'net_value': 999753.3791287859}
Institution Portfolio: {'cash': 754238.1820072584, 'short_sell_cash': 249058.59736616715, 'position_units': {'G13': 18400, 'Y92': 7230800, 'Z74': 4300, 'S58': 3100, 'C09': 1900, 'V03': 1900, 'O39': 4700, 'D05': 3800, 'U11': 2500, 'H02': -900, 'CC3': -9500, '5CP': -43500, 'C52': -18100, 'C38U': -25300, 'BDX': -341700, '544': -23800, 'CJLU': -13900}, 'holding_weeks': {'G13': 1, 'Y92': 1, 'Z74': 1, 'S58': 1, 'C09': 1, 'V03': 1, 'O39': 1, 'D05': 1, 'U11': 1}, 'net_value': 999750.9414026339}
第2周：2019-01-14
Retail Portfolio: {'

IndexError: index 0 is out of bounds for axis 0 with size 0