# 黑色金属 - 螺纹钢

In [1]:
# Set the working directory to the project root (platform-specific location).
import os
os.getcwd()
# POSIX hosts use the mounted volume; every other OS uses the Windows path.
os.chdir('/Volumes/Repository/Projects/ffa/' if os.name == 'posix' else "E:\\Document\\Project\\ffa")

In [2]:
# 加载依赖模块
import pandas as pd
import numpy as np
import akshare as ak
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, date
import importlib
import commodity
import json
from datetime import timedelta
import dataworks as dw
    

In [6]:
# Reload the project-local `commodity` and `dataworks` modules so that edits made
# to them are picked up by this session. (The previous comment referred to a
# "salary" module, which is not what is reloaded here.)
importlib.reload(commodity)
importlib.reload(dw)

<module 'dataworks' from 'E:\\Document\\Project\\ffa\\dataworks.py'>

## 数据准备与预处理

In [3]:
# Identifier and display name of the product analysed in this notebook.
symbol_id = 'RB'
symbol_name = '螺纹钢'
# Base folder (relative to the working directory) used to build the spreadsheet
# paths in the data index below.
fBasePath = 'steel/data/mid-stream/螺纹钢/'
# JSON settings file passed to the commodity.SymbolData / commodity.SymbolChain
# constructors in later cells.
json_file = './steel/setting.json'

### 数据索引设置

In [4]:
# Initialise the data index for this product.
# For first-time use: the product's data index is saved to / loaded from the JSON config file.
# Each entry maps a display label to a dict with the keys
# Name / Source / Path / Field / DataFrame.
# NOTE(review): 'df_prodoction' looks like a typo for 'df_production'; it is a runtime
# value, so confirm against whatever consumes the 'DataFrame' key before changing it.
# NOTE(review): '库存' and '仓单' point at the same workbook and the same 'DataFrame'
# name ("df_instock") — presumably intentional, verify.
data_index = {'产量': {'Name': "production", 'Source':'Choice', 'Path': fBasePath + '螺纹钢产量.xlsx', 'Field':'产量：钢筋：全国：当月值', 'DataFrame': "df_prodoction"},
              '销量': {'Name': "sales", 'Source':'Choice', 'Path': fBasePath + '螺纹钢销量.xlsx', 'Field':'销量：钢筋：累计值', 'DataFrame': "df_sales"},
              '库存': {'Name': "instock", 'Source':'Choice', 'Path': fBasePath + '螺纹钢库存.xlsx', 'Field':'库存：螺纹钢：合计', 'DataFrame': "df_instock"},
              '仓单': {'Name': "receipt", 'Source':'Choice', 'Path': fBasePath + '螺纹钢库存.xlsx', 'Field':'仓单数量：螺纹钢', 'DataFrame': "df_instock"},                
              }
# Profit formula inputs: per-raw-material factors (iron ore, coke) plus a fixed
# "other cost" term. How they are combined is decided by the commodity module.
profit_formula = {'Name': 'profit_formula', 'Factor': {'铁矿石': 1.6, '焦炭': 0.6}, '其他成本': 1200}
dominant_months = [1, 5, 10]
exchange_id = 'shfe'
# Bundle of all settings for this product. In the visible notebook it is only
# referenced by a commented-out SymbolData(...) call.
symbol_setting = {'DataIndex': data_index,
                  'ProfitFormula': profit_formula,
                  'DominantMonths': dominant_months,
                  'ExchangeID': exchange_id}


In [7]:
# Build the data-access objects for the product.
dws = dw.DataWorks()
# Alternative constructor call that also passes the JSON file and the in-notebook settings:
# symbol = commodity.SymbolData(symbol_id, symbol_name, json_file, symbol_setting)
symbol = commodity.SymbolData(symbol_id, symbol_name)
# Merge the product's data sets through the DataWorks handle; the result is kept
# in df_symbol_rb.
df_symbol_rb = symbol.merge_data(dws)

In [None]:
# Compute the spot-month periods for rebar (symbol.spot_months is read later when
# shading the chart — presumably populated by this call; confirm in commodity).
symbol.get_spot_months()
# Build and merge the data for coke ('J') and iron ore ('I'), the two raw
# materials that appear as factors in profit_formula.
symbol_j = commodity.SymbolData('J', '焦炭', json_file)
symbol_j.merge_data(dws)
symbol_i = commodity.SymbolData('I', '铁矿石', json_file)
symbol_i.merge_data(dws)

In [10]:
# Group rebar, iron ore and coke into one product chain so that rebar profits can
# be computed from the chain.
symbol_chain = commodity.SymbolChain('Steel', '黑色金属', json_file)
symbol_chain.add_symbol(symbol)
symbol_chain.add_symbol(symbol_i)
symbol_chain.add_symbol(symbol_j)
df_profit_c = symbol.get_profits(symbol_chain)
# df_profit_c.dropna(axis=0, how='all', subset=['现货利润', '盘面利润'], inplace=True)
# Rank inventory / receipts / profits against their history with a 60-month
# look-back; as the last expression of the cell its return value is displayed.
symbol.calculate_data_rank(trace_back_months=60)

Unnamed: 0,date,库存历史时间百分位,库存历史时间分位,库存,仓单历史时间百分位,仓单历史时间分位,仓单,现货利润历史时间百分位,现货利润历史时间分位,现货利润,盘面利润历史时间百分位,盘面利润历史时间分位,盘面利润
0,2010-03-29,,,,1.000000,,140971,,,,,,
1,2010-03-30,,,,1.000000,,152633,,,,,,
2,2010-03-31,,,,1.000000,,154121,,,,,,
3,2010-04-01,,,,1.000000,,166995,,,,,,
4,2010-04-02,1.000000,,691.40,1.000000,,178067,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3432,2023-12-27,0.135000,2,366.46,0.357449,2,15588,0.047414,1,-358.5,0.092545,1,-257.6
3433,2023-12-28,0.175000,2,395.29,0.363014,2,15889,0.045690,1,-364.016,0.095116,1,-255.5
3434,2023-12-29,0.175417,2,395.29,0.384418,3,17090,0.038793,1,-380.452,0.106255,1,-244.7
3435,2024-01-02,0.175833,2,395.29,,,,,,,0.095116,1,-256.8


## 基差-库存/仓单-利润分析

### 历史走势分析

前置条件：
- 非季节性品种确认

分析内容：
- 现货价格/期货价格（收盘价）、基差的历史趋势
- 基差率历史趋势，基差率历史分位
- 库存、仓单、库存消费比，库存、仓单的历史分位
- 现货利润和盘面利润，及其历史分位
- 现货月区域标记
- 多维指标共振区域标记

扩展功能：
- 图表可配置化
- 小图：时点跨期套利分析
- 小图：时点期限结构分析

In [None]:
# Four stacked subplots sharing one x axis, each with a secondary y axis.
# Per the plotly docs, row_width is given from bottom to top, so the first (top)
# row receives the 0.7 share and the three rows below 0.1 each.
# NOTE(review): row 3 is titled "库存/仓单历史分位" but currently shows the raw
# inventory / receipt series (the rank traces are built but not added), while row 4
# is titled "现货利润/盘面利润" but shows their percentile ranks.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, 
                    specs=[[{"secondary_y": True}], [{"secondary_y": True}], [{"secondary_y": True}], [{"secondary_y": True}]],
                   vertical_spacing=0.01, 
                   subplot_titles=('基差分析', '基差率', '库存/仓单历史分位', '现货利润/盘面利润'), 
                   row_width=[0.1, 0.1, 0.1, 0.7])

# Main chart: futures price, spot price and basis.
fig_future_price = go.Scatter(x=symbol.symbol_data['date'], y=symbol.symbol_data['主力合约收盘价'], name='期货价格', 
                              marker_color='rgb(84,134,240)')
fig_spot_price = go.Scatter(x=symbol.symbol_data['date'], y=symbol.symbol_data['现货价格'], name='现货价格', marker_color='rgb(105,206,159)')
fig_basis = go.Scatter(x=symbol.symbol_data['date'], y=symbol.symbol_data['基差'], stackgroup='one', name='基差', 
                       marker_color='rgb(239,181,59)', showlegend=False)
# The basis goes on a secondary y axis; no row/col is given here.
# NOTE(review): presumably this lands on row 1 / col 1 — confirm against plotly's add_trace.
fig.add_trace(fig_basis, secondary_y=True)
fig.add_trace(fig_future_price, row = 1, col = 1)
fig.add_trace(fig_spot_price, row = 1, col = 1)

# Sub-chart: basis rate, coloured by its sign.
sign_color_mapping = {0:'green', 1:'red'}
# Bar colours come from symbol.basis_color['基差率颜色'] mapped through a two-colour
# scale (green, red); values are shown as percentages on hover.
fig_basis_rate = go.Bar(x=symbol.symbol_data['date'], y = symbol.symbol_data['基差率'], name='基差率',
                        marker=dict(color=symbol.basis_color['基差率颜色'], colorscale=list(sign_color_mapping.values()),
                                    showscale=False),
                        showlegend=False,
                        hovertemplate='%{y:.2%}')
fig.add_trace(fig_basis_rate, row = 2, col = 1)

# Sub-chart: inventory (bars, primary axis) and warehouse receipts (line, secondary axis).
fig_receipt = go.Scatter(x=symbol.symbol_data['date'], y=symbol.symbol_data['仓单'], name='仓单', marker_color='rgb(239,181,59)')
fig_storage = go.Bar(x=symbol.symbol_data['date'], y=symbol.symbol_data['库存'], name='库存', marker_color='rgb(234,69,70)')
fig.add_trace(fig_receipt, row = 3, col = 1, secondary_y=True)
fig.add_trace(fig_storage, row = 3, col = 1)

# Sub-chart: historical time percentile of inventory / receipts, coloured by quantile bucket.
# Bucket 1 -> red, buckets 2-4 -> lightblue, bucket 5 -> green.
histroy_color_mapping ={1:'red', 2:'lightblue', 3:'lightblue', 4:'lightblue', 5:'green'}
# df_rank['仓单分位颜色'] = df_rank['仓单历史时间分位'].map(histroy_color_mapping)
# fig_receipt_rank = go.Scatter(x=df_rank['date'], y=df_rank['仓单历史时间百分位'], name='仓单分位', marker_color='rgb(239,181,59)')
# The two rank traces below are constructed but their add_trace calls are commented
# out, so they are not drawn.
fig_receipt_rank = go.Scatter(x=symbol.data_rank['date'], y=symbol.data_rank['仓单历史时间百分位'], name='仓单分位', mode='markers',
                              marker=dict(size=2, color=symbol.data_rank['仓单历史时间分位'], colorscale=list(histroy_color_mapping.values())),
                              showlegend=False,
                              hovertemplate='%{y:.2%}')
# fig.add_trace(fig_receipt_rank, row = 3, col = 1, secondary_y=True)
# Side effect: adds a colour column to symbol.data_rank.
symbol.data_rank['库存分位颜色'] = symbol.data_rank['库存历史时间分位'].map(histroy_color_mapping)
# fig_storage_rank = go.Bar(x=df_rank['date'], y=df_rank['库存历史时间百分位'], name='库存分位', marker_color='rgb(234,69,70)')
fig_storage_rank = go.Bar(x=symbol.data_rank['date'], y=symbol.data_rank['库存历史时间百分位'], name='库存分位', marker_color=symbol.data_rank['库存分位颜色'],
                          hovertemplate='%{y:.2%}')
# fig.add_trace(fig_storage_rank, row = 3, col = 1)

# Sub-chart: spot profit / futures (on-screen) profit — raw values, currently disabled.
# fig_spot_profit = go.Scatter(x=df_profit['date'], y=df_profit['现货利润'], name='现货利润', marker_color='rgb(239,181,59)')
# fig_future_profit = go.Bar(x=df_profit['date'], y=df_profit['盘面利润'], name='盘面利润', marker_color='rgb(234,69,70)')
# fig.add_trace(fig_spot_profit, row = 4, col = 1, secondary_y=True)
# fig.add_trace(fig_future_profit, row = 4, col = 1)

# Sub-chart: historical time percentile of spot profit (markers, secondary axis)
# and futures profit (bars, primary axis), coloured by quantile bucket.
# Side effect: adds a colour column to symbol.data_rank.
symbol.data_rank['盘面利润分位颜色'] = symbol.data_rank['盘面利润历史时间分位'].map(histroy_color_mapping)
fig_spot_profit = go.Scatter(x=symbol.data_rank['date'], y=symbol.data_rank['现货利润历史时间百分位'], name='现货利润', mode='markers',
                             marker=dict(size=2, color=symbol.data_rank['现货利润历史时间分位'], colorscale=list(histroy_color_mapping.values())),
                             hovertemplate='%{y:.2%}')
fig.add_trace(fig_spot_profit, row = 4, col = 1, secondary_y=True)
fig_future_profit = go.Bar(x=symbol.data_rank['date'], y=symbol.data_rank['盘面利润历史时间百分位'], name='盘面利润', marker_color=symbol.data_rank['盘面利润分位颜色'],
                           showlegend=False,
                           hovertemplate='%{y:.2%}')
fig.add_trace(fig_future_profit, row = 4, col = 1)



# Work out which calendar days inside the data range are not trading days, so
# they can be cut out of the x axis (dates are compared as "YYYY-MM-DD" strings).
trade_date = [d.strftime("%Y-%m-%d") for d in ak.tool_trade_date_hist_sina()['trade_date']]
dt_all = [
    d.strftime("%Y-%m-%d")
    for d in pd.date_range(start=symbol.symbol_data['date'].iloc[0],
                           end=symbol.symbol_data['date'].iloc[-1])
]
# Every calendar day in range that is not in the exchange trading calendar.
dt_breaks = list(set(dt_all) - set(trade_date))

# Shade every spot-month period with a translucent light-blue band spanning the
# full figure height (x in data coordinates, y in paper coordinates 0..1).
for _, row in symbol.spot_months.iterrows():
    fig.add_shape(
        # rectangle
        type="rect",
        # rectangle coordinates
        x0=row['Start Date'],
        x1=row['End Date'],
        y0=0,
        y1=1,
        xref='x',
        yref='paper',
        # fill colour and opacity
        fillcolor="LightBlue",
        opacity=0.1,
        # no border
        line_width=0,
        # draw the rectangle beneath the data
        layer="below"
    )

# Columns whose signals must all agree for a date to be flagged.
select_synchronize_index_value = ['基差率', '库存|仓单', '现货利润|盘面利润']
# df_signals =symbol.get_signals(select_synchronize_index_value)
# Join the basis rate with the quantile buckets on date (outer join keeps dates
# present on either side) and store the result on the symbol object.
symbol.signals = pd.merge(symbol.symbol_data[['date', '基差率']],
                        symbol.data_rank[['date', '库存历史时间分位', '仓单历史时间分位', '现货利润历史时间分位', '盘面利润历史时间分位']],
                        on='date', how='outer')
# Basis rate -> its sign: 1 if positive, -1 if negative, 0 otherwise (including NaN).
symbol.signals['基差率'] = symbol.signals['基差率'].map(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
# print(symbol.signals)
# # For other columns
# for col in ['库存历史时间分位', '仓单历史时间分位', '现货利润历史时间分位', '盘面利润历史时间分位']:
#     symbol.signals[col] = symbol.signals[col].map(lambda x: -1 if x == 5 else (0 if x != 1 else 1))
# Quantile bucket -> signal: bucket 5 -> -1, bucket 1 -> 1, anything else -> 0;
# missing values become 0 and the column is cast to int.
for col in ['库存历史时间分位', '仓单历史时间分位', '现货利润历史时间分位', '盘面利润历史时间分位']:
    symbol.signals[col] = symbol.signals[col].map(lambda x: -1 if x == 5 else (0 if x != 1 else 1)).fillna(0).astype(int)
print(symbol.signals)
# Combine pairs with a bitwise OR on the integers -1 / 0 / 1: the result is -1
# whenever either side is -1 (even if the other is 1), 1 when one side is 1 and the
# other is 0 or 1, and 0 when both are 0.
# NOTE(review): -1 | 1 == -1, so conflicting signals resolve to -1 — confirm this is intended.
symbol.signals['库存|仓单'] = symbol.signals['库存历史时间分位'] | symbol.signals['仓单历史时间分位']
symbol.signals['现货利润|盘面利润'] = symbol.signals['现货利润历史时间分位'] | symbol.signals['盘面利润历史时间分位']
# NOTE(review): '信号数量' is only created when the selection is non-empty, but it is
# read unconditionally below.
if len(select_synchronize_index_value)!=0:
    symbol.signals['信号数量'] = symbol.signals[select_synchronize_index_value].sum(axis=1)

# A date is flagged when every selected column equals -1 (the sum equals -count).
signal_nums = len(select_synchronize_index_value)
df_short_signals = symbol.signals[symbol.signals['信号数量']==-signal_nums]        
# Mark each flagged date with a small green shape one day wide along the top edge
# of the figure (y 0.99..1 in paper coordinates).
# Assumes row['date'] is datetime-like so that adding a timedelta works — TODO confirm.
for _, row in df_short_signals.iterrows():
    next_day = row['date'] + timedelta(days=1)
    fig.add_shape(
        type='circle',
        x0=row['date'], x1=next_day,
        y0=1, y1=0.99,
        xref='x', yref='paper',
        fillcolor='green',
        line_color='green'
    )

# Show x-axis ticks by year-month and hide non-trading days via range breaks.
fig.update_xaxes(
    showgrid=True,
    zeroline=True,
    dtick="M1",  # one tick per month
    ticklabelmode="period",   # instant  period
    tickformat="%b\n%Y",
    rangebreaks=[dict(values=dt_breaks)],
    rangeslider_visible = False, # range slider below the chart (disabled)
    # Optional fixed range-selector buttons (disabled):
    # rangeselector = dict(
    #     buttons = list([
    #         dict(count = 1, label = '1M', step = 'month', stepmode = 'backward'),
    #         dict(count = 6, label = '6M', step = 'month', stepmode = 'backward'),
    #         dict(count = 1, label = '1Y', step = 'year', stepmode = 'backward'),
    #         dict(count = 1, label = 'YTD', step = 'year', stepmode = 'todate'),
    #         dict(step = 'all')
    #         ]))
)
#fig.update_traces(xbins_size="M1")
# Range for the first y axis: dominant-contract close, padded by 5% on each side.
max_y = symbol.symbol_data['主力合约收盘价'] .max() * 1.05
min_y = symbol.symbol_data['主力合约收盘价'] .min() * 0.95
fig.update_layout(
    yaxis_range=[min_y,max_y],
    #autosize=False,
    #width=800,
    height=1000,
    margin=dict(l=0, r=0, t=0, b=0),
    plot_bgcolor='WhiteSmoke',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis2_showgrid=False,
    hovermode='x unified',
    # Horizontal legend anchored just above the top-right corner of the plot area.
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

fig.show()

In [7]:
# Scratch cell: rolling-window percentile ranking of a single field, written at
# cell level against `symbol` (aliased to `self`) so intermediate values can be
# inspected.
self = symbol
df_rank = None
mode='time'
trace_back_months=3
quantiles=[0, 20, 40, 60, 80, 100]
ranks=[1, 2, 3, 4, 5]
data_list=['库存', '仓单', '现货利润', '盘面利润']

field = data_list[0]
df_append= pd.DataFrame()
df_append['date'] = self.symbol_data['date']
if trace_back_months == 'all':
    window_size = len(self.symbol_data)
else:
    # 20 rows per month (presumably trading days per month). The old comment said
    # "30 days per month", which did not match the multiplier.
    window_size = trace_back_months * 20
if mode=='time':
    # Percentile of the latest value among the values inside the window.
    value_field = field + '历史时间百分位'
    rank_field = field + '历史时间分位'
    df_append[value_field] = self.symbol_data[field].rolling(window=window_size, min_periods=1).apply(lambda x: pd.Series(x).rank(pct=True).iloc[-1])
    # `quantiles` is rebound from percent levels to the bin edges used by pd.cut.
    quantiles = np.percentile(df_append[value_field].dropna(), quantiles)
elif mode=='value':
    # Position of the latest value between the window's min and max.
    value_field = field + '历史数值百分位'
    rank_field = field + '历史数值分位'
    # The window arrives as a Series, so the last element is taken positionally with
    # .iloc; x[-1] would be a label lookup.
    df_append[value_field] = self.symbol_data[field].rolling(window=window_size, min_periods=1).apply(lambda x: (x.iloc[-1] - np.min(x)) / (np.max(x) - np.min(x)))
    quantiles = list(map(lambda x: x/100, quantiles))
else:
    # Fail here with a clear message instead of a NameError on value_field below.
    raise ValueError(f"unsupported mode: {mode!r}")

# NOTE(review): with right=False the bins are left-closed, so a value equal to the
# top edge gets no bucket. The pasted rank table above shows percentile 1.000000
# with an empty bucket. Confirm whether right=True is wanted.
df_append[rank_field] = pd.cut(df_append[value_field].dropna(), bins=quantiles, labels=ranks, include_lowest=True, duplicates='drop', right=False)
# Identity check: `== None` on a DataFrame compares element-wise and cannot be
# used in an `if`.
if df_rank is None:
    df_rank = df_append
else:
    df_rank = pd.merge(df_rank, df_append, on='date', how='outer')

# Attach the raw field values next to their percentile and bucket.
df_rank = pd.merge(df_rank, symbol.symbol_data[['date', field]], on='date', how='outer')


In [31]:
# Scratch cell: same ranking experiment, but each value is ranked against all
# history seen so far (expanding window) rather than a fixed rolling window.
self = symbol
df_rank = pd.DataFrame()
mode = 'time'
trace_back_months = 1
quantiles = [0, 20, 40, 60, 80, 100]
ranks = [1, 2, 3, 4, 5]
data_list = ['库存', '仓单', '现货利润', '盘面利润']
field = data_list[0]

df_append = pd.DataFrame()
df_append['date'] = self.symbol_data['date']

# Kept for parity with the rolling version; the expanding computations below do
# not read window_size.
window_size = len(self.symbol_data) if trace_back_months == 'all' else trace_back_months * 20

if mode == 'time':
    value_field = field + '历史时间百分位'
    rank_field = field + '历史时间分位'
    # Share of observations so far that are <= the latest one (min-rank / count).
    df_append[value_field] = self.symbol_data[field].expanding().apply(
        lambda x: x.rank(method='min').iloc[-1] / len(x)
    )
    # Bin edges are taken from the empirical distribution of the percentiles.
    quantiles = np.percentile(df_append[value_field].dropna(), quantiles)
elif mode == 'value':
    value_field = field + '历史数值百分位'
    rank_field = field + '历史数值分位'
    # Position of the latest value between the running min and max.
    df_append[value_field] = self.symbol_data[field].expanding().apply(
        lambda x: (x.iloc[-1] - x.min()) / (x.max() - x.min())
    )
    quantiles = [q / 100 for q in quantiles]

df_append[rank_field] = pd.cut(
    df_append[value_field].dropna(),
    bins=quantiles,
    labels=ranks,
    include_lowest=True,
    duplicates='drop',
    right=False,
)
df_rank = df_append if df_rank.empty else pd.merge(df_rank, df_append, on='date', how='outer')

# Attach the raw field values next to their percentile and bucket.
df_rank = pd.merge(df_rank, symbol.symbol_data[['date', field]], on='date', how='outer')

### 历史水位分析

基差率-库存消费比-利润率

历史时间比例分位

In [None]:
# df1 = symbol.history_time_ratio('库存', df_rank=merged_data)
# df2 = symbol.history_time_ratio('库存', df_rank=merged_data, mode='value')


In [None]:
# 将累计销量数据转化为当月销量数据

# 假设您提供的数据保存在一个名为df的dataframe中，字段包含日期和累计销量
# 例如，df的前五行如下：
#          日期   累计销量
# 0 2020-01-31  1000
# 1 2020-02-29  1500
# 2 2020-03-31  1800
# 3 2020-04-30  2200
# 4 2020-05-31  2500

# 定义一个函数，计算当月销量值
def calc_monthly_sales(df):
    """Convert year-to-date cumulative sales into per-month sales.

    Args:
        df: DataFrame with a '日期' column (datetime-like, one row per month, in
            chronological order) and a '累计销量' column holding cumulative sales
            that restart every January. Any index is accepted; rows are processed
            by position.

    Returns:
        list[float]: one entry per row of ``df``.
            * January rows: the cumulative value itself (the series restarts).
            * Rows whose previous row is exactly one calendar month earlier
              (same year): cumulative value minus the previous cumulative value.
            * All other rows (first row that is not January, or a gap in the
              months): NaN, because the previous month's total is unknown.
    """
    if len(df) == 0:
        return []

    dates = pd.to_datetime(df['日期'])
    months = dates.dt.month.to_numpy()
    # Year * 12 + month gives consecutive integers for consecutive calendar months,
    # so adjacency also accounts for the year, not only the month number.
    month_ordinal = dates.dt.year.to_numpy() * 12 + months
    cum_sales = df['累计销量'].to_numpy(dtype=float)

    # The first row has no predecessor: prepending its own ordinal yields a step of 0.
    prev_is_adjacent = np.diff(month_ordinal, prepend=month_ordinal[0]) == 1
    monthly_sales = np.where(prev_is_adjacent, np.diff(cum_sales, prepend=np.nan), np.nan)
    # January restarts the cumulative series, so its monthly value is the cumulative
    # value itself — never "January minus last December".
    monthly_sales = np.where(months == 1, cum_sales, monthly_sales)
    return monthly_sales.tolist()

# Call the function to get a list of per-month sales values.
# NOTE(review): `df` is not defined in the visible part of this notebook; the
# comments above describe it as a hypothetical frame with '日期' and '累计销量' columns.
monthly_sales = calc_monthly_sales(df)

# Store the per-month values in a new column of the original dataframe.
df['当月销量'] = monthly_sales

# Print the first five rows to check the result.
print(df.head())

#          日期   累计销量  当月销量
# 0 2020-01-31  1000   1000.0
# 1 2020-02-29  1500    500.0
# 2 2020-03-31  1800    300.0
# 3 2020-04-30  2200    400.0
# 4 2020-05-31  2500    300.0


## 季节性分析

### 基差率季节分析

In [None]:
# Seasonal view of the basis rate: one line per calendar year on a shared
# month-day axis.
# NOTE(review): `df_rb0` is not defined in the visible part of this notebook.
df_rb0['年度'] = df_rb0['日期'].dt.year
# Re-label every date with the fixed year 1900 so that all years share the same
# x positions (the result is a string column).
df_rb0['年内日期'] = df_rb0['日期'].dt.strftime('1900-%m-%d')
fig_basis_rate_season = px.line(df_rb0,
                                x='年内日期',
                                y='基差率',
                                color='年度',
                                #color_discrete_sequence=px.colors.qualitative.G10)
                                # Explicit four-colour palette for the year lines.
                                color_discrete_sequence=['lightgray', 'lightblue', 'orange', 'red'])
fig_basis_rate_season.update_layout(
    title={
        'text':'基差率季节分析',
        'xanchor':'center'},
    margin=dict(l=10, r=10, t=40, b=10)
)

fig_basis_rate_season.show()

### 基差率月度涨跌统计

### 基差率频率分布

### 库存季节性分析

## 跨期分析

### 期限结构

In [None]:
# Load the contract reference data.
futures_comm_info = pd.read_excel('data/common_info.xlsx')
# Keep only the contracts whose name starts with the rebar product name.
spec_contact_list = futures_comm_info[futures_comm_info.合约名称.str.startswith('螺纹钢')]
fig_term = make_subplots(specs=[[{"secondary_y": True}]])
# Term structure: current price per contract code.
fig_term.add_trace(go.Scatter(x=spec_contact_list['合约代码'], y=spec_contact_list['现价']))
# Latest spot price: the last non-zero value of the '现货' column.
# NOTE(review): `df_rb0` is not defined in the visible part of this notebook.
spot_price = df_rb0[df_rb0['现货']!=0]['现货'].iloc[-1]
# Horizontal reference line at the spot price.
fig_term.add_hline(y=spot_price)
fig_term.update_layout(
    title={
        'text':'期限结构'
    },
    #autosize=False,
    width=800,
    #height=800,
    margin=dict(l=10, r=10, t=40, b=10)
)
fig_term.show()

### 套利分析

#### 价差分析-多期排列

#### 价差分析-跨期价差矩阵

#### 基差-月差分析

#### 价差季节性分析

## 库存

### 库存周期

#### 期转现

#### 交割统计

## 利润

### 现货利润

### 期货盘面利润

### 利润期限结构

## 综合分析

### 基差-库存-利润分析

### 基差-月差分析

### 期限结构-库存/仓单分析