In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib as mpl
import matplotlib.pyplot as plt 
from matplotlib.pyplot import MultipleLocator
from sklearn.linear_model import LinearRegression
from statsmodels.formula.api import ols
import statsmodels.api as sm

## 导入数据与数据预处理

In [114]:
share = pd.read_csv(r'C:\Users\17782\Desktop\FIN3080 Projiect2\case1\Data\Share Return.csv')
market = pd.read_csv(r'C:\Users\17782\Desktop\FIN3080 Projiect2\case1\Data\Market Return.csv')
shibor = pd.read_csv(r'C:\Users\17782\Desktop\FIN3080 Projiect2\case1\Data\Shibor_week.csv')

In [3]:
market = market[2:]
market['CloseDate'] = pd.to_datetime(market['CloseDate'])
market['week'] = market['CloseDate'].dt.strftime('%Y') + '-' + market['CloseDate'].dt.strftime('%W')
market = market.dropna()
market

Unnamed: 0,Symbol,CloseDate,ChangeRatio,week
2,906,2017-01-06,0.013119,2017-01
3,906,2017-01-13,-0.014152,2017-02
4,906,2017-01-20,0.003929,2017-03
5,906,2017-01-27,0.011832,2017-04
6,906,2017-02-03,-0.005677,2017-05
...,...,...,...,...
205,906,2020-11-27,0.003188,2020-47
206,906,2020-12-04,0.017262,2020-48
207,906,2020-12-11,-0.035887,2020-49
208,906,2020-12-18,0.020429,2020-50


In [4]:
# 选取周数全勤的股票
share = share.dropna()
share['week'] = share['Trdwnt']
share

Unnamed: 0,Stkcd,Trdwnt,Wretwd,week
0,1,2017-01,0.003297,2017-01
1,1,2017-02,0.003286,2017-02
2,1,2017-03,0.006550,2017-03
3,1,2017-04,0.011931,2017-04
4,1,2017-05,-0.007503,2017-05
...,...,...,...,...
562652,605399,2020-50,-0.077516,2020-50
562653,605399,2020-51,0.014614,2020-51
562654,605399,2020-52,-0.076646,2020-52
562655,605399,2020-53,-0.012256,2020-53


In [5]:
# 表格合并
step1 = pd.merge(share, market, on=['week', 'week'])
step1 = step1[['Stkcd','Wretwd','ChangeRatio','week','CloseDate']]
step1

Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate
0,1,0.003297,0.013119,2017-01,2017-01-06
1,2,0.004380,0.013119,2017-01,2017-01-06
2,4,-0.020935,0.013119,2017-01,2017-01-06
3,5,0.039941,0.013119,2017-01,2017-01-06
4,6,0.059259,0.013119,2017-01,2017-01-06
...,...,...,...,...,...
550261,605366,-0.003091,0.006,2020-51,2020-12-25
550262,605369,0.059955,0.006,2020-51,2020-12-25
550263,605376,0.610268,0.006,2020-51,2020-12-25
550264,605388,0.127891,0.006,2020-51,2020-12-25


# 步骤一 计算个股 β系数

In [6]:
step1['CloseDate'] = pd.to_datetime(step1['CloseDate'])
step1['week_num'] = step1['CloseDate'].dt.week + (step1['CloseDate'].dt.year-2017)*52 # 将时间划为更便于分类的序号
step1

  step1['week_num'] = step1['CloseDate'].dt.week + (step1['CloseDate'].dt.year-2017)*52 # 将时间划为更便于分类的序号


Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate,week_num
0,1,0.003297,0.013119,2017-01,2017-01-06,1
1,2,0.004380,0.013119,2017-01,2017-01-06,1
2,4,-0.020935,0.013119,2017-01,2017-01-06,1
3,5,0.039941,0.013119,2017-01,2017-01-06,1
4,6,0.059259,0.013119,2017-01,2017-01-06,1
...,...,...,...,...,...,...
550261,605366,-0.003091,0.006,2020-51,2020-12-25,208
550262,605369,0.059955,0.006,2020-51,2020-12-25,208
550263,605376,0.610268,0.006,2020-51,2020-12-25,208
550264,605388,0.127891,0.006,2020-51,2020-12-25,208


In [7]:
d = step1.groupby('Stkcd')['week_num'].count()

In [8]:
# 寻找有效的股票编号
n = []
for i in list(d.index):
    if d[i] == 202:
        n.append(i)

In [9]:
step1 = step1.loc[step1.Stkcd.isin(n)]
step1

Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate,week_num
0,1,0.003297,0.013119,2017-01,2017-01-06,1
1,2,0.004380,0.013119,2017-01,2017-01-06,1
3,5,0.039941,0.013119,2017-01,2017-01-06,1
7,9,0.014479,0.013119,2017-01,2017-01-06,1
8,10,0.002601,0.013119,2017-01,2017-01-06,1
...,...,...,...,...,...,...
550209,603989,-0.056980,0.006,2020-51,2020-12-25,208
550210,603990,-0.079150,0.006,2020-51,2020-12-25,208
550213,603993,0.174014,0.006,2020-51,2020-12-25,208
550215,603996,-0.224832,0.006,2020-51,2020-12-25,208


# 第一阶段CAPM模型构建

In [12]:
def capm_model_a(df): # 得到CAPM模型的预期alpha
    Y = df[['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).intercept_)
def capm_model_b(df): # 得到CAPM模型的预期β
    Y = df[['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).coef_)
def capm_model_r(df): # 得到CAPM模型的R-square
    Y = df[['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).score(X, Y))

In [14]:
n = list(range(71))
week_1 = step1.loc[step1.week_num.isin(n)]
week_1

Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate,week_num
0,1,0.003297,0.013119,2017-01,2017-01-06,1
1,2,0.004380,0.013119,2017-01,2017-01-06,1
3,5,0.039941,0.013119,2017-01,2017-01-06,1
7,9,0.014479,0.013119,2017-01,2017-01-06,1
8,10,0.002601,0.013119,2017-01,2017-01-06,1
...,...,...,...,...,...,...
171648,603989,0.011223,0.005647,2018-18,2018-05-04,70
171649,603990,-0.021218,0.005647,2018-18,2018-05-04,70
171651,603993,-0.010554,0.005647,2018-18,2018-05-04,70
171652,603996,-0.018138,0.005647,2018-18,2018-05-04,70


In [67]:
capm_res = week_1.groupby('Stkcd').apply(capm_model_a) # 根据股票分组应用函数
capm_res = capm_res.reset_index()
capm1_alpha = capm_res.rename(columns = {0:'alpha'})
capm1_alpha # 得到CAPM alpha的预期值
capm1_alpha[:10]

Unnamed: 0,Stkcd,alpha
0,1,0.0019955454370564
1,2,0.0038421193775565
2,5,-0.0097150885033218
3,9,-0.0086982907382242
4,10,-0.0112781965219352
5,11,-0.004312548982436
6,12,-0.0053318324655697
7,14,-0.0066588744374982
8,16,0.0029109555370624
9,21,-0.0011665730264948


In [15]:
capm_res = week_1.groupby('Stkcd').apply(capm_model_b) # 根据股票分组应用函数
capm_res = capm_res.reset_index()
capm1_beta = capm_res.rename(columns = {0:'beta'})
capm1_beta # 得到CAPM beta的预期值
capm1_beta[:10]

Unnamed: 0,Stkcd,beta
0,1,1.2186956193874203
1,2,1.513923226007352
2,5,1.0671463226705196
3,9,1.0058369890469994
4,10,1.5897711636755698
5,11,1.406881358819169
6,12,1.097463675499722
7,14,1.5165784021825943
8,16,1.7872333861938676
9,21,1.2721476646825107


In [71]:
capm_res = week_1.groupby('Stkcd').apply(capm_model_r) # 根据股票分组应用函数
capm_res = capm_res.reset_index()
capm1_r = capm_res.rename(columns = {0:'R2'})
capm1_r # 得到CAPM beta的预期值
capm1_r[:10]

Unnamed: 0,Stkcd,R2
0,1,0.256932
1,2,0.288106
2,5,0.337114
3,9,0.229923
4,10,0.230682
5,11,0.299412
6,12,0.311531
7,14,0.193075
8,16,0.435046
9,21,0.250451


In [73]:
capm1_beta.to_csv(r"C:\Users\17782\Desktop\FIN3080 Projiect2\case1\my Data\beta.csv")
capm1_alpha.to_csv(r"C:\Users\17782\Desktop\FIN3080 Projiect2\case1\my Data\alpha.csv")
capm1_r.to_csv(r"C:\Users\17782\Desktop\FIN3080 Projiect2\case1\my Data\r.csv")

In [16]:
def get(id): #得到股票代码为id的所有数据
    n = [id]
    n_data = week_1.loc[week_1.Stkcd.isin(n)]
    
    x0 = np.ones(n_data.shape[0])
    x1= n_data.ChangeRatio  # 根据因变量列名 list，建立因变量数据集
    y =  n_data.Wretwd
    x = np.column_stack((x0,x1)) 

    model = sm.OLS(y, x)  # 建立 OLS 模型
    results = model.fit()
    print(results.summary())

In [145]:
week_1["ChangeRatio"] = pd.to_numeric(week_1["ChangeRatio"],errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week_1["ChangeRatio"] = pd.to_numeric(week_1["ChangeRatio"],errors='coerce')


In [182]:
get(151)

                            OLS Regression Results                            
Dep. Variable:                 Wretwd   R-squared:                       0.029
Model:                            OLS   Adj. R-squared:                  0.014
Method:                 Least Squares   F-statistic:                     1.996
Date:                Tue, 19 Apr 2022   Prob (F-statistic):              0.162
Time:                        17:46:48   Log-Likelihood:                 76.681
No. Observations:                  69   AIC:                            -149.4
Df Residuals:                      67   BIC:                            -144.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0016      0.010     -0.160      0.8

# 步骤二 构造股票组合

In [10]:
n = list(range(71,141))
week_2 = step1.loc[step1.week_num.isin(n)]
week_2
perc = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] # 生成百分位数，以此分组
week_2['allo'] = 1 # 生成新列供存储数据
week_2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week_2['allo'] = 1 # 生成新列供存储数据


Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
171656,1,0.030899,0.022816,2018-19,2018-05-11,71,1
171657,2,0.038276,0.022816,2018-19,2018-05-11,71,1
171659,5,0.032967,0.022816,2018-19,2018-05-11,71,1
171663,9,0.042662,0.022816,2018-19,2018-05-11,71,1
171664,10,0.059490,0.022816,2018-19,2018-05-11,71,1
...,...,...,...,...,...,...,...
357832,603989,-0.037290,0.04287,2019-35,2019-09-06,140,1
357833,603990,0.007273,0.04287,2019-35,2019-09-06,140,1
357835,603993,0.002793,0.04287,2019-35,2019-09-06,140,1
357836,603996,0.020000,0.04287,2019-35,2019-09-06,140,1


In [17]:
step2 = pd.merge(capm1_beta,week_2,on=['Stkcd','Stkcd'])
step2

Unnamed: 0,Stkcd,beta,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
0,1,1.2186956193874203,0.030899,0.022816,2018-19,2018-05-11,71,1
1,1,1.2186956193874203,-0.004541,0.005925,2018-20,2018-05-18,72,1
2,1,1.2186956193874203,-0.033759,-0.01976,2018-21,2018-05-25,73,1
3,1,1.2186956193874203,-0.037771,-0.019196,2018-22,2018-06-01,74,1
4,1,1.2186956193874203,-0.006869,0.001063,2018-23,2018-06-08,75,1
...,...,...,...,...,...,...,...,...
93527,603999,1.2571870553601916,-0.051095,-0.033479,2019-31,2019-08-09,136,1
93528,603999,1.2571870553601916,-0.071154,0.022443,2019-32,2019-08-16,137,1
93529,603999,1.2571870553601916,0.132505,0.030955,2019-33,2019-08-23,138,1
93530,603999,1.2571870553601916,0.171847,-0.004337,2019-34,2019-08-30,139,1


In [102]:
def allocation(step2): # 构造函数，使得每个股票按照第一期β进行区间排名（排名分1-10，1代表β最小的股票）
    quan = step2['beta'].describe(percentiles = perc)
    quan = quan.iloc[4:13]
    step2.loc[step2.beta <= quan[0],'allo'] = 1
    step2.loc[(step2.beta > quan[0])&(step2.beta <= quan[1]),'allo'] = 2
    step2.loc[(step2.beta > quan[1])&(step2.beta <= quan[2]),'allo'] = 3
    step2.loc[(step2.beta > quan[2])&(step2.beta <= quan[3]),'allo'] = 4
    step2.loc[(step2.beta > quan[3])&(step2.beta <= quan[4]),'allo'] = 5
    step2.loc[(step2.beta > quan[4])&(step2.beta <= quan[5]),'allo'] = 6
    step2.loc[(step2.beta > quan[5])&(step2.beta <= quan[6]),'allo'] = 7
    step2.loc[(step2.beta > quan[6])&(step2.beta <= quan[7]),'allo'] = 8
    step2.loc[(step2.beta > quan[7])&(step2.beta <= quan[8]),'allo'] = 9
    step2.loc[step2.beta > quan[8],'allo'] = 10
    return step2

In [103]:
step2['beta'] = step2['beta'].astype('float64')
step2 = step2.groupby(['week']).apply(allocation) # 进行排名
step2

Unnamed: 0,Stkcd,beta,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
0,1,1.218696,0.030899,0.022816,2018-19,2018-05-11,71,7
1,1,1.218696,-0.004541,0.005925,2018-20,2018-05-18,72,7
2,1,1.218696,-0.033759,-0.01976,2018-21,2018-05-25,73,7
3,1,1.218696,-0.037771,-0.019196,2018-22,2018-06-01,74,7
4,1,1.218696,-0.006869,0.001063,2018-23,2018-06-08,75,7
...,...,...,...,...,...,...,...,...
93527,603999,1.257187,-0.051095,-0.033479,2019-31,2019-08-09,136,8
93528,603999,1.257187,-0.071154,0.022443,2019-32,2019-08-16,137,8
93529,603999,1.257187,0.132505,0.030955,2019-33,2019-08-23,138,8
93530,603999,1.257187,0.171847,-0.004337,2019-34,2019-08-30,139,8


# 投资组合CAPM分析

In [None]:
def capm_model2_a(df): # 得到CAPM模型的预期alpha
    Y = df[['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).intercept_)
def capm_model2_b(df): # 得到CAPM模型的预期β
    Y = [['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).coef_)
def capm_model_r(df): # 得到CAPM模型的R-square
    Y = df[['Wretwd']]
    X = df[['ChangeRatio']]
    return np.squeeze(LinearRegression().fit(X,Y).score(X, Y))

## 无风险利率计算

In [115]:
shibor

Unnamed: 0,SgnDate,Term,Shibor
0,2017/1/3,7天,2.589
1,2017/1/4,7天,2.545
2,2017/1/5,7天,2.474
3,2017/1/6,7天,2.441
4,2017/1/9,7天,2.410
...,...,...,...
983,2020/12/25,7天,2.151
984,2020/12/28,7天,2.133
985,2020/12/29,7天,2.264
986,2020/12/30,7天,2.308


In [117]:
shibor['SgnDate'] = pd.to_datetime(shibor['SgnDate'])
shibor['week'] = shibor['SgnDate'].dt.strftime('%Y') + '-' + shibor['SgnDate'].dt.strftime('%W')
shibor = shibor.groupby(['week']).Shibor.median().reset_index()

In [119]:
shibor['Shibor'] = shibor['Shibor']/(100*52)

In [121]:
step2 = pd.merge(step2, shibor, on=['week', 'week'])

In [127]:
step2['Shibor'] = step2['Shibor'].astype('float64')
step2['ChangeRatio'] = step2['ChangeRatio'].astype('float64')
step2['x'] = step2['ChangeRatio'] - step2['Shibor']
step2['y'] = step2['Wretwd'] - step2['Shibor']

In [140]:
week_2 = step2[['x','y','allo','week']]
week_2 = week_2.groupby(['allo','week','x'])['y'].mean()

In [153]:
week_2 = week_2.reset_index()

In [166]:
def capm_model2_a(df): # 得到CAPM模型的预期alpha
    Y = np.array(df['y']).reshape(-1,1)
    X = np.array(df['x']).reshape(-1,1)
    return np.squeeze(LinearRegression().fit(X,Y).intercept_)
def capm_model2_b(df): # 得到CAPM模型的预期β
    Y = df['y']
    X = np.array(df['x']).reshape(-1,1)
    return np.squeeze(LinearRegression().fit(X,Y).coef_)
def capm_model2_r(df): # 得到CAPM模型的R-square
    Y = df['y']
    X = np.array(df['x']).reshape(-1,1)
    return np.squeeze(LinearRegression().fit(X,Y).score(X, Y))
def get(id): #得到各组数的所有数据
    n = [id]
    n_data = week_2.loc[week_2.allo.isin(n)]
    
    x0 = np.ones(n_data.shape[0])
    x1= n_data.x  # 根据因变量列名 list，建立因变量数据集
    y =  n_data.y
    x = np.column_stack((x0,x1)) 

    model = sm.OLS(y, x)  # 建立 OLS 模型
    results = model.fit()
    print(results.summary())

In [165]:
capm_res = week_2.groupby('allo').apply(capm_model2_a) # 根据股票分组应用函数
capm_res = capm_res.reset_index()
capm2_alpha = capm_res.rename(columns = {0:'alpha'})
capm2_alpha # 得到CAPM alpha的预期值

Unnamed: 0,allo,alpha
0,1,-0.0004614675246346
1,2,-0.0004637270617894
2,3,0.0004030906331998
3,4,-0.0001744691088933
4,5,7.635013894244906e-05
5,6,0.0008961825418858
6,7,0.0008853464769616
7,8,0.0004141835399028
8,9,0.0003010041650499
9,10,-0.0007924712790547


In [168]:
capm_res = week_2.groupby('allo').apply(capm_model2_b) # 根据股票分组应用函数
capm_res = capm_res.reset_index()
capm2_beta = capm_res.rename(columns = {0:'beta'})
capm2_beta # 得到CAPM alpha的预期值

Unnamed: 0,allo,beta
0,1,0.5522115004344013
1,2,0.5994490117575214
2,3,0.5856721439106721
3,4,0.6023535476517677
4,5,0.6728031071244879
5,6,0.6557582022330689
6,7,0.6690388742295466
7,8,0.6931233846498486
8,9,0.6615576290079844
9,10,0.745062289915566


In [178]:
get(10)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.301
Model:                            OLS   Adj. R-squared:                  0.290
Method:                 Least Squares   F-statistic:                     27.59
Date:                Tue, 19 Apr 2022   Prob (F-statistic):           1.83e-06
Time:                        20:24:51   Log-Likelihood:                 127.89
No. Observations:                  66   AIC:                            -251.8
Df Residuals:                      64   BIC:                            -247.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0008      0.004     -0.182      0.8

# 步骤三 CAPM 的横截面回归

In [180]:
n = list(range(141,209))
week_3 = step1.loc[step1.week_num.isin(n)]
week_3
perc = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] # 生成百分位数，以此分组
week_3['allo'] = 1 # 生成新列供存储数据
week_3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week_3['allo'] = 1 # 生成新列供存储数据


Unnamed: 0,Stkcd,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
357840,1,0.045904,0.008659,2019-36,2019-09-13,141,1
357841,2,0.042248,0.008659,2019-36,2019-09-13,141,1
357843,5,0.148387,0.008659,2019-36,2019-09-13,141,1
357847,9,0.063043,0.008659,2019-36,2019-09-13,141,1
357848,10,0.014837,0.008659,2019-36,2019-09-13,141,1
...,...,...,...,...,...,...,...
550209,603989,-0.056980,0.006,2020-51,2020-12-25,208,1
550210,603990,-0.079150,0.006,2020-51,2020-12-25,208,1
550213,603993,0.174014,0.006,2020-51,2020-12-25,208,1
550215,603996,-0.224832,0.006,2020-51,2020-12-25,208,1


In [181]:
step3 = pd.merge(capm1_beta,week_3,on=['Stkcd','Stkcd'])
step3

Unnamed: 0,Stkcd,beta,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
0,1,1.2186956193874203,0.045904,0.008659,2019-36,2019-09-13,141,1
1,1,1.2186956193874203,-0.008778,-0.008774,2019-37,2019-09-20,142,1
2,1,1.2186956193874203,0.044959,-0.025182,2019-38,2019-09-27,143,1
3,1,1.2186956193874203,0.036506,-0.010612,2019-39,2019-10-04,144,1
4,1,1.2186956193874203,-0.019497,0.024943,2019-40,2019-10-11,145,1
...,...,...,...,...,...,...,...,...
92131,603999,1.2571870553601916,0.037866,0.003188,2020-47,2020-11-27,204,1
92132,603999,1.2571870553601916,-0.009950,0.017262,2020-48,2020-12-04,205,1
92133,603999,1.2571870553601916,0.031826,-0.035887,2020-49,2020-12-11,206,1
92134,603999,1.2571870553601916,-0.058442,0.020429,2020-50,2020-12-18,207,1


In [182]:
step3['beta'] = step3['beta'].astype('float64')
step3 = step3.groupby(['week']).apply(allocation) # 进行排名
step3

Unnamed: 0,Stkcd,beta,Wretwd,ChangeRatio,week,CloseDate,week_num,allo
0,1,1.218696,0.045904,0.008659,2019-36,2019-09-13,141,7
1,1,1.218696,-0.008778,-0.008774,2019-37,2019-09-20,142,7
2,1,1.218696,0.044959,-0.025182,2019-38,2019-09-27,143,7
3,1,1.218696,0.036506,-0.010612,2019-39,2019-10-04,144,7
4,1,1.218696,-0.019497,0.024943,2019-40,2019-10-11,145,7
...,...,...,...,...,...,...,...,...
92131,603999,1.257187,0.037866,0.003188,2020-47,2020-11-27,204,8
92132,603999,1.257187,-0.009950,0.017262,2020-48,2020-12-04,205,8
92133,603999,1.257187,0.031826,-0.035887,2020-49,2020-12-11,206,8
92134,603999,1.257187,-0.058442,0.020429,2020-50,2020-12-18,207,8


In [184]:
step3 = pd.merge(step3, shibor, on=['week', 'week'])

In [186]:
step3['Shibor'] = step3['Shibor'].astype('float64')
step3['ChangeRatio'] = step3['ChangeRatio'].astype('float64')
step3['x'] = step3['ChangeRatio'] - step2['Shibor']
step3['y'] = step3['Wretwd'] - step2['Shibor']

In [187]:
step3

Unnamed: 0,Stkcd,beta,Wretwd,ChangeRatio,week,CloseDate,week_num,allo,Shibor,x,y
0,1,1.218696,0.045904,0.008659,2019-36,2019-09-13,141,7,0.000509,0.008133,0.045378
1,2,1.513923,0.042248,0.008659,2019-36,2019-09-13,141,9,0.000509,0.008133,0.041722
2,5,1.067146,0.148387,0.008659,2019-36,2019-09-13,141,6,0.000509,0.008133,0.147861
3,9,1.005837,0.063043,0.008659,2019-36,2019-09-13,141,5,0.000509,0.008133,0.062517
4,10,1.589771,0.014837,0.008659,2019-36,2019-09-13,141,10,0.000509,0.008133,0.014311
...,...,...,...,...,...,...,...,...,...,...,...
90735,603989,0.976547,-0.056980,0.006000,2020-51,2020-12-25,208,5,0.000396,0.005487,-0.057494
90736,603990,0.717598,-0.079150,0.006000,2020-51,2020-12-25,208,2,0.000396,0.005487,-0.079663
90737,603993,1.704319,0.174014,0.006000,2020-51,2020-12-25,208,10,0.000396,0.005487,0.173501
90738,603996,0.593686,-0.224832,0.006000,2020-51,2020-12-25,208,1,0.000396,0.005487,-0.225346


In [197]:
week_3 = step3[['x','y','allo','week']]
week_3 = week_3.groupby(['allo','week','x'])['y'].mean()

In [198]:
week_3 = week_3.reset_index()

In [199]:
week_3

Unnamed: 0,allo,week,x,y
0,1,2019-36,0.008133,0.036397
1,1,2019-37,-0.009307,0.021077
2,1,2019-38,-0.025712,-0.006413
3,1,2019-39,-0.011164,-0.036826
4,1,2019-40,0.024406,-0.008723
...,...,...,...,...
645,10,2020-47,0.002677,0.026484
646,10,2020-48,0.016766,-0.013290
647,10,2020-49,-0.036402,0.014090
648,10,2020-50,0.019919,-0.038724


In [193]:
capm2_beta

Unnamed: 0,allo,beta
0,1,0.5522115004344013
1,2,0.5994490117575214
2,3,0.5856721439106721
3,4,0.6023535476517677
4,5,0.6728031071244879
5,6,0.6557582022330689
6,7,0.6690388742295466
7,8,0.6931233846498486
8,9,0.6615576290079844
9,10,0.745062289915566


In [200]:
week_3 = week_3.groupby(['allo'])['y'].mean()

In [203]:
week_3 = week_3.reset_index()
week_3

Unnamed: 0,allo,y
0,1,0.002543
1,2,0.002558
2,3,0.003532
3,4,0.003952
4,5,0.004701
5,6,0.003442
6,7,0.004236
7,8,0.004491
8,9,0.003117
9,10,0.004204


In [204]:
table = pd.merge(week_3,capm2_beta,on=['allo','allo'])

In [205]:
table

Unnamed: 0,allo,y,beta
0,1,0.002543,0.5522115004344013
1,2,0.002558,0.5994490117575214
2,3,0.003532,0.5856721439106721
3,4,0.003952,0.6023535476517677
4,5,0.004701,0.6728031071244879
5,6,0.003442,0.6557582022330689
6,7,0.004236,0.6690388742295466
7,8,0.004491,0.6931233846498486
8,9,0.003117,0.6615576290079844
9,10,0.004204,0.745062289915566


In [208]:
table['beta'] = table['beta'].astype('float64')

## 回归分析

In [209]:
n_data = table

x0 = np.ones(n_data.shape[0])
x1= n_data.beta  # 根据因变量列名 list，建立因变量数据集
y =  n_data.y
x = np.column_stack((x0,x1)) 

model = sm.OLS(y, x)  # 建立 OLS 模型
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.474
Model:                            OLS   Adj. R-squared:                  0.409
Method:                 Least Squares   F-statistic:                     7.218
Date:                Tue, 19 Apr 2022   Prob (F-statistic):             0.0276
Time:                        20:59:41   Log-Likelihood:                 61.272
No. Observations:                  10   AIC:                            -118.5
Df Residuals:                       8   BIC:                            -117.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0022      0.002     -1.005      0.3



In [210]:
table.to_csv(r"C:\Users\17782\Desktop\FIN3080 Projiect2\case1\my Data\draw.csv")