In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: shawn
"""
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms

## Generate 10 market cap-sorted portfolios

In [2]:
# Load the monthly stock panel used throughout the notebook.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this file exists at exactly this location.
source_csv = '/Users/shawn/Github/M1/金融計量/TWstocks.csv'
df = pd.read_csv(source_csv)

# Preview the first rows to check that the columns parsed as expected.
df.head()

Unnamed: 0,證券代碼,年月,收盤價(元)_月,報酬率％_月,流通在外股數(千股),市值(百萬元),市值比重％,開盤價(元)_月,最高價(元)_月,最低價(元)_月,...,本益比-TEJ,股價淨值比-TSE,股價淨值比-TEJ,股價營收比-TEJ,股利殖利率-TSE,現金股利率,股價漲跌(元),高低價差%,市場別,該月結束日
0,1101 台泥,201001,10.92,-12.2058,3292175,98271,0.516,12.43,13.31,10.68,...,16.14,1.28,1.28,4.57,4.42,4.42,-4.15,21.1765,TSE,20100129
1,1102 亞泥,201001,12.76,-12.4276,2985735,90468,0.475,14.57,15.2,12.57,...,12.67,1.26,1.26,8.56,6.93,5.77,-4.3,18.0636,TSE,20100129
2,1103 嘉泥,201001,9.23,-12.7072,717877,11342,0.06,10.63,11.04,9.06,...,56.97,0.68,0.68,4.5,0.0,0.0,-2.3,18.7845,TSE,20100129
3,1104 環泥,201001,7.54,-7.1428,603891,9813,0.052,8.12,9.1,7.31,...,65.7,0.81,0.81,3.16,0.0,0.0,-1.25,22.0,TSE,20100129
4,1108 幸福,201001,5.03,-11.5084,404738,3206,0.017,5.68,5.94,4.96,...,,0.75,0.75,1.09,0.0,0.0,-1.03,17.3184,TSE,20100129


In [3]:
# Labels for the ten market-cap buckets; pd.qcut assigns them in ascending
# order of the binned value, so q1 is the smallest-cap decile and q10 the largest.
decile_labels = ['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10']


def _assign_decile(caps):
    """Return the decile label for each market cap within a single month."""
    return pd.qcut(caps, q=10, labels=decile_labels)


# Deciles are computed separately for every month (年月), so the breakpoints
# are re-estimated from that month's cross-section of market caps (市值(百萬元)).
caps_by_month = df.groupby('年月')['市值(百萬元)']
df['market_cap_sorted'] = caps_by_month.transform(_assign_decile)

In [4]:
# Remove stocks that did not receive a decile label. pd.qcut marks values it
# cannot bin (e.g. a missing market cap) with NaN, not with an empty string,
# so the filter has to test for missing values rather than compare against ''.
# Rebinding instead of dropping in place keeps this cell safe to re-run.
df = df.dropna(subset=['market_cap_sorted'])
df.head()


Unnamed: 0,證券代碼,年月,收盤價(元)_月,報酬率％_月,流通在外股數(千股),市值(百萬元),市值比重％,開盤價(元)_月,最高價(元)_月,最低價(元)_月,...,股價淨值比-TSE,股價淨值比-TEJ,股價營收比-TEJ,股利殖利率-TSE,現金股利率,股價漲跌(元),高低價差%,市場別,該月結束日,market_cap_sorted
0,1101 台泥,201001,10.92,-12.2058,3292175,98271,0.516,12.43,13.31,10.68,...,1.28,1.28,4.57,4.42,4.42,-4.15,21.1765,TSE,20100129,q10
1,1102 亞泥,201001,12.76,-12.4276,2985735,90468,0.475,14.57,15.2,12.57,...,1.26,1.26,8.56,6.93,5.77,-4.3,18.0636,TSE,20100129,q10
2,1103 嘉泥,201001,9.23,-12.7072,717877,11342,0.06,10.63,11.04,9.06,...,0.68,0.68,4.5,0.0,0.0,-2.3,18.7845,TSE,20100129,q7
3,1104 環泥,201001,7.54,-7.1428,603891,9813,0.052,8.12,9.1,7.31,...,0.81,0.81,3.16,0.0,0.0,-1.25,22.0,TSE,20100129,q7
4,1108 幸福,201001,5.03,-11.5084,404738,3206,0.017,5.68,5.94,4.96,...,0.75,0.75,1.09,0.0,0.0,-1.03,17.3184,TSE,20100129,q4


In [5]:
def _value_weighted_return(group):
    """Market-cap-weighted mean of the monthly return for one (month, decile) group.

    Rows missing either the return (報酬率％_月) or the market cap (市值(百萬元))
    are excluded from both the numerator and the denominator, so the weights of
    the remaining stocks still sum to one. Returns NaN when no usable weight
    remains (empty group or zero total market cap).
    """
    usable = group.dropna(subset=['報酬率％_月', '市值(百萬元)'])
    total_cap = usable['市值(百萬元)'].sum()
    if total_cap == 0:
        return np.nan
    return (usable['報酬率％_月'] * usable['市值(百萬元)']).sum() / total_cap


# One value-weighted return per (month, decile). Only the two columns the
# helper needs are selected, so it never operates on the grouping columns.
# NOTE(review): weights use the same month's market cap as the return; confirm
# whether prior-month caps were intended for the weighting.
weighted_returns = (
    df.groupby(['年月', 'market_cap_sorted'])[['報酬率％_月', '市值(百萬元)']]
    .apply(_value_weighted_return)
    .reset_index()
)

# NOTE(review): the column is called 'ExRet', but no risk-free rate is subtracted
# in this cell — confirm whether these are meant to be excess or raw returns.
weighted_returns.columns = ['date', 'portf', 'ExRet']

In [6]:
# Display the long-format result: one row per (date, portf) pair with its return.
weighted_returns

Unnamed: 0,date,portf,ExRet
0,201001,q1,-2.264849
1,201001,q2,-6.437285
2,201001,q3,-8.296804
3,201001,q4,-4.997506
4,201001,q5,-7.932275
...,...,...,...
1675,202312,q6,0.467620
1676,202312,q7,1.097020
1677,202312,q8,0.068645
1678,202312,q9,1.576679


In [7]:
# Reshape from long to wide: one row per date, one column per portfolio,
# each cell holding that portfolio's return for the month.
result_df = weighted_returns.pivot(
    index='date',
    columns='portf',
    values='ExRet',
)

# Show the wide table.
result_df

portf,q1,q2,q3,q4,q5,q6,q7,q8,q9,q10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
201001,-2.264849,-6.437285,-8.296804,-4.997506,-7.932275,-9.722709,-6.610153,-8.120098,-6.408804,-5.456440
201002,-1.701972,-2.860702,-0.483736,-0.917506,-2.058160,-0.909504,-1.424251,-3.569723,-4.069000,-2.187984
201003,11.642530,12.690145,12.643333,12.949478,11.109058,11.685387,11.677471,9.519990,8.712207,5.221342
201004,5.401096,3.947057,2.328018,3.088675,1.511558,0.254254,1.422132,1.428737,1.802785,1.627138
201005,-9.384219,-6.514104,-9.925066,-8.175461,-8.083272,-9.895033,-7.950852,-8.721672,-7.460651,-6.770519
...,...,...,...,...,...,...,...,...,...,...
202308,-2.523296,-0.068937,-2.340686,-3.901179,-1.590143,-3.095781,-1.839347,0.550902,-1.679048,-2.299595
202309,-0.184798,0.274205,0.675675,0.377878,1.011442,0.630239,-0.521626,1.409000,0.953025,-1.698787
202310,-2.419347,-2.726660,-0.182091,-1.460137,-2.252726,-3.583913,-3.039549,-3.320215,-3.569424,-1.266629
202311,5.548351,7.752075,7.361149,9.877273,11.103797,8.556867,9.392672,10.369305,10.607960,9.084072


In [8]:
# Write the wide date-by-portfolio return table to disk (the date index is
# written as the first column).
# NOTE(review): absolute, machine-specific output path.
output_csv = '/Users/shawn/Github/M1/金融計量/10_market_cap.csv'
result_df.to_csv(output_csv)