In [7]:
import pandas as pd

# 读取数据
financial_ratio = pd.read_csv('/Users/shawn/Github/M1/金融機構與風險管理/S_P500 財務比率.csv')
rating = pd.read_csv('/Users/shawn/Github/M1/金融機構與風險管理/rating.csv')

financial_ratio['datadate'] = pd.to_datetime(financial_ratio['datadate'], format='%Y/%m/%d')
financial_ratio['datadate'] = financial_ratio['datadate'].dt.strftime('%Y-%m-%d')


rating['datadate'] = pd.to_datetime(rating['datadate'], format='%Y-%m-%d')
rating['datadate'] = rating['datadate'].dt.strftime('%Y-%m-%d')

In [8]:
merged_df = pd.merge(financial_ratio, rating[['datadate', 'ticker', 'splticrm']], on=['datadate', 'ticker'], how='left')
merged_df['splticrm']


print(merged_df[merged_df['ticker'] == 'AMD'][['datadate','ticker','splticrm']].head()) 
print(rating[rating['ticker'] == 'AMD'][['datadate','ticker','splticrm']].head()) 

         datadate ticker splticrm
31998  2010-01-31    AMD       B-
31999  2010-02-28    AMD       B-
32000  2010-03-31    AMD       B-
32001  2010-04-30    AMD       B-
32002  2010-05-31    AMD       B-
       datadate ticker splticrm
388  2001-01-31    AMD        B
389  2001-02-28    AMD        B
390  2001-03-31    AMD        B
391  2001-04-30    AMD        B
392  2001-05-31    AMD        B


In [9]:
# financial_ratio, rating期間範圍無對齊
merged_df.dropna(subset=['splticrm'], inplace=True)
def filter_rows(group):
    # 組內rating前後不同的才會被留下
    mask = group['splticrm'] != group['splticrm'].shift()
    return group[mask]
merged_df = merged_df.groupby('ticker').apply(filter_rows).reset_index(drop=True)
merged_df['ticker'].value_counts().to_frame() 


Unnamed: 0_level_0,count
ticker,Unnamed: 1_level_1
CHK,12
AKS,7
ETFC,7
M,7
DO,7
...,...
JNJ,1
KEY,1
KMB,1
KR,1


In [4]:
# merged_df保留了所有ticker
merged_df2 = merged_df.groupby('ticker').filter(lambda x: len(x) > 1)
print(merged_df2['ticker'].value_counts().to_frame())
print(len(merged_df2))
merged_df2.to_csv('SP500_merge.csv')

        count
ticker       
CHK        12
ETFC        7
DO          7
AKS         7
M           7
...       ...
OKE         2
D           2
ODP         2
DD          2
PEG         2

[225 rows x 1 columns]
668


In [78]:
print(merged_df2['splticrm'].value_counts().to_frame())

          count
splticrm       
BBB         109
BBB+        106
A-           81
BBB-         76
A            58
BB+          44
BB-          36
A+           36
BB           35
AA-          23
B            18
B+           15
AA            8
B-            7
CCC+          6
AA+           3
AAA           2
CCC           2
CC            2
D             1


In [79]:
def process_data(group):
    # 評等排序
    rating_order = ['D', 'CC', 'CCC', 'CCC+', 
                    'B-', 'B', 'B+', 'BB-', 'BB','BB+', 'BBB-', 'BBB', 'BBB+', 
                    'A-', 'A', 'A+', 'AA-', 'AA', 'AA+', 'AAA']
    def is_upgraded(row, prev_row):
        if prev_row is not None and row['splticrm'] in rating_order and prev_row['splticrm'] in rating_order:
            return 1 if rating_order.index(row['splticrm']) > rating_order.index(prev_row['splticrm']) else 0
        return 0
    # 保留前一期的評價
    group['prev_rating'] = group['splticrm'].shift(1)
    # 升級與否
    group['is_upgraded'] = group.apply(lambda row: is_upgraded(row, {'splticrm': row['prev_rating']}), axis=1)
    # 變化率
    for col in group.columns:
        if pd.api.types.is_numeric_dtype(group[col]):
            group[col + '_change'] = group[col].pct_change()

    # 刪除前一期評價(prev_rating)
    # group = group[group['is_upgraded'] == 1].drop(columns=['prev_rating'])
    group = group.dropna(subset=['prev_rating'])

    return group

In [80]:
merged_df3 = merged_df2.groupby('ticker').apply(process_data).reset_index(drop=True)
merged_df3['prev_rating'].head()

0      AA
1      A+
2    BBB+
3    BBB+
4      A-
Name: prev_rating, dtype: object

In [81]:
merged_df2[['ticker','splticrm']]

Unnamed: 0,ticker,splticrm
1,ABT,AA
2,ABT,A+
3,ABT,BBB
4,ADBE,BBB+
5,ADBE,A-
...,...,...
753,XRAY,BBB+
754,XRX,BBB
755,XRX,BBB-
756,XRX,BBB


In [82]:
merged_df3[['ticker','is_upgraded']]

Unnamed: 0,ticker,is_upgraded
0,ABT,0
1,ABT,0
2,ADBE,1
3,ADI,1
4,ADI,0
...,...,...
438,XRAY,1
439,XRAY,0
440,XRX,0
441,XRX,1


In [83]:
merged_df3 = merged_df3.drop(columns=['gvkey','permno','adate','qdate'])
merged_df3.to_csv('/Users/shawn/Github/M1/金融機構與風險管理/SP500_change.csv')

In [6]:
ratings_map = {
    'D': 0, 'CC': 1, 'CCC': 2, 'CCC+': 3,
    'B-': 4, 'B': 5, 'B+': 6, 'BB-': 7, 'BB': 8, 'BB+': 9,
    'BBB-': 10, 'BBB': 11, 'BBB+': 12,
    'A-': 13, 'A': 14, 'A+': 15, 'AA-': 16, 'AA': 17, 'AA+': 18, 'AAA': 19
}

# 应用映射到 merged_df2['splticrm']
merged_df2['splticrm'] = merged_df2['splticrm'].map(ratings_map)
print(merged_df2['splticrm'])


1     NaN
2     NaN
3     NaN
4     NaN
5     NaN
       ..
753   NaN
754   NaN
755   NaN
756   NaN
757   NaN
Name: splticrm, Length: 668, dtype: float64
