In [9]:
import pandas as pd

In [10]:
# Load the two score tables that are compared below.
# File names suggest Bloomberg ESG disclosure scores and Refinitiv ESG scores
# -- TODO confirm the schema (the comparison expects a 'ticker' column plus
# one column per year).
bloomberg_not_include_score_processed = pd.read_csv(
    '../CSR_score/bloomber_tickers_esg_disclosure_score_1009.csv'
)
refinitiv_ESG_score = pd.read_csv(
    '../CSR_score/refinitiv_ESG_score_100.csv'
)

In [None]:
def compare_and_export(df1, df2, common_years, output_path=None):
    """Compute per-year score differences (df1 - df2) for tickers present in both frames.

    The two frames are inner-joined on 'ticker'. For every year the result is
    ``df1[year] - df2[year]``, except that the difference is forced to 0 when
    either side holds 0 or -1 (presumably "no score available" markers --
    TODO confirm against the data sources). NaN on either side propagates as NaN.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Must each contain a 'ticker' column and one column per entry of
        ``common_years``.
    common_years : iterable of str
        Column labels of the years to compare.
    output_path : str or path-like, optional
        When truthy, the result is also written there as CSV (without index).

    Returns
    -------
    pandas.DataFrame
        Columns ``['ticker'] + list(common_years)``, one row per matched pair
        of rows from the inner merge.

    Raises
    ------
    KeyError
        If either frame lacks 'ticker' or one of the requested year columns.
    """
    # Accept any iterable of labels, not only a list.
    years = list(common_years)
    wanted = ['ticker'] + years

    # Fail early with a message that says which frame is missing which columns.
    for label, frame in (('df1', df1), ('df2', df2)):
        missing = [col for col in wanted if col not in frame.columns]
        if missing:
            raise KeyError(f"{label} is missing required columns: {missing}")

    # Inner merge on 'ticker'; every year column exists on both sides, so each
    # one receives the '_df1' / '_df2' suffix.
    merged = df1[wanted].merge(df2[wanted], on='ticker', suffixes=('_df1', '_df2'))

    # Values that mark a year as not comparable.
    excluded_values = [0, -1]

    # Keep only the ticker plus the computed differences.
    result = merged[['ticker']].copy()
    for year in years:
        left = merged[f"{year}_df1"]
        right = merged[f"{year}_df2"]
        not_comparable = left.isin(excluded_values) | right.isin(excluded_values)
        # Vectorized equivalent of the row-wise rule: 0 where either side is
        # excluded, otherwise the plain difference.
        result[year] = (left - right).mask(not_comparable, 0)

    # Optionally persist the result as CSV.
    if output_path:
        result.to_csv(output_path, index=False)
        print(f"結果已儲存至 {output_path}")

    return result

In [13]:
# Compare two DataFrames (here: Bloomberg vs. Refinitiv ESG data).
# Year columns are labelled with strings, "2005" through "2024".
common_years = list(map(str, range(2005, 2025)))

result = compare_and_export(
    df1=bloomberg_not_include_score_processed,  # first DataFrame
    df2=refinitiv_ESG_score,                    # second DataFrame
    common_years=common_years,
    output_path='../CSR_score/common_score_ESG_1009.csv',  # optional
)

# The DataFrame is returned whether or not a CSV was written; preview it.
print(result.head())

結果已儲存至 ../CSR_score/common_score_ESG_1009.csv
  ticker       2005      2006       2007      2008       2009       2010  \
0    AAL -21.859142 -3.090015 -32.270374 -42.85253 -38.978609 -48.721519   
1   ACAD   0.000000  0.000000   0.000000   0.00000   0.000000   0.000000   
2    ACI   0.000000  0.000000   0.000000   0.00000   0.000000   0.000000   
3    AGI   0.000000  0.000000   0.000000   0.00000   0.000000  17.508158   
4    AJX   0.000000  0.000000   0.000000   0.00000   0.000000   0.000000   

        2011       2012       2013  ...       2015       2016       2017  \
0 -51.990725 -55.278625 -51.441634  ... -32.727042 -29.976215 -19.834347   
1   0.000000   0.000000   0.000000  ...   9.796640  13.586094   3.548823   
2   0.000000   0.000000   0.000000  ...   0.000000   0.000000   0.000000   
3  18.573775  14.407252   8.854204  ...  17.503876   9.511703  14.953062   
4   0.000000   0.000000   0.000000  ...   0.000000  26.908100  27.196394   

        2018       2019       2020      

In [19]:
# Reload two saved difference tables: the 0319 file and the 1009 file
# (the 1009 path matches the CSV written by the compare_and_export call).
common_score_ESG_0319 = pd.read_csv(
    '../CSR_score/common_score_ESG_0319.csv'
)
common_score_ESG_1009 = pd.read_csv(
    '../CSR_score/common_score_ESG_1009.csv'
)

In [None]:
# Stack the 0319 rows on top of the 1009 rows, then collapse to one row per
# ticker. Because 0319 comes first in the concat, its values win for duplicate
# tickers (0319 is presumably the newer export -- TODO confirm).
# NOTE: GroupBy.first() takes the first *non-null* value per column, so a NaN
# in the 0319 row is filled from the 1009 row rather than kept. The result is
# also sorted by ticker (groupby default).
common_score_ESG_all = (
    pd.concat([common_score_ESG_0319, common_score_ESG_1009], ignore_index=True)
    .groupby('ticker', as_index=False)
    .first()
)
common_score_ESG_all.to_csv('../CSR_score/common_score_ESG_all.csv', index=False)