In [4]:
import pandas as pd

# Load the cleaned FBIC table and the normalized sentiment index.
current_df = pd.read_csv("./cleaned/FBIC_cleaned.csv")
sentiment_df = pd.read_csv("./cleaned/sentiment_index_normalized.csv")

# Work on a copy so the HKG -> CHN recoding below never touches current_df.
temp_df = current_df.copy()

# For the join only, HKG rows are looked up under CHN in the sentiment table.
temp_df["iso3a"] = temp_df["iso3a"].replace("HKG", "CHN")
temp_df["iso3b"] = temp_df["iso3b"].replace("HKG", "CHN")

# Align the sentiment table's key names with the FBIC key names.
sentiment_df = sentiment_df.rename(
    columns={"Year": "year", "Actor1CountryCode": "iso3a", "Actor2CountryCode": "iso3b"}
)

# Left-join sentiment onto the recoded rows. The original country codes travel
# through the merge in helper columns: restoring them afterwards by index
# alignment with current_df would silently assign wrong codes if the merge ever
# returned a different number of rows (e.g. duplicate keys in sentiment_df).
merged_df = pd.merge(
    temp_df.assign(
        _orig_iso3a=current_df["iso3a"],
        _orig_iso3b=current_df["iso3b"],
    ),
    sentiment_df[["year", "iso3a", "iso3b", "sentiment_index"]],
    on=["year", "iso3a", "iso3b"],
    how="left",
)

# Restore the original (pre-recoding) codes. Assigning into the existing
# columns keeps the column order; pop() removes the helper columns.
merged_df["iso3a"] = merged_df.pop("_orig_iso3a")
merged_df["iso3b"] = merged_df.pop("_orig_iso3b")

# Only use year from 2006 onwards. The explicit copy makes the column
# assignments below operate on an independent frame rather than on a slice
# (avoids SettingWithCopyWarning).
merged_df = merged_df[merged_df["year"] >= 2006].copy()

# Per-row mean sentiment of the row's (iso3a, iso3b) pair; NaN when the pair
# has no sentiment value in any remaining year.
pair_avg_sentiment = merged_df.groupby(["iso3a", "iso3b"])["sentiment_index"].transform(
    "mean"
)

# Mean over all remaining non-missing sentiment values, used as a last resort.
global_avg_sentiment = merged_df["sentiment_index"].mean()

# Fill missing values first with the pair average, then with the global average.
merged_df["sentiment_index"] = merged_df["sentiment_index"].fillna(pair_avg_sentiment)
merged_df["sentiment_index"] = merged_df["sentiment_index"].fillna(global_avg_sentiment)

# Columns removed from the output; drop() raises KeyError if any is absent.
drop_cols = [
    "bandwidth",
    "norm_allianceindex",
    "norm_lor_avg",
    "securitybandwidth",
    "securitydependence",
]
merged_df = merged_df.drop(columns=drop_cols)

# Save the merged dataframe (without the index column).
merged_df.to_csv("./cleaned/FBIC_with_sentiment.csv", index=False)

In [5]:
# Column names in the Comtrade file mapped to the key names used by the FBIC table.
comtrade_key_names = {
    "period": "year",
    "reporter_iso3": "iso3a",
    "partner_iso3": "iso3b",
}
join_keys = ["year", "iso3a", "iso3b"]

# Load both inputs; the Comtrade keys are renamed right after loading.
df_fbic = pd.read_csv("./cleaned/FBIC_with_sentiment.csv")
df_comtrade = pd.read_csv("./cleaned/comtrade_cleaned.csv").rename(
    columns=comtrade_key_names
)

# Left join: every FBIC row is kept; rows without a Comtrade match get NaN
# in the Comtrade columns.
merged_df = df_fbic.merge(df_comtrade, on=join_keys, how="left")

# Write the combined table (without the index column).
merged_df.to_csv("./cleaned/FBIC_sentiment_comtrade.csv", index=False)

In [6]:
# Read the combined FBIC / sentiment / Comtrade table.
df = pd.read_csv("./cleaned/FBIC_sentiment_comtrade.csv")

# BEC proportion columns that are imputed directly. bec_8_prop is excluded
# because it is derived below as the remainder.
bec_cols = [
    "bec_1_prop",
    "bec_2_prop",
    "bec_3_prop",
    "bec_4_prop",
    "bec_5_prop",
    "bec_6_prop",
    "bec_7_prop",
]

# Fill missing values with the mean of the same country pair (iso3a, iso3b).
# One vectorized groupby over all seven columns replaces a Python-level lambda
# per group per column. Pairs with no observed value in a column stay NaN.
pair_means = df.groupby(["iso3a", "iso3b"])[bec_cols].transform("mean")
df[bec_cols] = df[bec_cols].fillna(pair_means)

# bec_8_prop is 1 minus the sum of the other proportions. min_count=1 keeps the
# result NaN for rows where every BEC column is still missing; a plain sum()
# skips NaN, returns 0 for such rows, and would report a made-up share of 1.0.
df["bec_8_prop"] = 1 - df[bec_cols].sum(axis=1, min_count=1)

# Save the updated dataframe back to the same file (without the index column).
df.to_csv("./cleaned/FBIC_sentiment_comtrade.csv", index=False)