In [1]:
import pandas as pd

# Read the precomputed statistics from disk (nothing is recomputed here) and
# make sure the "year" column is an integer type.
df = pd.read_csv("brexit_stats.csv").astype({"year": int})

print(f"Number of rows in df: {len(df)}")
df.head(5)

Number of rows in df: 410


Unnamed: 0,ticker,year,month,day,filename,BrexitExposure,BrexitRisk,BrexitSentiment,NonBrexitRisk,NonBrexitSentiment,company,category
0,DPWGn.DE,2013,11,12,/Users/min/brexit-uncertainty/src/processed/20...,0.0,0.0,-0.0,4.0,73.0,DHL,Multi-national
1,BP.L,2013,7,30,/Users/min/brexit-uncertainty/src/processed/20...,0.0,0.0,-0.0,8.0,-57.0,BP,UK
2,LLY.N,2013,4,24,/Users/min/brexit-uncertainty/src/processed/20...,0.0,0.0,-0.0,1.0,2.0,Eli & Lilly,Multi-national
3,F.N,2013,10,24,/Users/min/brexit-uncertainty/src/processed/20...,0.0,0.0,-0.0,4.0,9.0,Ford,US
4,7203.T,2013,2,5,/Users/min/brexit-uncertainty/src/processed/20...,0.0,0.0,-0.0,0.0,-1.0,Toyota,Multi-national


In [2]:
# Ticker -> (display name, category) for the firms under study.
# Two tickers (KHC.OQ and KRFT.O) share the "Kraft Heinz" label, so the
# mapping has more keys than distinct company names.
COMPANY_INFO = {
    # UK
    "TSCO.L": ("Tesco", "UK"),
    "BARC.L": ("Barclays", "UK"),
    "AZN.L": ("AstraZeneca", "UK"),
    "BP.L": ("BP", "UK"),
    # US
    "KR.N": ("Kroger", "US"),
    "F.N": ("Ford", "US"),
    "JNJ.N": ("Johnson & Johnson", "US"),
    "KHC.OQ": ("Kraft Heinz", "US"),
    "KRFT.O": ("Kraft Heinz", "US"),  # pre-merger Kraft Foods
    # Multi-national
    "DPWGn.DE": ("DHL", "Multi-national"),
    "7203.T": ("Toyota", "Multi-national"),
    # NOTE(review): the firm is normally styled "Eli Lilly"; the label is left
    # unchanged here because other cells group and print by it -- confirm
    # before renaming.
    "LLY.N": ("Eli & Lilly", "Multi-national"),
    "ULVR.L": ("Unilever", "Multi-national"),
}


def _company_info(symbol):
    """Return the (company, category) pair for a ticker.

    Tickers missing from COMPANY_INFO keep their own symbol as the company
    name and are placed in the "Other" category.
    """
    return COMPANY_INFO.get(symbol, (symbol, "Other"))


# Overwrite/create the two label columns from the ticker column.
df["company"] = df["ticker"].map(lambda symbol: _company_info(symbol)[0])
df["category"] = df["ticker"].map(lambda symbol: _company_info(symbol)[1])


In [9]:
# Compare the tickers present in the data against the keys of COMPANY_INFO
# and list the ones that have no mapping.

all_tickers = sorted(df["ticker"].unique())
mapped_tickers = sorted(COMPANY_INFO)
# all_tickers is already sorted and duplicate-free, so filtering it keeps the
# result sorted as well.
unmapped_tickers = [symbol for symbol in all_tickers if symbol not in COMPANY_INFO]

for heading, tickers in (
    ("All tickers in data:", all_tickers),
    ("\nMapped tickers (12-firm universe, including KRFT.O):", mapped_tickers),
    ("\nUnmapped tickers (extra firms beyond the 12):", unmapped_tickers),
):
    print(heading)
    print(tickers)



All tickers in data:
['7203.T', 'AZN.L', 'BARC.L', 'BP.L', 'DPWGn.DE', 'F.N', 'FMC.N', 'JNJ.N', 'KHC.OQ', 'KOG.OL', 'KR.N', 'KRFT.O', 'LLY.N', 'TOM.OL', 'TSCO.L', 'ULVR.L']

Mapped tickers (12-firm universe, including KRFT.O):
['7203.T', 'AZN.L', 'BARC.L', 'BP.L', 'DPWGn.DE', 'F.N', 'JNJ.N', 'KHC.OQ', 'KR.N', 'KRFT.O', 'LLY.N', 'TSCO.L', 'ULVR.L']

Unmapped tickers (extra firms beyond the 12):
['FMC.N', 'KOG.OL', 'TOM.OL']


In [10]:
# For every year in the data, list each ticker seen that year together with
# its company label and category; unmapped ("Other") tickers get a flag.
years = sorted(df["year"].unique())

for year in years:
    in_year = df[df["year"] == year]
    # Keep the first row per ticker: its labels are the ones that get printed.
    first_rows = in_year.drop_duplicates(subset="ticker").set_index("ticker")

    print(f"\nYear {year}:")
    for symbol in sorted(first_rows.index):
        name = first_rows.loc[symbol, "company"]
        group = first_rows.loc[symbol, "category"]
        marker = "" if group != "Other" else " (OTHER)"
        print(f"  {symbol:8s} -> {name} [{group}]{marker}")


Year 2013:
  7203.T   -> Toyota [Multi-national]
  AZN.L    -> AstraZeneca [UK]
  BARC.L   -> Barclays [UK]
  BP.L     -> BP [UK]
  DPWGn.DE -> DHL [Multi-national]
  F.N      -> Ford [US]
  JNJ.N    -> Johnson & Johnson [US]
  KR.N     -> Kroger [US]
  KRFT.O   -> Kraft Heinz [US]
  LLY.N    -> Eli & Lilly [Multi-national]
  TSCO.L   -> Tesco [UK]

Year 2014:
  7203.T   -> Toyota [Multi-national]
  AZN.L    -> AstraZeneca [UK]
  BARC.L   -> Barclays [UK]
  BP.L     -> BP [UK]
  DPWGn.DE -> DHL [Multi-national]
  F.N      -> Ford [US]
  JNJ.N    -> Johnson & Johnson [US]
  KR.N     -> Kroger [US]
  KRFT.O   -> Kraft Heinz [US]
  LLY.N    -> Eli & Lilly [Multi-national]
  TSCO.L   -> Tesco [UK]

Year 2015:
  7203.T   -> Toyota [Multi-national]
  AZN.L    -> AstraZeneca [UK]
  BARC.L   -> Barclays [UK]
  BP.L     -> BP [UK]
  DPWGn.DE -> DHL [Multi-national]
  F.N      -> Ford [US]
  JNJ.N    -> Johnson & Johnson [US]
  KR.N     -> Kroger [US]
  KRFT.O   -> Kraft Heinz [US]
  LLY.N    -

In [5]:
# Rows whose ticker fell through to the "Other" category.
extra = df[df["category"] == "Other"]

# Number of rows per (company, ticker, year), as a flat table ordered by
# company and then year.
rows_per_key = extra.groupby(["company", "ticker", "year"]).size()
extra_summary = (
    rows_per_key.rename("n_transcripts")
                .reset_index()
                .sort_values(["company", "year"])
)

print("Extra (non-12-firm) tickers by year:")
print(extra_summary.to_string(index=False))


Extra (non-12-firm) tickers by year:
company ticker  year  n_transcripts
  FMC.N  FMC.N  2020              1
  FMC.N  FMC.N  2021              4
 KOG.OL KOG.OL  2019              4
 KOG.OL KOG.OL  2020              4
 KOG.OL KOG.OL  2021              4
 TOM.OL TOM.OL  2019              4


In [6]:
# Report every (company, year) pair with no transcripts among the mapped
# (non-"Other") firms.
years = sorted(df["year"].unique())
mapped_rows = df[df["category"] != "Other"]

# Take the firm list from COMPANY_INFO as well as from the data, so that a
# mapped firm with no transcripts at all is reported as missing in every year
# instead of silently dropping out of the report.
companies = sorted(
    {name for name, _category in COMPANY_INFO.values()} | set(mapped_rows["company"])
)

# Row count per company (rows) and year (columns). Reindexing over the full
# firm x year grid adds all-zero rows/columns for firms or years that have no
# rows in mapped_rows, so every lookup below is guaranteed to succeed.
coverage = (
    mapped_rows
      .groupby(["company", "year"])
      .size()
      .unstack(fill_value=0)
      .reindex(index=companies, columns=years, fill_value=0)
)

# Pairs are collected in (company, year) sorted order.
missing_pairs = [
    (company, year)
    for company in companies
    for year in years
    if coverage.loc[company, year] == 0
]

print("Company-years with 0 transcripts (among the 12 firms):")
for c, y in missing_pairs:
    print(f"  {c:18s} {y}")


Company-years with 0 transcripts (among the 12 firms):
  AstraZeneca        2020
  DHL                2019
  DHL                2020
  DHL                2021
  Ford               2019
  Ford               2020
  Ford               2021
  Kroger             2019
  Kroger             2020
  Kroger             2021
  Unilever           2013
  Unilever           2014
  Unilever           2015
  Unilever           2019
  Unilever           2020
  Unilever           2021
