In [24]:
import polars as pl
from record_consolidation.graphs import (
    unconsolidated_df_to_subgraphs,
)
from record_consolidation.subgraph_post_processing.specific_algs.deprecated__split_high_betweenness import (
    draw_graph,
)
import networkx as nx

from record_consolidation.subgraph_post_processing.specific_algs.partition_companies import (
    partition_companies_graph_where_necessary,
)

from record_consolidation.subgraph_post_processing.specific_algs.partitioning_algs import (
    partition_via_louvain,
)

from functools import partial
from record_consolidation.graphs import extract_normalized_atomic

from record_consolidation.df_consolidations import (
    normalize_subset,
)

In [25]:
from typing import Iterable
from warnings import warn


def extract_specific_name_subgraph(
    connected_subgs: Iterable[nx.Graph], name: str
) -> nx.Graph:
    """Return the subgraph that has a node whose label contains `name`.

    Matching is a case-insensitive substring test against each node label.
    Node labels that are not strings are skipped instead of raising.

    Args:
        connected_subgs: Subgraphs to search; consumed exactly once, so a
            generator is fine.
        name: Substring to look for in the node labels.

    Returns:
        The only matching subgraph, or, when several match, the one with the
        most nodes (the earliest in input order wins a tie).

    Raises:
        ValueError: If no subgraph has a node label containing `name`.

    Warns:
        UserWarning: If more than one subgraph matches.
    """
    needle = name.lower()

    # Dict keys de-duplicate a graph object that appears more than once while
    # keeping input order, so the tie-break below is deterministic.
    matching: dict[nx.Graph, None] = {}
    for subg in connected_subgs:
        if any(
            isinstance(node, str) and needle in node.lower() for node in subg.nodes
        ):
            matching[subg] = None
    collected = list(matching)

    if not collected:
        raise ValueError(f"Could not find {name=} in connected_subgs.")
    if len(collected) > 1:
        alert_str = f"{len(collected)} subgraphs have {name=}.\n{[len(subg.nodes) for subg in collected]=}"
        # Ambiguity is reported but tolerated: the largest subgraph is returned.
        warn(alert_str)
        return max(collected, key=lambda g: len(g.nodes))
    return collected[0]

In [26]:
# Raw vote records, read from a local parquet file.
# votes: pl.DataFrame = access_db_table("raw_output", "votes")
votes: pl.DataFrame = pl.read_parquet("raw_votes.parquet")

# Company-identifying columns. The list form is the single source of truth; the
# expression form is derived from it so the two cannot drift apart.
COMPANY_COLS_L = ["issuer_name", "cusip", "isin", "figi"]
COMPANY_COLS = pl.col(COMPANY_COLS_L)
MEETING_COLS = pl.col(["issuer_name", "meeting_date", "cusip", "isin", "figi"])

In [27]:
# from record_consolidation.utils.polars_df import remove_string_nulls_and_uppercase


# raw_subgraphs = tuple(
#     unconsolidated_df_to_subgraphs(
#         votes.select(COMPANY_COLS),
#         connected_subgraphs_postprocessor=None,
#         pre_processing_fnc=remove_string_nulls_and_uppercase,
#     )
# )
# mattel_subg = extract_specific_name_subgraph(raw_subgraphs, "mattel")
# draw_graph(mattel_subg, 7)

In [28]:
# from record_consolidation.graphs import extract_consolidation_mapping_from_subgraphs
# from record_consolidation.utils.polars_df import remove_string_nulls_and_uppercase


# canonical_mapping = extract_consolidation_mapping_from_subgraphs(
#     unconsolidated_df_to_subgraphs(
#         votes.select(COMPANY_COLS),
#         connected_subgraphs_postprocessor=partition_companies_graph_where_necessary,
#         pre_processing_fnc=remove_string_nulls_and_uppercase,
#     )
# )

In [29]:
# Normalize the company columns of `votes` twice, differing only in
# `consolidate_twice`, so the two results can be compared side by side.
# The column list comes from COMPANY_COLS_L instead of being re-typed per call.
# NOTE(review): the first call relies on the default of `consolidate_twice`;
# presumably that default is True — confirm against `normalize_subset`.
normed5_intra = normalize_subset(
    votes,
    connected_subgraphs_postprocessor=partition_companies_graph_where_necessary,
    cols_to_normalize=COMPANY_COLS_L,
)
normed5_NO_intra = normalize_subset(
    votes,
    connected_subgraphs_postprocessor=partition_companies_graph_where_necessary,
    cols_to_normalize=COMPANY_COLS_L,
    consolidate_twice=False,
)

KeyboardInterrupt: 

In [7]:
# Rows of the normalized frame whose issuer_name contains "jpmorgan"
# (case-insensitive), ordered by accession and then order_recorded.
jpmorgans = (
    normed5_intra
    .filter(pl.col("issuer_name").str.contains("(?i)jpmorgan"))
    .sort("accession", "order_recorded", descending=False)
)
jpmorgans.write_csv("jpmorgans.csv")
# How often each meeting_date appears among those rows, most frequent first.
jpmorgans["meeting_date"].value_counts(sort=True)

meeting_date,count
str,u32
"""2024-05-21""",2641
"""2024-05-22""",9
"""2024-05-20""",4
"""0024-05-20""",1
"""2024-05-24""",1
"""2134-07-25""",1


In [8]:
import plotly.express as px

In [9]:
# Distribution of the number of JPMorgan rows per accession.
px.histogram(
    data_frame=jpmorgans.group_by("accession").agg(pl.len()).sort("len"),
    x="len",
    nbins=160,
)

In [32]:
# Re-export `jpmorgans` to CSV (same target file as the export in the cell that builds it).
jpmorgans.write_csv("jpmorgans.csv")

In [21]:
# Distribution of the number of JPMorgan rows per filer_cik; filers with 800 or
# more rows are left out of the plot.
px.histogram(
    data_frame=(
        jpmorgans.group_by("filer_cik")
        .agg(
            pl.col("order_recorded"),
            pl.col("order_recorded").len().alias("n_order_recorded"),
        )
        .sort("n_order_recorded", descending=True)
        .filter(pl.col("n_order_recorded") < 800)
    ),
    x="n_order_recorded",
    nbins=50,
)

In [24]:
# The 30 most frequent `order_recorded` values among the JPMorgan rows, shown as a pandas frame.
jpmorgans["order_recorded"].value_counts(sort=True).head(30).to_pandas()

Unnamed: 0,order_recorded,count
0,0,787
1,1,132
2,2,106
3,3,94
4,4,92
5,5,92
6,6,91
7,9,91
8,10,91
9,7,91


In [10]:
# For each company column, print the rows of `votes` where the raw value is
# present but the normalized value is null, i.e. values lost by normalization.
# NOTE(review): the mask combines Series taken from two different frames, so it
# assumes `votes` and `normed5_intra` have the same length and row order — confirm.
for col in COMPANY_COLS_L:
    print(col)
    new_nulls = votes[col].is_not_null() & normed5_intra[col].is_null()
    print(votes.filter(new_nulls))

issuer_name
shape: (0, 24)
┌─────────────┬───────┬──────┬──────┬───┬───────────┬────────────┬─────────┬──────────────┐
│ issuer_name ┆ cusip ┆ isin ┆ figi ┆ … ┆ filer_cik ┆ filer_name ┆ fund_id ┆ fund_id_type │
│ ---         ┆ ---   ┆ ---  ┆ ---  ┆   ┆ ---       ┆ ---        ┆ ---     ┆ ---          │
│ str         ┆ str   ┆ str  ┆ str  ┆   ┆ str       ┆ str        ┆ str     ┆ str          │
╞═════════════╪═══════╪══════╪══════╪═══╪═══════════╪════════════╪═════════╪══════════════╡
└─────────────┴───────┴──────┴──────┴───┴───────────┴────────────┴─────────┴──────────────┘
cusip
shape: (0, 24)
┌─────────────┬───────┬──────┬──────┬───┬───────────┬────────────┬─────────┬──────────────┐
│ issuer_name ┆ cusip ┆ isin ┆ figi ┆ … ┆ filer_cik ┆ filer_name ┆ fund_id ┆ fund_id_type │
│ ---         ┆ ---   ┆ ---  ┆ ---  ┆   ┆ ---       ┆ ---        ┆ ---     ┆ ---          │
│ str         ┆ str   ┆ str  ┆ str  ┆   ┆ str       ┆ str        ┆ str     ┆ str          │
╞═════════════╪═══════╪══════╪══

In [11]:
# older

# Per-column ratio of null counts: raw `votes` over the normalized frame.
# Both sides select the same four company columns so the frames line up
# column-for-column instead of depending on how frames of different width align.
display(
    votes.select(COMPANY_COLS.is_null().sum())
    / normed5_intra.select(COMPANY_COLS.is_null().sum())
)

issuer_name,cusip,isin,figi
f64,f64,f64,f64
,4.746032,95.557173,1.505622


In [12]:
# prev

# Per-column ratio of null counts: raw `votes` over the normalized frame.
# Both sides select the same four company columns so the frames line up
# column-for-column instead of depending on how frames of different width align.
display(
    votes.select(COMPANY_COLS.is_null().sum())
    / normed5_intra.select(COMPANY_COLS.is_null().sum())
)

issuer_name,cusip,isin,figi
f64,f64,f64,f64
,4.746032,95.557173,1.505622


In [13]:
# Per-column ratio of null counts: raw `votes` over the normalized frame.
# Both sides select the same four company columns so the frames line up
# column-for-column instead of depending on how frames of different width align.
display(
    votes.select(COMPANY_COLS.is_null().sum())
    / normed5_intra.select(COMPANY_COLS.is_null().sum())
)

issuer_name,cusip,isin,figi
f64,f64,f64,f64
,4.746032,95.557173,1.505622


In [14]:
# Number of distinct issuer_name values in the raw data.
votes["issuer_name"].n_unique()

19277

In [15]:
# The 30 most frequent issuer names in the normalized frame, as a plain list
# (`to_series()` takes the first column of the value-counts frame).
normed5_intra["issuer_name"].value_counts(sort=True).head(30).to_series().to_list()

['MICROSOFT CORPORATION',
 'THE WALT DISNEY COMPANY',
 'AMAZON.COM, INC.',
 'ALPHABET INC.',
 'JPMORGAN CHASE & CO.',
 'THE PROCTER & GAMBLE COMPANY',
 'ORACLE CORPORATION',
 'CISCO SYSTEMS, INC.',
 'APPLE INC.',
 'JOHNSON & JOHNSON',
 'PEPSICO, INC.',
 'VISA INC.',
 'CHEVRON CORPORATION',
 'MASTERCARD INCORPORATED',
 'ABBVIE INC.',
 'BANK OF AMERICA CORPORATION',
 'MERCK & CO., INC.',
 'CHUBB LIMITED',
 'THE HOME DEPOT, INC.',
 'NVIDIA CORPORATION',
 'TE CONNECTIVITY LTD.',
 'EXXON MOBIL CORPORATION',
 'META PLATFORMS, INC.',
 'UNITEDHEALTH GROUP INCORPORATED',
 'VERIZON COMMUNICATIONS INC.',
 'THE COCA-COLA COMPANY',
 'SALESFORCE, INC.',
 "MCDONALD'S CORPORATION",
 'WELLS FARGO & COMPANY',
 'ADOBE INC.']

In [16]:
from record_consolidation.utils.polars_df import remove_string_nulls_and_uppercase


# Build `atomic` from the raw company columns, using the same subgraph
# post-processor that is passed to the `normalize_subset` calls.
# NOTE(review): presumably this yields one row per consolidated company —
# confirm against `extract_normalized_atomic`.
atomic = extract_normalized_atomic(
    votes.select(COMPANY_COLS),
    partition_companies_graph_where_necessary,
    pre_processing_fnc=remove_string_nulls_and_uppercase,
)

Post-processing subgraphs.


7615it [00:02, 3299.10it/s]


In [19]:
# Display `atomic` for a visual check.
atomic

cusip,isin,issuer_name,figi
str,str,str,str
"""13646K108""","""CA13646K1084""","""CANADIAN PACIFIC KANSAS CITY L…",
"""000360206""","""US0003602069""","""AAON, INC.""",
"""023135106""","""US0231351067""","""AMAZON.COM, INC.""","""BBG000BVPV84"""
"""032095101""","""US0320951017""","""AMPHENOL CORPORATION""",
"""26441C204""","""US26441C2044""","""DUKE ENERGY CORPORATION""",
…,…,…,…
"""78074G200""","""US78074G2003""","""ROYALE ENERGY, INC.""",
"""G3654P100""","""GB00BCKFY513""","""FOXTONS GROUP PLC""",
"""K16018192""","""DK0060448595""","""COLOPLAST A/S""",
"""629444100""","""US6294441000""","""NRX PHARMACEUTICALS, INC.""",


In [20]:
# Raw issuer_name next to its normalized counterpart, paired by row position.
names_comparison = pl.concat(
    [
        votes.select("issuer_name"),
        normed5_intra.select(pl.col("issuer_name").alias("normed_issuer_name")),
    ],
    how="horizontal",
)
names_comparison

issuer_name,normed_issuer_name
str,str
"""CANADIAN PACIFIC KANSAS CITY L…","""CANADIAN PACIFIC KANSAS CITY L…"
"""AAON, INC.""","""AAON, INC."""
"""AMAZON.COM, INC.""","""AMAZON.COM, INC."""
"""AMPHENOL CORPORATION""","""AMPHENOL CORPORATION"""
"""MICROSOFT CORPORATION""","""MICROSOFT CORPORATION"""
…,…
"""VERALTO CORPORATION""","""VERALTO CORPORATION"""
"""ANALOG DEVICES, INC.""","""ANALOG DEVICES, INC."""
"""EDITAS MEDICINE, INC.""","""EDITAS MEDICINE, INC."""
"""LOCKHEED MARTIN CORPORATION""","""LOCKHEED MARTIN CORPORATION"""


In [21]:
# Spot check: raw names containing "conoco phillips" (case-insensitive) beside their normalized name.
names_comparison.filter(pl.col("issuer_name").str.contains("(?i)conoco phillips"))

issuer_name,normed_issuer_name
str,str
"""Conoco Phillips""","""CONOCOPHILLIPS"""
"""Conoco Phillips""","""CONOCOPHILLIPS"""
"""Conoco Phillips""","""CONOCOPHILLIPS"""
"""Conoco Phillips""","""CONOCOPHILLIPS"""
"""Conoco Phillips""","""CONOCOPHILLIPS"""
…,…
"""CONOCO PHILLIPS""","""CONOCOPHILLIPS"""
"""CONOCO PHILLIPS""","""CONOCOPHILLIPS"""
"""CONOCO PHILLIPS""","""CONOCOPHILLIPS"""
"""CONOCO PHILLIPS""","""CONOCOPHILLIPS"""


In [22]:
# Normalized Microsoft rows that still have no cusip.
normed5_intra.filter(
    pl.col("cusip").is_null()
    & (pl.col("issuer_name") == "MICROSOFT CORPORATION")
)

issuer_name,cusip,isin,figi,meeting_date,date_parsed_w_certainty,vote_description,other_vote_description,vote_source,management_recommendation,how_voted,shares_voted,total_shares_voted,shares_on_loan,vote_manager,vote_series,vote_other_info,accession,date_recorded,order_recorded,filer_cik,filer_name,fund_id,fund_id_type
str,str,str,str,str,i64,str,str,str,str,str,f64,f64,f64,null,str,str,str,str,i64,str,str,str,str


In [30]:
def _summarize_issuer_matches(frame: pl.DataFrame, name: str) -> pl.DataFrame:
    """Summarize the rows of `frame` whose issuer_name contains `name`.

    `name` is interpolated into a case-insensitive regular expression, so it
    matches as a substring (e.g. "visa" also matches "Lovisa").

    Returns one row per distinct issuer_name with its row count (`len`), the
    unique values of the other company columns, and its share of the matched
    rows (`% of obs`), sorted by row count in descending order.
    """
    return (
        frame.select(COMPANY_COLS)
        .filter(pl.col("issuer_name").str.contains(f"(?i){name}"))
        .group_by("issuer_name")
        .agg(pl.len(), pl.all().unique())
        .with_columns((pl.col("len") * 100 / pl.col("len").sum()).alias("% of obs"))
        .sort("len", descending=True)
    )


# For each company of interest, show the raw spellings next to both normalized
# variants, then the raw names that were folded into the normalized name.
for name in (
    # normed5["issuer_name"].value_counts(sort=True).head(30).to_series().to_list()
    [
        "microsoft",
        "disney",
        "amazon",
        "jpmorgan",
        "procter",
        "oracle",
        "apple",
        "visa",
        "exxon",
        "chevron",
        "meta platform",
        "air transport",
        "bank of america",
        "blackrock",
        "suncor",
        "conoco",
        "phillips 66",
    ]
):
    print("*" * 60, name.upper(), "*" * 60)

    # Single quotes inside the f-strings keep this cell valid before Python 3.12.
    t_unnormalized = _summarize_issuer_matches(votes, name)
    print(f"Un-Normalized (n={t_unnormalized['len'].sum()})")
    display(t_unnormalized)

    t_intra = _summarize_issuer_matches(normed5_intra, name)
    print(f"Normalized -- INTRA (n={t_intra['len'].sum()})")
    display(t_intra)

    t = _summarize_issuer_matches(normed5_NO_intra, name)
    print(f"Normalized -- NO INTRA (n={t['len'].sum()})")
    display(t)

    # Raw names whose normalized name matches, most frequent first.
    consolidation_fodder = (
        names_comparison.filter(
            pl.col("normed_issuer_name").str.contains(f"(?i){name}")
        )
        .select("issuer_name")
        .to_series()
        .value_counts(sort=True)
    )
    print("Consolidation Fodder:")
    display(consolidation_fodder.to_pandas())

************************************************************ MICROSOFT ************************************************************
Un-Normalized (n=4729)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""MICROSOFT CORPORATION""",2555,"[""594918104"", null, ""594918105""]","[null, ""US5949181045""]",[null],54.028336
"""Microsoft Corporation""",1795,"[""594918104"", ""594918103"", … ""000594918""]","[null, ""US5949181045""]","[""BBG000BPH459"", null]",37.957285
"""Microsoft Corp""",226,"[null, ""594918104""]","[null, ""US5949181045""]","[null, ""BBG000BPH459""]",4.779023
"""MICROSOFT CORPORATION""",64,"[""594918104""]","[""US5949181045"", null]",[null],1.353352
"""Microsoft Corp.""",60,"[""594918104""]","[""US5949181045"", null]","[""BBG000BPH459"", null]",1.268767
…,…,…,…,…,…
"""MICROSOFT""",2,"[""594918104"", null]",[null],[null],0.042292
"""MICROSOFT CORPORATION COM""",2,"[""594918104""]","[""US5949181045""]",[null],0.042292
"""MICROSOFTCORPORATION""",2,"[""594918104""]","[""US5949181045""]",[null],0.042292
"""MICROSOFT CORP COM""",1,"[""594918104""]","[""US5949181045""]",[null],0.021146


Normalized -- INTRA (n=4734)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""MICROSOFT CORPORATION""",4734,"[""58155Q103"", ""594918104""]","[""US5949181045"", ""US58155Q1031""]","[""BBG000BLNPB7"", ""BBG000BPH459""]",100.0


Normalized -- NO INTRA (n=4733)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""MICROSOFT CORPORATION""",4733,"[""594918104"", null]","[""US5949181045""]","[""BBG000BPH459""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,MICROSOFT CORPORATION,2555
1,Microsoft Corporation,1795
2,Microsoft Corp,226
3,MICROSOFT\nCORPORATION,64
4,Microsoft Corp.,60
5,MICROSOFT CORP,13
6,Microsoft,6
7,MSFT,3
8,MICROSOFT,2
9,MICROSOFT CORPORATION COM,2


************************************************************ DISNEY ************************************************************
Un-Normalized (n=2943)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""The Walt Disney Company""",1489,"[""254687106"", null]","[""US2546871060"", null]","[null, ""BBG000BLNNV0""]",50.594631
"""THE WALT DISNEY COMPANY""",1408,"[""Z54687106"", ""254687106"", null]","[null, ""US2546871060""]","[""BBG000BH4R78"", null]",47.842338
"""THE WALT DISNEY COMPANY""",12,"[""254687106""]","[""US2546871060"", null]",[null],0.407747
"""Walt Disney Co (The)""",9,"[""254687106""]","[""US2546871060""]",[null],0.30581
"""The Walt Disney Co""",3,"[""254687106""]","[""US2546871060"", null]","[null, ""BBG000BH4R79"", ""BBG000BH4R78""]",0.101937
…,…,…,…,…,…
"""WALT DISNEY CO""",1,"[""254687106""]",[null],[null],0.033979
"""Walt Disney""",1,"[""254687106""]",[null],[null],0.033979
"""Walt Disney Company (The)""",1,"[""254687106""]",[null],[null],0.033979
"""THE WALT DISNEY CO""",1,"[""254687106""]",[null],[null],0.033979


Normalized -- INTRA (n=2951)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""THE WALT DISNEY COMPANY""",2951,"[null, ""254687106""]","[""US2546871060"", null]","[""BBG000BH4R78"", null]",100.0


Normalized -- NO INTRA (n=2952)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""THE WALT DISNEY COMPANY""",2952,"[""254687106"", ""855244109""]","[""US8552441094"", ""US2546871060""]","[""BBG000CTQBF3"", ""BBG000BH4R78""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,The Walt Disney Company,1489
1,THE WALT DISNEY COMPANY,1408
2,THE WALT DISNEY\nCOMPANY,12
3,Walt Disney Co (The),9
4,DIS,8
5,Walt Disney Company,3
6,The Walt Disney Co,3
7,DISNEY WALT CO,2
8,WALT DISNEY,2
9,The Walt Disney Company Contested,2


************************************************************ AMAZON ************************************************************
Un-Normalized (n=2890)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AMAZON.COM, INC.""",1656,"[null, ""023135106""]","[""US0231351067"", null]",[null],57.301038
"""Amazon.com, Inc.""",1090,"[""023135106"", ""231351060"", null]","[null, ""US0231351067""]","[""BBG000BVPV84"", null]",37.716263
"""Amazon.com Inc.""",64,"[""023135106""]","[null, ""US0231351067""]",[null],2.214533
"""Amazon.com Inc""",36,"[""023135106""]","[""US0231351067"", null]","[null, ""BBG000BVPV84""]",1.245675
"""Amazon""",9,"[""000023135"", null, ""023135106""]","[""US0231351067"", null]",[null],0.311419
…,…,…,…,…,…
"""AMAZON.COM INC AMZN US""",1,"[""023135106""]",[null],[null],0.034602
"""AMAZON COM INC COM""",1,"[""023135106""]","[""US0231351067""]",[null],0.034602
"""AMAZON.COM INC. COM""",1,"[""023135106""]","[""US0231351067""]",[null],0.034602
"""Amazon.com Inc (AMZN)""",1,"[""023135106""]","[""US0231351067""]",[null],0.034602


Normalized -- INTRA (n=2891)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AMAZON.COM, INC.""",2890,"[""023135106""]","[""US0231351067""]","[""BBG000BVPV84""]",99.96541
"""AMAZON. COM""",1,[null],[null],[null],0.03459


Normalized -- NO INTRA (n=2891)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AMAZON.COM, INC.""",2890,"[""023135106""]","[""US0231351067""]","[""BBG000BVPV84""]",99.96541
"""AMAZON. COM""",1,[null],[null],[null],0.03459


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,"AMAZON.COM, INC.",1656
1,"Amazon.com, Inc.",1090
2,Amazon.com Inc.,64
3,Amazon.com Inc,36
4,Amazon,9
5,AMAZON.COM INC,8
6,AMAZON,5
7,AMAZON COM INC,4
8,Amazon.com,3
9,"Amazon.com, Inc",3


************************************************************ JPMORGAN ************************************************************
Un-Normalized (n=2635)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""JPMORGAN CHASE & CO.""",1324,"[""46625H100"", null]","[""US46625H1005"", null]",[null],50.246679
"""JPMorgan Chase & Co.""",1151,"[null, ""46625h100"", ""46625H100""]","[null, ""US46625H1005""]","[""BBG000DMBXR2"", null]",43.681214
"""JPMORGAN CHASE CO.""",80,"[""46625H100""]",[null],[null],3.036053
"""JPMORGAN CHASE and CO.""",27,"[""46625H100""]","[""US46625H1005""]",[null],1.024668
"""JPMorgan Chase & CO.""",20,"[""46625H100""]","[""US46625H1005""]",[null],0.759013
…,…,…,…,…,…
"""JPMORGAN CHASE &amp; CO.""",2,"[""46625H100""]","[""US46625H1005""]",[null],0.075901
"""JPMorgan""",1,[null],[null],[null],0.037951
"""JPMorgan Chase & Co (JPM)""",1,"[""46625H100""]","[""US46625H1005""]",[null],0.037951
"""JPMORGAN CHASE & CO. COM""",1,"[""46625H100""]","[""US46625H1005""]",[null],0.037951


Normalized -- INTRA (n=2657)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""JPMORGAN CHASE & CO.""",2656,"[""46625H100"", null]","[null, ""US46625H1005""]","[null, ""BBG000DMBXR2""]",99.962364
"""JPMORGAN""",1,[null],[null],[null],0.037636


Normalized -- NO INTRA (n=2660)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""JPMORGAN CHASE & CO.""",2659,"[""717081103"", ""46625H100"", ""02209S103""]","[""US7170811035"", ""US46625H1005"", ""US02209S1033""]","[""BBG000BR2B91"", ""BBG000DMBXR2"", ""BBG000BP6LJ8""]",99.962406
"""JPMORGAN""",1,[null],[null],[null],0.037594


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,JPMORGAN CHASE & CO.,1324
1,JPMorgan Chase & Co.,1151
2,JPMORGAN CHASE CO.,80
3,JPMORGAN CHASE and CO.,27
4,JPMorgan Chase & CO.,20
5,JPMorgan Chase & Co,11
6,JPMORGAN CHASE & CO,9
7,JPMorgan Chase,7
8,JP Morgan Chase & Co.,7
9,JP Morgan Chase & Co,7


************************************************************ PROCTER ************************************************************
Un-Normalized (n=2499)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""THE PROCTER & GAMBLE COMPANY""",1186,"[null, ""742718109""]","[""US7427181091"", null]",[null],47.458984
"""The Procter & Gamble Company""",1001,"[""742718109"", null]","[null, ""US7427181091""]","[null, ""BBG000BR2TH3"", ""BBG000BLNNV0""]",40.056022
"""THE PROCTER GAMBLE COMPANY""",100,"[""742718109""]",[null],[null],4.001601
"""THE PROCTER and GAMBLE COMPANY""",78,"[""742718109""]","[""US7427181091""]",[null],3.121248
"""THE PROCTER & GAMBLE COMPANY""",65,"[""742718109""]","[null, ""US7427181091""]",[null],2.60104
…,…,…,…,…,…
"""THE PROCTER AND GAMBLE COMPANY""",2,"[""742718109""]",[null],[null],0.080032
"""PROCTER & GAMBLE COMPANY COM""",2,"[""742718109""]","[""US7427181091""]",[null],0.080032
"""PROCTER AND GAMBLE CO""",1,"[""742718109""]",[null],"[""BBG001S5V4L9""]",0.040016
"""PROCTER AND GAMBLE CO COM""",1,"[""742718109""]","[""US7427181091""]",[null],0.040016


Normalized -- INTRA (n=2611)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""THE PROCTER & GAMBLE COMPANY""",2602,"[null, ""742718109""]","[null, ""US7427181091""]","[null, ""BBG000BLNNV0""]",99.655304
"""PROCTER & GAMBLE HYGIENE & HEA…",9,"[""Y7089A117""]","[""INE179A01014""]",[null],0.344696


Normalized -- NO INTRA (n=2612)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""THE PROCTER & GAMBLE COMPANY""",2603,"[""460690100"", ""742718109""]","[""US7427181091"", ""US4606901001""]","[""BBG000BLNNV0""]",99.655436
"""PROCTER & GAMBLE HYGIENE & HEA…",9,"[""Y7089A117""]","[""INE179A01014""]",[null],0.344564


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,THE PROCTER & GAMBLE COMPANY,1186
1,The Procter & Gamble Company,1001
2,THE PROCTER GAMBLE COMPANY,100
3,Proctor & Gamble Company,100
4,THE PROCTER and GAMBLE COMPANY,78
5,THE PROCTER & GAMBLE\nCOMPANY,65
6,Procter & Gamble Company,25
7,The Procter & Gamble Co.,10
8,PROCTER & GAMBLE HYGIENE & HEALTH CARE LTD,9
9,Procter & Gamble,5


************************************************************ ORACLE ************************************************************
Un-Normalized (n=2525)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""ORACLE CORPORATION""",1287,"[null, ""68389X105""]","[""US68389X1054"", null]",[null],50.970297
"""Oracle Corporation""",1097,"[null, ""68389X105"", ""68389x105""]","[""US68389X1054"", null]","[""BBG000BQLTW7"", null]",43.445545
"""Oracle Corp""",69,"[""68389X105""]","[null, ""US68389X1054"", ""US68389X1055""]","[""BBG000BQLTW7"", null]",2.732673
"""Oracle Corp.""",17,"[""68389X105""]","[null, ""US68389X1054""]",[null],0.673267
"""Oracle Financial Services Soft…",16,"[null, ""Y3864R102""]","[""INE881D01027""]",[null],0.633663
…,…,…,…,…,…
"""ORACLE""",3,"[""68389X105""]","[null, ""US68389X1054""]",[null],0.118812
"""ORACLE CORPORTATION""",2,"[""68389X105""]",[null],[null],0.079208
"""Oracle Corporatio n""",2,[null],[null],[null],0.079208
"""ORACLE CORP COM""",1,"[""68389X105""]","[""US68389X1054""]",[null],0.039604


Normalized -- INTRA (n=2528)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""ORACLE CORPORATION""",2487,"[""68389X105""]","[""US68389X1054""]","[""BBG000BQLTW7""]",98.378165
"""ORACLE FINANCIAL SERVICES SOFT…",21,"[""Y3864R102""]","[""INE881D01027""]",[null],0.830696
"""ORACLE CORP JAPAN""",18,"[""J6165M109""]","[""JP3689500001""]",[null],0.712025
"""ORACLE CORPORATIO N""",2,[null],[null],[null],0.079114


Normalized -- NO INTRA (n=2528)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""ORACLE CORPORATION""",2487,"[""68389X105""]","[""US68389X1054""]","[""BBG000BQLTW7""]",98.378165
"""ORACLE FINANCIAL SERVICES SOFT…",21,"[""Y3864R102""]","[""INE881D01027""]",[null],0.830696
"""ORACLE CORP JAPAN""",18,"[""J6165M109""]","[""JP3689500001""]",[null],0.712025
"""ORACLE CORPORATIO N""",2,[null],[null],[null],0.079114


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,ORACLE CORPORATION,1287
1,Oracle Corporation,1097
2,Oracle Corp,69
3,Oracle Corp.,17
4,Oracle Financial Services Software Limited,16
5,Oracle Corp Japan,9
6,ORACLE CORPORATION JAPAN,9
7,ORACLE CORP,7
8,ORACLE FINANCIAL SERVICES SOFTWARE LTD,5
9,ORACLE,3


************************************************************ APPLE ************************************************************
Un-Normalized (n=2528)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""APPLE INC.""",1105,"[""037833100"", null]","[""US0378331005"", null]",[null],43.710443
"""Apple Inc.""",936,"[""378331000"", null, ""037833100""]","[""US0378331005"", null]","[null, ""BBG000B9XRY4""]",37.025316
"""APPLE INC""",175,"[""037833100""]","[""US0378331005"", null]","[""BBG001S5N8V8"", null]",6.922468
"""Apple Hospitality REIT, Inc.""",111,"[""03784Y200""]","[null, ""US03784Y2000""]",[null],4.390823
"""APPLE HOSPITALITY REIT, INC.""",48,"[""03784Y200""]","[""US03784Y2000"", null]",[null],1.898734
…,…,…,…,…,…
"""Apple Hospitality REIT""",1,"[""03784Y200""]",[null],[null],0.039557
"""Apple Inc (AAPL)""",1,"[""037833100""]","[""US0378331005""]",[null],0.039557
"""Apple Hospitality REIT Inc""",1,"[""03784Y200""]",[null],[null],0.039557
"""Apple, Inc""",1,"[""037833100""]",[null],[null],0.039557


Normalized -- INTRA (n=2530)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""APPLE INC.""",2355,"[""037833100""]","[""US0378331005""]","[""BBG000B9XRY4""]",93.083004
"""APPLE HOSPITALITY REIT, INC.""",164,"[""03784Y200""]","[""US03784Y2000""]",[null],6.482213
"""MAUI LAND & PINEAPPLE COMPANY,…",11,"[""577345101""]","[""US5773451019""]",[null],0.434783


Normalized -- NO INTRA (n=2530)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""APPLE INC.""",2355,"[""037833100""]","[""US0378331005""]","[""BBG000B9XRY4""]",93.083004
"""APPLE HOSPITALITY REIT, INC.""",164,"[""03784Y200""]","[""US03784Y2000""]",[null],6.482213
"""MAUI LAND & PINEAPPLE COMPANY,…",11,"[""577345101""]","[""US5773451019""]",[null],0.434783


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,APPLE INC.,1105
1,Apple Inc.,936
2,APPLE INC,175
3,"Apple Hospitality REIT, Inc.",111
4,"APPLE HOSPITALITY REIT, INC.",48
5,Apple Inc,35
6,"Apple, Inc.",33
7,"APPLE, INC.",26
8,Apple,19
9,"APPLE, INC",15


************************************************************ VISA ************************************************************
Un-Normalized (n=2074)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""VISA INC.""",1082,"[""92826C839"", null]","[""US92826C8394"", null]",[null],52.16972
"""Visa Inc.""",914,"[""92826C839"", null]","[""US92826C8394"", null]","[null, ""BBG000PSKYX7""]",44.069431
"""Visa, Inc.""",20,"[""92826C839"", ""92826c839""]","[null, ""US92826C8394""]",[null],0.96432
"""Visa Inc""",20,"[null, ""92826C839""]","[""US92826C8394"", null]",[null],0.96432
"""VISA INC""",12,"[""92826C839""]","[null, ""US92826C8394""]","[null, ""BBG001SRCFY3""]",0.578592
…,…,…,…,…,…
"""Lovisa Holdings Limited""",2,"[""Q56334107""]","[""AU000000LOV7""]",[null],0.096432
"""Visa Inc (V)""",1,"[""92826C839""]","[""US92826C8394""]",[null],0.048216
"""VISA, Inc.""",1,"[""92826C839""]",[null],[null],0.048216
"""VISA INC COM CL A""",1,"[""92826C839""]","[""US92826C8394""]",[null],0.048216


Normalized -- INTRA (n=2075)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""VISA INC.""",2066,"[""92826C839""]","[""US92826C8394""]","[""BBG000PSKYX7""]",99.566265
"""VISAKA INDUSTRIES LIMITED""",7,"[""Y93787151""]","[""INE392A01021""]",[null],0.337349
"""LOVISA HOLDINGS LIMITED""",2,"[""Q56334107""]","[""AU000000LOV7""]",[null],0.096386


Normalized -- NO INTRA (n=2075)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""VISA INC.""",2066,"[""92826C839""]","[""US92826C8394""]","[""BBG000PSKYX7""]",99.566265
"""VISAKA INDUSTRIES LIMITED""",7,"[""Y93787151""]","[""INE392A01021""]",[null],0.337349
"""LOVISA HOLDINGS LIMITED""",2,"[""Q56334107""]","[""AU000000LOV7""]",[null],0.096386


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,VISA INC.,1082
1,Visa Inc.,914
2,Visa Inc,20
3,"Visa, Inc.",20
4,VISA INC,12
5,Visaka Industries Limited,7
6,VISA Inc.,6
7,Visa,5
8,VISA,2
9,Lovisa Holdings Limited,2


************************************************************ EXXON ************************************************************
Un-Normalized (n=1785)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""EXXON MOBIL CORPORATION""",876,"[""30231G102"", null]","[null, ""US30231G1022""]",[null],49.07563
"""Exxon Mobil Corporation""",802,"[""30231G102"", null]","[""US30231G1022"", null]","[""BBG000GZQ728"", null]",44.929972
"""Exxon Mobil""",38,"[""30231G102""]","[""US30231G1022"", null]",[null],2.128852
"""EXXON MOBIL CORP""",26,"[""30231G102""]","[null, ""US30231G1022""]","[""BBG000GZQ728"", ""BBG001S69V32"", null]",1.456583
"""EXXON MOBIL CORPORATION""",13,"[""30231G102""]","[""US30231G1022"", null]",[null],0.728291
…,…,…,…,…,…
"""EXXON MOBIL""",1,"[""30231G102""]",[null],[null],0.056022
"""EXXON MOBIL CORPORAITON""",1,"[""30231G102""]","[""US30231G1022""]","[""BBG000GZQ728""]",0.056022
"""EXXON MOBIL CORP COM""",1,"[""30231G102""]","[""US30231G1022""]",[null],0.056022
"""EXXON""",1,"[""30231G102""]",[null],[null],0.056022


Normalized -- INTRA (n=1787)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""EXXON MOBIL CORPORATION""",1787,"[""30231G102""]","[""US30231G1022""]","[""BBG000GZQ728""]",100.0


Normalized -- NO INTRA (n=1787)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""EXXON MOBIL CORPORATION""",1787,"[""30231G102""]","[""US30231G1022""]","[""BBG000GZQ728""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,EXXON MOBIL CORPORATION,876
1,Exxon Mobil Corporation,802
2,Exxon Mobil,38
3,EXXON MOBIL CORP,26
4,EXXON MOBIL\nCORPORATION,13
5,ExxonMobil,8
6,Exxon Mobil Corp.,8
7,Exxon Mobile,2
8,Exxon Mobil Corp,2
9,EXXON MOBIL,2


************************************************************ CHEVRON ************************************************************
Un-Normalized (n=1828)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CHEVRON CORPORATION""",1034,"[""166764100"", null, ""577081102""]","[null, ""US1667641005""]","[null, ""BBG000K4ND22""]",56.564551
"""Chevron Corporation""",751,"[null, ""166764100"", ""000166764""]","[""US1667641005"", null]","[null, ""BBG000K4ND22""]",41.083151
"""CHEVRON CORP""",21,"[""166764100""]","[""US1667641005"", null]","[null, ""BBG000K4ND22""]",1.148796
"""Chevron Corp""",8,"[""166764100"", null]","[""US1667641005"", null]","[""BBG000K4ND22"", null]",0.437637
"""Chevron Corp.""",4,"[""166764100""]","[null, ""US1667641005""]",[null],0.218818
…,…,…,…,…,…
"""Chevron Corposration""",1,"[""166764100""]",[null],[null],0.054705
"""Chevron""",1,"[""166764100""]",[null],[null],0.054705
"""CHEVRON CORP NEW COM""",1,"[""166764100""]","[""US1667641005""]",[null],0.054705
"""Chevron Corpoation""",1,"[""166764100""]","[""US1667641005""]",[null],0.054705


Normalized -- INTRA (n=2008)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CHEVRON CORPORATION""",2008,"[""166764100""]","[""US1667641005""]","[""BBG000K4ND22""]",100.0


Normalized -- NO INTRA (n=2008)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CHEVRON CORPORATION""",2008,"[""166764100""]","[""US1667641005""]","[""BBG000K4ND22""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,CHEVRON CORPORATION,1034
1,Chevron Corporation,751
2,"MATTEL, INC.",117
3,"Mattel, Inc.",56
4,CHEVRON CORP,21
5,Chevron Corp,8
6,Chevron Corp.,4
7,CHEVRON,2
8,CHEVRON CORP NEW,2
9,Mattel Inc (MAT),2


************************************************************ META PLATFORM ************************************************************
Un-Normalized (n=1766)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""Meta Platforms, Inc.""",949,"[""30303M102"", null]","[""US30303M1027"", null]",[null],53.737259
"""META PLATFORMS, INC.""",808,"[null, ""30303M102""]","[null, ""US30303M1027""]",[null],45.753114
"""Meta Platforms""",4,"[""30303M102"", null]","[null, ""US30303M1027""]",[null],0.226501
"""Meta Platforms Inc""",2,"[""30303M102""]","[""US30303M1027""]","[null, ""BBG000MM2P62""]",0.11325
"""Meta Platforms Inc.""",2,"[""30303M102""]","[""US30303M1027""]",[null],0.11325
"""Meta Platforms, Inc""",1,"[""30303M102""]",[null],[null],0.056625


Normalized -- INTRA (n=1766)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""META PLATFORMS, INC.""",1766,"[""30303M102""]","[""US30303M1027""]","[""BBG000MM2P62""]",100.0


Normalized -- NO INTRA (n=1766)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""META PLATFORMS, INC.""",1766,"[""30303M102""]","[""US30303M1027""]","[""BBG000MM2P62""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,"Meta Platforms, Inc.",949
1,"META PLATFORMS, INC.",808
2,Meta Platforms,4
3,Meta Platforms Inc,2
4,Meta Platforms Inc.,2
5,"Meta Platforms, Inc",1


************************************************************ AIR TRANSPORT ************************************************************
Un-Normalized (n=79)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AIR TRANSPORT SERVICES GROUP, …",48,"[null, ""00922R105""]","[""US00922R1059"", null]",[null],60.759494
"""Air Transport Services Group, …",30,"[""00922R105""]","[""US00922R1059"", null]",[null],37.974684
"""AIR TRANSPORT SERVICES GROUP, …",1,"[""00922R105""]","[""US00922R1059""]",[null],1.265823


Normalized -- INTRA (n=79)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AIR TRANSPORT SERVICES GROUP, …",79,"[""00922R105""]","[""US00922R1059""]",[null],100.0


Normalized -- NO INTRA (n=79)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""AIR TRANSPORT SERVICES GROUP, …",79,"[""00922R105""]","[""US00922R1059""]",[null],100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,"AIR TRANSPORT SERVICES GROUP, INC.",48
1,"Air Transport Services Group, Inc.",30
2,"AIR TRANSPORT\nSERVICES GROUP, INC.",1


************************************************************ BANK OF AMERICA ************************************************************
Un-Normalized (n=1901)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BANK OF AMERICA CORPORATION""",961,"[""060505104"", ""060505583"", … ""060505591""]","[null, ""US0605051046"", … ""US0605056334""]","[null, ""BBG000BCTLF6""]",50.552341
"""Bank of America Corporation""",733,"[""060505625"", ""060505583"", … ""060505633""]","[""US0605055914"", ""US0605056334"", … ""US0605055831""]",[null],38.558653
"""BANK OF AMERICA""",112,"[""060505195"", ""060505104"", null]",[null],[null],5.891636
"""Bank of America""",35,"[""000060505"", null, ""060505104""]","[null, ""US0605051046""]",[null],1.841136
"""Bank of America Corp""",25,"[""060505104""]","[null, ""US0605051046""]","[null, ""BBG000BCTLF6""]",1.315097
…,…,…,…,…,…
"""BANK OF AMERICA CORP""",6,"[""060505104""]","[null, ""US0605051046""]",[null],0.315623
"""Bank of America Corp.""",5,"[""605051040"", ""060505104""]","[""US0605051046"", null]",[null],0.263019
"""Bank Of America Corp.""",3,"[""060505104""]","[""US0605051046""]",[null],0.157812
"""Bank of America Corporation (B…",2,"[""060505104""]","[""US0605051046""]",[null],0.105208


Normalized -- INTRA (n=1904)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BANK OF AMERICA CORPORATION""",1904,"[""060505104""]","[""US0605051046""]","[""BBG000BCTLF6""]",100.0


Normalized -- NO INTRA (n=1904)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BANK OF AMERICA CORPORATION""",1904,"[""060505104""]","[""US0605051046""]","[""BBG000BCTLF6""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,BANK OF AMERICA CORPORATION,961
1,Bank of America Corporation,733
2,BANK OF AMERICA,112
3,Bank of America,35
4,Bank of America Corp,25
5,BANK OF AMERICA\nCORPORATION,18
6,BANK OF AMERICA CORP,6
7,Bank of America Corp.,5
8,Bank Of America Corp.,3
9,Bank of America Corporation (BAC),2


************************************************************ BLACKROCK ************************************************************
Un-Normalized (n=1466)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BLACKROCK, INC.""",696,"[null, ""09247X101""]","[null, ""US09247X1019""]",[null],47.476126
"""BlackRock, Inc.""",456,"[""09247X101"", ""09247K101"", null]","[null, ""US09247X1019""]","[null, ""BBG000C2PW58""]",31.105048
"""BLACKROCK INC""",46,"[""09247X101""]",[null],[null],3.13779
"""BlackRock Funds""",44,"[""092501105"", ""09260E105"", … ""09255R202""]",[null],[null],3.001364
"""Blackrock""",22,"[""09247X101""]","[""US09247X1019""]",[null],1.500682
…,…,…,…,…,…
"""BlackRock Income Trust""",3,"[""09247F209""]","[""US09247F2092""]","[""BBG000BDC193""]",0.204638
"""BLACKROCK, INC""",2,"[""09247X101""]",[null],[null],0.136426
"""BlackRock Inc""",1,"[""09247X101""]",[null],[null],0.068213
"""BlackRock US Core Property Fun…",1,"[""97MSCRP36""]",[null],[null],0.068213


Normalized -- INTRA (n=1467)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BLACKROCK, INC.""",1243,"[""09247X101"", null]","[""US09247X1019"", null]","[null, ""BBG000C2PW58""]",84.730743
"""BLACKROCK FUNDS""",63,"[""09260E105""]","[""US09255R2022""]","[""BBG000BDC193""]",4.294479
"""BLACKROCK ESG CAPITAL ALLOCATI…",16,"[""09262F100""]","[""US09262F1003""]",[null],1.090661
"""BLACKROCK VARIABLE SERIES FUND…",16,"[""09258X107""]","[""US09258X1072""]",[null],1.090661
"""BLACKROCK TCP CAPITAL CORP.""",15,"[""09259E108""]","[""US09259E1082""]",[null],1.022495
…,…,…,…,…,…
"""BLACKROCK MUNICIPAL INCOME TRU…",4,"[""09249N101""]","[""US09249N1019""]","[""BBG000NWS486""]",0.272665
"""BLACKROCK NY MUNI OPP FUNDS A1""",4,"[""09253A813""]",[null],[null],0.272665
"""BLACKROCK MUNICIPAL INCOME TRU…",3,"[""09248F109""]","[""US09248F1093""]",[null],0.204499
"""BLACKROCK CREDIT ALLOCATION, I…",3,"[""092508100""]","[""US0925081004""]","[""BBG000QHYF20""]",0.204499


Normalized -- NO INTRA (n=1468)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""BLACKROCK, INC.""",1244,"[""717081103"", ""09247X101""]","[""US09247X1019"", ""US7170811035""]","[""BBG000C2PW58"", ""BBG000BR2B91""]",84.741144
"""BLACKROCK FUNDS""",63,"[""09260E105""]","[""US09255R2022""]","[""BBG000BDC193""]",4.291553
"""BLACKROCK VARIABLE SERIES FUND…",16,"[""09258X107""]","[""US09258X1072""]",[null],1.089918
"""BLACKROCK ESG CAPITAL ALLOCATI…",16,"[""09262F100""]","[""US09262F1003""]",[null],1.089918
"""BLACKROCK TCP CAPITAL CORP.""",15,"[""09259E108""]","[""US09259E1082""]",[null],1.021798
…,…,…,…,…,…
"""BLACKROCK MUNICIPAL INCOME TRU…",4,"[""09249N101""]","[""US09249N1019""]","[""BBG000NWS486""]",0.27248
"""BLACKROCK FLOATING RATE INC PO…",4,"[""09260B762""]",[null],[null],0.27248
"""BLACKROCK MUNICIPAL INCOME TRU…",3,"[""09248F109""]","[""US09248F1093""]",[null],0.20436
"""BLACKROCK CREDIT ALLOCATION, I…",3,"[""092508100""]","[""US0925081004""]","[""BBG000QHYF20""]",0.20436


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,"BLACKROCK, INC.",696
1,"BlackRock, Inc.",456
2,BLACKROCK INC,46
3,BlackRock Funds,44
4,Blackrock,22
5,BLACKROCK ESG CAPITAL ALLOCATION,16
6,"BlackRock Variable Series Funds II, Inc.",16
7,BLACKROCK TCP CAPITAL CORP.,15
8,BLACKROCK CAPITAL ALLOCATION TRUST,12
9,BLACKROCK MUNIYIELD FUND,9


************************************************************ SUNCOR ************************************************************
Un-Normalized (n=265)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""SUNCOR ENERGY INC.""",148,"[null, ""867224107""]","[null, ""CA8672241079""]",[null],55.849057
"""Suncor Energy Inc.""",65,"[""867229106"", null, ""867224107""]","[""CA8672241079"", null]",[null],24.528302
"""SUNCOR ENERGY INC""",36,"[""867224107""]","[""CA8672241079"", null]","[""BBG000BRK7L6"", null]",13.584906
"""SUNCORP GROUP LTD""",12,"[""Q8802S103"", ""Q88040110""]","[""AU000000SUN6""]",[null],4.528302
"""Suncor Energy, Inc.""",2,"[""867224107""]","[""CA8672241079"", null]",[null],0.754717
"""Suncor Energy Inc""",2,"[""867224107""]","[""CA8672241079"", null]",[null],0.754717


Normalized -- INTRA (n=265)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""SUNCOR ENERGY INC.""",253,"[""867224107""]","[""CA8672241079""]","[""BBG000BRK7L6""]",95.471698
"""SUNCORP GROUP LTD""",12,"[""Q88040110""]","[""AU000000SUN6""]",[null],4.528302


Normalized -- NO INTRA (n=265)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""SUNCOR ENERGY INC.""",253,"[""867224107""]","[""CA8672241079""]","[""BBG000BRK7L6""]",95.471698
"""SUNCORP GROUP LTD""",12,"[""Q88040110""]","[""AU000000SUN6""]",[null],4.528302


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,SUNCOR ENERGY INC.,148
1,Suncor Energy Inc.,65
2,SUNCOR ENERGY INC,36
3,SUNCORP GROUP LTD,12
4,"Suncor Energy, Inc.",2
5,Suncor Energy Inc,2


************************************************************ CONOCO ************************************************************
Un-Normalized (n=1251)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CONOCOPHILLIPS""",682,"[null, ""20825C104"", … ""20852C104""]","[null, ""US20825C1045""]","[null, ""BBG000BQQH30""]",54.516387
"""ConocoPhillips""",509,"[null, ""20825C104""]","[""US20825C1045"", null]","[null, ""BBG000BQQH30""]",40.68745
"""CONOCO PHILLIPS""",48,"[""20825C104""]",[null],[null],3.83693
"""Conoco Phillips""",6,"[null, ""20825C104""]","[""US20825C1045"", null]",[null],0.479616
"""Conocophillips""",4,"[""20825C104""]",[null],[null],0.319744
"""CONOCO PHILLIPS """,1,"[""20825C104""]","[""US20825C1045""]",[null],0.079936
"""CONOCOPHILLIPS COM""",1,"[""20825C104""]","[""US20825C1045""]",[null],0.079936


Normalized -- INTRA (n=1252)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CONOCOPHILLIPS""",1252,"[""20825C104""]","[""US20825C1045""]","[""BBG000BQQH30""]",100.0


Normalized -- NO INTRA (n=1252)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""CONOCOPHILLIPS""",1252,"[""20825C104""]","[""US20825C1045""]","[""BBG000BQQH30""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,CONOCOPHILLIPS,682
1,ConocoPhillips,509
2,CONOCO PHILLIPS,48
3,Conoco Phillips,6
4,Conocophillips,4
5,CONOCOPHILLIPS COM,1
6,COP,1
7,CONOCO PHILLIPS,1


************************************************************ PHILLIPS 66 ************************************************************
Un-Normalized (n=719)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""PHILLIPS 66""",420,"[""718546104"", null]","[""US7185461040"", null]","[null, ""BBG00286S4N9""]",58.414465
"""Phillips 66""",298,"[null, ""718546104""]","[null, ""US7185461040""]",[null],41.446453
"""Phillips 66 (PSX)""",1,"[""718546104""]","[""US7185461040""]",[null],0.139082


Normalized -- INTRA (n=722)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""PHILLIPS 66""",722,"[""718546104""]","[""US7185461040""]","[""BBG00286S4N9""]",100.0


Normalized -- NO INTRA (n=722)


issuer_name,len,cusip,isin,figi,% of obs
str,u32,list[str],list[str],list[str],f64
"""PHILLIPS 66""",722,"[""718546104""]","[""US7185461040""]","[""BBG00286S4N9""]",100.0


Consolidation Fodder:


Unnamed: 0,issuer_name,count
0,PHILLIPS 66,420
1,Phillips 66,298
2,PHILLIPS 73,1
3,Phillips 68,1
4,Phillps 66,1
5,Phillips 66 (PSX),1


In [50]:
# Raw company columns for the rows whose issuer_name differs from the normalized one.
# Assumes `votes` and `normed5` are row-aligned — TODO confirm.
# Rows where either name is null compare as null and are therefore not shown.
votes.select(COMPANY_COLS).filter(votes["issuer_name"] != normed5["issuer_name"])

issuer_name,cusip,isin,figi
str,str,str,str
"""AMPHENOL CORPORATION""","""032095101""","""US0320951017""",
"""THE SHERWIN-WILLIAMS COMPANY""","""824348106""","""US8243481061""",
"""THE SHERWIN-WILLIAMS COMPANY""","""824348106""","""US8243481061""",
"""AMPHENOL CORPORATION""","""032095101""","""US0320951017""",
"""Toast, Inc.""","""888787108""",,
…,…,…,…
"""INTEL CORPORATION""","""458140100""",,
"""THE SHERWIN-WILLIAMS COMPANY""","""824348106""",,
"""THE KROGER CO.""","""501044101""",,
"""CHOICE HOTELS INTERNATIONAL, I…","""169905106""",,


In [51]:
# Normalized company columns for the rows whose issuer_name differs from the raw value.
# Assumes `votes` and `normed5` are row-aligned — TODO confirm.
normed5.select(COMPANY_COLS).filter(votes["issuer_name"] != normed5["issuer_name"])

issuer_name,cusip,isin,figi
str,str,str,str
"""Amphenol Corporation""","""032095101""","""US0320951017""",
"""The Sherwin-Williams Company""","""824348106""","""US8243481061""","""BBG000BSXQV7"""
"""The Sherwin-Williams Company""","""824348106""","""US8243481061""","""BBG000BSXQV7"""
"""Amphenol Corporation""","""032095101""","""US0320951017""",
"""TOAST, INC.""","""888787108""","""US8887871080""",
…,…,…,…
"""Intel Corporation""","""458140100""","""US4581401001""","""BBG000C0G1D1"""
"""The Sherwin-Williams Company""","""824348106""","""US8243481061""","""BBG000BSXQV7"""
"""The Kroger Co.""","""501044101""","""US5010441013""","""BBG000BMY992"""
"""Choice Hotels International, I…","""169905106""","""US1699051066""",


In [42]:
# Number of distinct cusip values left after normalization.
normed5.get_column("cusip").n_unique()

7545

In [14]:
# Preview the raw (un-normalized) company identifier columns of `votes`.
votes.select(COMPANY_COLS)

issuer_name,cusip,isin,figi
str,str,str,str
"""CANADIAN PACIFIC KANSAS CITY L…","""13646K108""","""CA13646K1084""",
"""AAON, INC.""","""000360206""","""US0003602069""",
"""AMAZON.COM, INC.""","""023135106""","""US0231351067""",
"""AMPHENOL CORPORATION""","""032095101""","""US0320951017""",
"""MICROSOFT CORPORATION""","""594918104""","""US5949181045""",
…,…,…,…
"""VERALTO CORPORATION""","""92338C103""",,
"""ANALOG DEVICES, INC.""","""032654105""",,
"""EDITAS MEDICINE, INC.""","""28106W103""",,
"""LOCKHEED MARTIN CORPORATION""","""539830109""",,


In [6]:
# Per-column count of null values in the normalized frame.
normed5.null_count()

issuer_name,cusip,isin,figi
u32,u32,u32,u32
0,11703,8315,514694


In [None]:
# Normalize the company identifier columns of `votes`, partitioning connected
# subgraphs where the post-processor deems it necessary.
# NOTE(review): `normalize_subset4` is not among the imports in the notebook's
# first cell — confirm where it comes from before running on a fresh kernel.
normed4 = normalize_subset4(
    votes,
    cols_to_normalize=COMPANY_COLS_L,
    connected_subgraphs_postprocessor=partition_companies_graph_where_necessary,
)

In [None]:
# Post-processor handed to the normalizer; add `verbose=True` to the partial to debug.
pp3 = partial(partition_companies_graph_where_necessary)

# Normalize the company identifier columns of `votes` with that post-processor.
# NOTE(review): `normalize_subset3` is not among the imports in the notebook's
# first cell — confirm where it comes from before running on a fresh kernel.
normed3 = normalize_subset3(
    votes,
    cols_to_normalize=COMPANY_COLS_L,
    connected_subgraphs_postprocessor=pp3,
)

In [None]:
# Display the frame returned by `normalize_subset3`.
normed3

In [None]:
# Total number of null cells across the company columns after normalization.
# Returned as a plain scalar so it can be compared directly with the same
# total computed on the raw `votes` frame.
normed3.select(COMPANY_COLS).null_count().sum_horizontal().item()

In [None]:
# Total number of null cells across the company columns of the raw `votes` frame.
votes.select(COMPANY_COLS).null_count().sum_horizontal().item()

## Check that removed nodes are repopulated

In [5]:
# Build the subgraphs of the raw company columns with no post-processing, and
# materialize them as a tuple so they can be indexed and iterated repeatedly.
raw_company_records = votes.select(COMPANY_COLS)
G = tuple(
    unconsolidated_df_to_subgraphs(
        raw_company_records, connected_subgraphs_postprocessor=None
    )
)

In [6]:
# Pick the subgraph containing a node whose name includes "starbucks"
# (the helper matches case-insensitively and by substring).
sb = extract_specific_name_subgraph(connected_subgs=G, name="STARBUCKS")

In [None]:
# Node count of the Starbucks subgraph before partitioning.
sb.number_of_nodes()

In [None]:
# Run the company partitioning post-processor on the Starbucks subgraph, with
# verbose output enabled so its decisions can be inspected.
partitioned = partition_companies_graph_where_necessary(sb, verbose=True)

In [None]:
# Node count after partitioning; compare with the pre-partition count to see
# whether any nodes were dropped.
print(len(partitioned.nodes))
# Render the partitioned graph.
draw_graph(partitioned, size=5)

In [10]:
# Build the un-post-processed subgraphs of the raw company columns.
# NOTE(review): this cell is identical to the earlier cell that assigns `G`
# under "Check that removed nodes are repopulated"; one of the two can likely go.
G = tuple(
    unconsolidated_df_to_subgraphs(
        votes.select(COMPANY_COLS), connected_subgraphs_postprocessor=None
    )
)

In [None]:
# Peek at the first node entry (with its attached data) of the first subgraph
# to see how nodes are represented.
list(G[0].nodes.data())[0]

In [None]:
# Run the Louvain-based partitioner on a single subgraph with verbose output.
# NOTE(review): index 4 is a hard-coded pick — confirm which company it refers to.
partition_via_louvain(G[4], verbose=True)
# draw_graph(G[4], 5)

In [None]:
# `normalize_subset` is already imported in the notebook's first cell, so it is
# not re-imported here.

# Post-processor applied to the connected subgraphs by both calls below.
pp = partial(
    partition_companies_graph_where_necessary,
    # verbose=True,
)

# Atomic (deduplicated) normalization result for the company columns only.
atomic = extract_normalized_atomic(
    votes.select(COMPANY_COLS), connected_subgraphs_postprocessor=pp
)
# Full `votes` frame with the company columns normalized in place of the raw ones.
normed = normalize_subset(
    votes,
    connected_subgraphs_postprocessor=pp,
    cols_to_normalize=COMPANY_COLS_L,
)

In [None]:
# Per-column null counts in the raw `votes` frame (baseline for comparison).
votes.null_count()

In [None]:
# Per-column null counts after normalization; compare against the raw baseline.
# Previously tried here: .filter(pl.col("issuer_name") == pl.lit("AAON, INC."))
normed.null_count()

In [None]:
# Per-column null counts in the atomic normalization result.
atomic.null_count()

# ISSUE

## Problem
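Nulls are being propagated in the normalized output (`normed`): some rows whose `issuer_name` is non-null in `votes` end up with a null `issuer_name` after normalization.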

## Diagnosis
(Best guess) When nodes are removed wholesale from the graph, rather than merely having all of their edges cut, they never make it into the canonical mapping, and so their values become null.

In [None]:
# Rows that had an issuer name in the raw data but lost it during normalization.
# Assumes `votes` and `normed` are row-aligned — TODO confirm.
lost_after_normalization = (
    normed["issuer_name"].is_null() & votes["issuer_name"].is_not_null()
)
# Distinct raw names affected, as a plain Python list.
removed_issuers = (
    votes.filter(lost_after_normalization).get_column("issuer_name").unique().to_list()
)
removed_issuers

In [None]:
# Pull out and render the subgraph containing a node whose name includes
# "emerson electric co." (matched case-insensitively by the helper).
emerson = extract_specific_name_subgraph(connected_subgs=G, name="emerson electric co.")
draw_graph(emerson, size=5)

In [None]:
# Partition the Emerson subgraph with verbose output enabled at both levels
# so the partitioning decisions can be inspected.
emerson_partitioned = partition_companies_graph_where_necessary(
    emerson,
    verbose_within_partitioning_algs=True,
    verbose=True,
)

In [None]:
# Draw the subgraph of every issuer whose name was lost during normalization.
# Best-effort: an issuer that cannot be located (the lookup raises ValueError
# when no subgraph matches) or drawn is reported with the reason, and the loop
# moves on to the next one.
for issuer in removed_issuers:
    try:
        draw_graph(extract_specific_name_subgraph(G, issuer), size=5)
    except Exception as exc:  # not a bare `except:`, so Ctrl-C still interrupts the loop
        # repr() makes stray whitespace in the raw issuer name visible.
        print(f"{issuer!r}: {type(exc).__name__}: {exc}")

In [None]:
# Raw rows for one exact issuer_name value.
# The spelling and the trailing space are part of the literal being matched;
# presumably they mirror the raw value verbatim — do not "fix" them.
votes.select(COMPANY_COLS).filter(issuer_name="Northrup Grumman Corporation ")

In [None]:
# Re-display the list of raw issuer names that became null after normalization.
removed_issuers

In [None]:
# Atomic normalization result for the company columns, using the `pp` post-processor.
# NOTE(review): this is the same call that produces `atomic` in an earlier cell;
# consider reusing that result instead of recomputing.
atomized_subset = extract_normalized_atomic(
    votes.select(COMPANY_COLS), connected_subgraphs_postprocessor=pp
)

In [None]:
# Case-insensitive search of the atomic result by issuer name.
# `s` is interpolated into a regex unescaped, so keep it free of regex metacharacters.
s = "walmart"
name_matches = pl.col("issuer_name").str.contains(f"(?i){s}")
atomized_subset.filter(name_matches)

In [None]:
# Check whether any of the issuer names that became null still appear in the
# atomic result (exact-match membership test).
atomized_subset.filter(pl.col("issuer_name").is_in(removed_issuers))

In [26]:
# Company identifier columns of the normalized frame only.
normed_comps = normed.select(COMPANY_COLS)

In [None]:
# Normalized rows that ended up with no issuer name.
normed_comps.filter(normed_comps["issuer_name"].is_null())

In [None]:
# Display the frame returned by `normalize_subset3` for reference.
normed3

In [None]:
# Display the raw `votes` frame for reference.
votes

In [None]:
# Mask of rows whose issuer_name is null after normalization.
null_issuers = normed_comps["issuer_name"].is_null()

# Show the first few affected rows twice: first as they appear in the raw
# data, then as they appear after normalization.
# Assumes `votes` and `normed_comps` are row-aligned — TODO confirm.
raw_preview = votes.select(COMPANY_COLS).filter(null_issuers).head()
normed_preview = normed_comps.filter(null_issuers).head()
display(raw_preview, normed_preview)

In [None]:
# Look up a single cusip in the atomic result.
atomized_subset.filter(pl.col("cusip") == "291011104")

In [None]:
# Per-column null counts in the normalized frame.
normed.null_count()