In [1]:
import duckdb

In [2]:
sunspec = duckdb.sql("""
select * from read_csv('EntityCodes_cleaned.csv')
""")
sunspec

┌──────────────────────────────────────────────┬────────────┐
│                ProdMfr_Value                 │ EntityCode │
│                   varchar                    │  varchar   │
├──────────────────────────────────────────────┼────────────┤
│ ABB                                          │ ABB        │
│ Ablytek                                      │ ABLYT      │
│ Anji Dasol Solar Energy Science & Technology │ ADSEST     │
│ Adesto Technologies                          │ ADTE       │
│ Advance Power                                │ ADVP       │
│ AEG SOLAR ENERGY SRL                         │ AEG        │
│ AE Solar GmbH                                │ AESL       │
│ Alpha ESS Co., Ltd.                          │ AESS       │
│ Afore Energy                                 │ AFOR       │
│ AIMS Power                                   │ AIMS       │
│   ·                                          │  ·         │
│   ·                                          │  ·         │
│   ·   

In [3]:
prodbattery = duckdb.sql("""
select distinct
    "Manufacturer Name" as ProdMfr_Value
from read_csv('ProdBattery/Battery_List_Data_ADA.2026-02-11.csv')
where ProdMfr_Value is not null
""")
prodbattery

┌────────────────────────────────────────────────────┐
│                   ProdMfr_Value                    │
│                      varchar                       │
├────────────────────────────────────────────────────┤
│ Discover Energy Corp.                              │
│ Energizer Solar                                    │
│ GuangZhou MeriTech Power Co Ltd                    │
│ Lunna                                              │
│ MANGO POWER TECH INC.                              │
│ Shenzhen GSL Energy Co., Ltd.                      │
│ Chint Power Systems America                        │
│ Discover Energy Systems Corp.                      │
│ Hoymiles Power Electronics Inc.                    │
│ Power Center                                       │
│      ·                                             │
│      ·                                             │
│      ·                                             │
│ Soluna(Shanghai)Co.,Ltd                            │
│ Storz Po

In [4]:
prodmodule = duckdb.sql("""
select distinct
    "Manufacturer" as ProdMfr_Value
from read_csv('ProdModule/PV_Module_List_Full_Data_ADA.2026-02-11.csv')
where ProdMfr_Value is not null
""")
prodmodule

┌──────────────────────────────────────────────────────────┐
│                      ProdMfr_Value                       │
│                         varchar                          │
├──────────────────────────────────────────────────────────┤
│ BLUESUN GROUP LIMITED                                    │
│ CW ENERJİ MÜHENDİSLİK TİCARET VE SANAYİ ANONİM ŞİRKETİ   │
│ Energate Enerji Üretim San. A.Ş.                         │
│ GermanSolar USA                                          │
│ Hanersun Technology Co., Ltd.                            │
│ Jackery Inc.                                             │
│ Jetion Solar (China) Co.,Ltd.                            │
│ La Solar LLC                                             │
│ MECEN SOLAR VINA CO.,LTD                                 │
│ PEIMAR                                                   │
│   ·                                                      │
│   ·                                                      │
│   ·                   

In [5]:
cec = duckdb.sql("""
select
    row_number() over () as id,
    ProdMfr_Value,
from (
    select * from prodbattery
    union
    select * from prodmodule
)
""")
cec

┌───────┬────────────────────────────────────────────────────────────┐
│  id   │                       ProdMfr_Value                        │
│ int64 │                          varchar                           │
├───────┼────────────────────────────────────────────────────────────┤
│     1 │ Discover Energy Corp.                                      │
│     2 │ Energizer Solar                                            │
│     3 │ GuangZhou MeriTech Power Co Ltd                            │
│     4 │ Lunna                                                      │
│     5 │ MANGO POWER TECH INC.                                      │
│     6 │ Shenzhen GSL Energy Co., Ltd.                              │
│     7 │ Amerisolar-Worldwide Energy and Manufacturing USA Co., Ltd │
│     8 │ DuCal Solar Technology LLC                                 │
│     9 │ EMMVEE Photovoltaic Power Private Limited                  │
│    10 │ GOLDI SUN PRIVATE LIMITED                                  │
│     

In [6]:
cleaned_cec = duckdb.sql("""
select
    id,
    case ProdMfr_Value
        when 'Darfon Electronics Corp.' then 'Darfon Electronics Corporation'
        when 'Tesla Inc.' then 'Tesla'
        when 'Yotta Energy, Inc.' then 'YOTTA ENERGY INC'
        when 'GuangZhou MeriTech Power Co Ltd      ' then 'GuangZhou MeriTech Power Co Ltd'
        when 'Fortress Power LLC' then 'Fortress Power'
        when 'Holu Hou Energy LLC' then 'Holu Hou'
        when 'Discover Energy Systems Corp.' then 'Discover Energy Corp.'
        when 'Iron Edison Battery Company LLC' then 'Iron Edison LLC'
        when 'KORE Power, Inc.' then 'KORE Power'
        when 'LG Energy Solution, Ltd.' then 'LG Electronics Inc.'
        when 'Lunar Energy, Inc.' then 'Lunar Energy'
        when 'MidNite Solar, Inc.' then 'MidNite Solar Inc'
        when 'NINGBO DEYE ESS TECHNOLOGY CO., LTD' then 'Ningbo Deye Inverter Technology'
        when 'NeoVolta Inc.' then 'NeoVolta'
        when 'Outback Power LLC' then 'OutBack Power'
        when 'PANASONIC CORPORATION OF NORTH AMERICA' then 'Panasonic Corporation of North America'
        when 'Schneider Electric USA, Inc.' then 'Schneider Electric (US)'
        when 'Shenzhen Growatt New Energy Co., Ltd.' then 'Shenzhen Growatt New Energy Technology Co., Ltd'
        when 'SolaX Power Network Technology (Zhe jiang) Co., Ltd.' then 'Solax Power Network Technology (Zhejiang) Co., Ltd'
        when 'SimpliPhi Power, Inc.' then 'Simpliphi Power'
        when 'SolarEdge Technologies Ltd.' then 'SolarEdge Technologies Inc'
        when 'BLUESUN GROUP LIMITED' then 'Bluesun Solar Co.,Ltd'
        when 'Caterpillar Inc.' then 'Caterpillar, Inc.'
        when 'Chint Power Systems America' then 'Chint Power'
        when 'Trina Solar Co.,Ltd' then 'Trina Solar'
        when 'Yingli Energy Development Co Ltd' then 'Yingli Energy (China)'
        when 'SunPower' then 'SunPower Corporation'
        when 'SunStyle' then 'SUNSTYLE AG / LTD'
        when 'GE Energy' then 'General Electric Company'
        when 'Enphase Energy, Inc.' then 'Enphase Energy'
        when 'Freedom Forever Procurement LLC' then 'Freedom Forever'
        when 'Hanwha Qcells (Qidong) Co. Ltd.' then 'Hanwha Q-Cells'
        when 'Hanersun Energy Co., Ltd.' then 'Hanersun Technology Co., Ltd.'
        when 'EMMVEE Photovoltaic Power Private Limited' then 'EMMVEE Photovoltaic Power Private Ltd.'
        when 'GOLDI SUN PRIVATE LIMITED' then 'Goldi Solar Private Limited'
        when 'Hengdian Group DMEGC Magnetics Co., Ltd.' then 'Hengdian Group DMEGC Magnetics'
        when 'Hoymiles Power Electronics Inc.' then 'Hoymiles Converter Technology'
        when 'Luma Resources LLC' then 'LUMA Resources'
        when 'Mundra Solar Energy Limited (Adani)' then 'Mundra Solar Energy Limited'
        when 'Mundra Solar PV Limited (Adani)' then 'Mundra Solar PV Ltd.'
        when 'REC Group' then 'REC Solar'
        when 'RECOM & Co Ltd.' then 'Recom'
        when 'Saatvik Green Energy Limited' then 'Saatvik Green Energy (P) Limited'
        when 'Changzhou EGing Photovoltaic Technology Co., Ltd.' then 'Changzhou Eging Photovoltaic Technology'
        else ProdMfr_Value
    end as ProdMfr_Value,
from cec
""")
print('Unmatched')
duckdb.sql("""
select distinct
    fragment.ProdMfr_Value as CEC_ProdMfr_Value,
    sunspec.ProdMfr_Value as SunSpec_ProdMfr_Value,
    sunspec.EntityCode,
from sunspec join (
    select
        ProdMfr_Value,
        split(ProdMfr_Value, ' ')[1] as F,
    from cleaned_cec
    where ProdMfr_Value not in (select ProdMfr_Value from sunspec)
) as fragment on
    sunspec.ProdMfr_Value ilike format('%{}%', fragment.F)
order by CEC_ProdMfr_Value, SunSpec_ProdMfr_Value
offset 0
""")

Unmatched


┌───────────────────┬───────────────────────┬────────────┐
│ CEC_ProdMfr_Value │ SunSpec_ProdMfr_Value │ EntityCode │
│      varchar      │        varchar        │  varchar   │
├───────────────────┴───────────────────────┴────────────┤
│                         0 rows                         │
└────────────────────────────────────────────────────────┘

In [8]:
duckdb.sql("""
select distinct ProdMfr_Value from cleaned_cec except select ProdMfr_Value from sunspec
""")#.pl().write_csv('unmatched.csv')

┌───────────────┐
│ ProdMfr_Value │
│    varchar    │
├───────────────┤
│    0 rows     │
└───────────────┘

In [9]:
final_entity_codes = duckdb.sql("""
select
    EntityCode,
    sunspec.ProdMfr_Value as ProdMfr_Value__cleaned,
    cec.ProdMfr_Value as ProdMfr_Value__cec,
from cleaned_cec
join sunspec using (ProdMfr_Value)
join cec using (id)
""")
final_entity_codes.show(max_width=120)

┌────────────┬────────────────────────────────────────────┬────────────────────────────────────────────────────────────┐
│ EntityCode │           ProdMfr_Value__cleaned           │                     ProdMfr_Value__cec                     │
│  varchar   │                  varchar                   │                          varchar                           │
├────────────┼────────────────────────────────────────────┼────────────────────────────────────────────────────────────┤
│ DISCE      │ Discover Energy Corp.                      │ Discover Energy Corp.                                      │
│ NRGZR      │ Energizer Solar                            │ Energizer Solar                                            │
│ GZMTP      │ GuangZhou MeriTech Power Co Ltd            │ GuangZhou MeriTech Power Co Ltd                            │
│ LUNNA      │ Lunna                                      │ Lunna                                                      │
│ MANGO      │ MANGO POWER TECH 

In [10]:
final_entity_codes.pl().write_csv('EntityCodes.csv')