In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import requests
import re
from bs4 import BeautifulSoup

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from word2number import w2n

from datetime import datetime, timedelta

import re
from newspaper import Article
import unidecode
import contractions
import en_core_web_sm
import string

from textblob import TextBlob


In [3]:
data_dir = "./lithium_dataset"
cleaned_data_dir = "./lithium_dataset/cleaned_data"

## Price Data Ingestion + Processing
- Consider lithium future, lithium metal spot, Li2CO3, and LIOH prices are potential regressands
- Clean each by calculating all available log returns with lags 1, 7, and 14 days
- According to Professor Geard, we'll only use data from 2021, which would include on average 500-600 datapoints
- Store them in jerry/lithium_dataset/cleaned_data

In [4]:
def convert_string_to_numeric(s):
    """
    Convert strings like '5.74M' or '532.50K' to numeric values
    """
    s = str(s).replace(',', '')  # Remove commas
    if 'M' in s:
        return float(s.replace('M', '')) * 1e6
    elif 'K' in s:
        return float(s.replace('K', '')) * 1e3
    else:
        return float(s)


def process_price_dataframe(df, price_cols, date_format, lags):

    # Convert columns in price_cols from string to numeric
    for col in price_cols:
        if type(df.loc[0,col]) == str:
            df[col] = df[col].apply(convert_string_to_numeric)
        else: 
            df[col] = df[col].astype(float)

    # Processing Dates
    df['Date'] = df['Date'].str.replace('.', '')
    df['Date'] = pd.to_datetime(df['Date'], format=date_format)
    for lag in lags:
        df[f'lag_{lag}'] = df.loc[:,'Date'] - timedelta(days=lag)
    df.set_index('Date', inplace=True)
    df = df.sort_index(ascending=True)
    for lag in lags:
        df[f'lag_{lag}_price'] = df[f'lag_{lag}'].apply(lambda x: df.loc[x,'Close'] if x in df.index else np.NaN)
        df = df.drop(columns=[f'lag_{lag}'])
        df[f'lag_{lag}_log_return'] = np.log(df['Close'] / df[f'lag_{lag}_price'])

    return df

In [5]:
regressands = ['60C-LTCX', '99C-LTCB', '995C-LTCB', '99MIN-LTMT', '999MIN-LTMT']
lags = [1,7,14]
all_data = {k: None for k in regressands}

all_data['li_future'] = pd.read_excel(f"{data_dir}/Historical Prices (9).xlsx")[['Date', 'Close']]
all_data['li2co3'] = pd.read_csv(f"{data_dir}/Lithium Carbonate (wind database).csv").rename(columns={'Li2CO3 99%': 'Close'})
all_data['lioh'] = pd.read_csv(f"{data_dir}/Lithium Hydroxide (wind database).csv").rename(columns={'LiOH 56.5%': 'Close'})
for reg in regressands:
    all_data[reg] = pd.read_csv(f"{data_dir}/{reg}.csv")[['Date', 'Price']].rename(columns={'Price':'Close'})

for reg in all_data.keys():
    if reg == 'li_future':
        all_data[reg] = process_price_dataframe(all_data[reg], price_cols=['Close'], date_format='%b %d, %Y', lags=lags)
    elif reg in ['li2co3', 'lioh']:
        all_data[reg] = process_price_dataframe(all_data[reg], price_cols=['Close'], date_format='%Y-%m-%d', lags=lags)
    else:
        all_data[reg] = process_price_dataframe(all_data[reg], price_cols=['Close'], date_format='%m/%d/%Y', lags=lags)
    all_data[reg].to_csv(f"{cleaned_data_dir}/{reg}_cleaned.csv")

all_data['li2co3']

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0_level_0,Close,lag_1_price,lag_1_log_return,lag_7_price,lag_7_log_return,lag_14_price,lag_14_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-01-04,23200.0,,,,,,
2005-01-05,23200.0,23200.0,0.000000,,,,
2005-01-06,23200.0,23200.0,0.000000,,,,
2005-01-07,23200.0,23200.0,0.000000,,,,
2005-01-10,23200.0,,,,,,
...,...,...,...,...,...,...,...
2023-05-16,265000.0,258000.0,0.026770,195500.0,0.304169,,
2023-05-17,274000.0,265000.0,0.033398,208500.0,0.273189,,
2023-05-18,290000.0,274000.0,0.056753,225000.0,0.253781,179500.0,0.479706
2023-05-19,292000.0,290000.0,0.006873,242000.0,0.187816,180500.0,0.481023


## News Data Ingestion
- Use lithium_merged.csv as the source of all Lithium-related news
- Only keep news url and tone, which would be used to fetch all text contents --> NLP

In [6]:
news = pd.read_csv(f'{data_dir}/lithium_merged.csv')
news.set_index('DATE', inplace=True)
news.index = pd.to_datetime(news.index, format = '%Y%m%d%H%M%S')
news = news.sort_index(ascending = True)['2017-05-02':]
news = news[['DocumentIdentifier', 'V2Tone']].rename(columns={'DocumentIdentifier': 'url', 'V2Tone': 'tone'})

display(news)

Unnamed: 0_level_0,url,tone
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-05-02 13:30:00,https://www.insiderfinancial.com/lithium-x-ene...,0.818554
2017-05-02 15:30:00,http://www.prnewswire.com/news-releases/hotter...,-1.374570
2017-05-03 06:00:00,http://www.einnews.com/pr_news/379071017/power...,0.000000
2017-05-03 11:00:00,http://www.einnews.com/pr_news/379118179/nemas...,-0.092593
2017-05-03 13:30:00,http://www.finanznachrichten.de/nachrichten-20...,0.207469
...,...,...
2023-05-30 22:00:00,https://www.sandiegoreader.com/news/2023/may/3...,-4.336043
2023-05-31 00:30:00,https://www.havasunews.com/nation/could-the-ru...,-3.505911
2023-05-31 12:00:00,https://www.finanznachrichten.de/nachrichten-2...,-0.213447
2023-05-31 13:00:00,https://www.finanznachrichten.de/nachrichten-2...,3.892028


In [7]:
up_key1=['lithium-ore', 'nickel', 'cobalt','lithium-future','lithium-mining-companies', 'nickel-futures',
        'spodumene', 'spodumenite','lithium-market-share', 'cobalt-oxide', 'nickel-index', 'lithium-ore-reserves',
        'lithium-etf','lithium-index','lithium-concentration', 'industry-grade','battery-grade', 'li2co3', 'li-oh','lioh','lithium-mangnate',
        'lithium-iron-phosphate', 'ternary-materials', 'lithium-refining','lithium-carbonate','lithium-hydroxide','lithium-production']
up_key=up_key1

down_key1=['ev-car','electric-battery','lithium-battery', 'ev-car-subsidy','battery-subsidy', 'ev-company', 'ev-sales', 'ev-tax-credit','battery-tax-credit',
          'storage', 'lfp-battery','lithium-battery-companies','price-of-li-ion-battery','ternary-lithium-battery']
down_key=down_key1

for space in ["+","_","%20"," "]:
    up_key2=[sub.replace("-",space) for sub in up_key1]
    up_key=up_key+up_key2
    down_key2=[sub.replace("-",space) for sub in down_key1]
    down_key=down_key+down_key2

up_key=list(set(up_key))
down_key=list(set(down_key))

In [13]:
from multiprocessing import  Pool

lemmatizer = WordNetLemmatizer()

def process_text(text):

    # expand shortened words, e.g. don't to do not
    text = contractions.fix(text)
    
    # convert to lowercase
    text = text.lower()

    # remove newline characters
    text = re.sub('\n', ' ', text)

    # remove extra whitespace
    text = re.sub('\s+', ' ', text).strip()

    return text

def match_keyword(url, keywords):
    try:
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()
        res = {
            'title': process_text(article.title),
            'text': process_text(article.text),
            'summary': process_text(article.summary),
            'keywords': article.keywords,
        }

        # sentence = TextBlob(res['text'])
        # for kw in keywords:
        #     print(kw, sentence.words.count(kw))
        # print(res['text'])
    
        for item in ['title', 'text', 'summary']:
            for kw in keywords:
                if lemmatizer.lemmatize(kw) in res[item]:
                    return True
        
        for item in res['keywords']:
            for kw in keywords:
                if lemmatizer.lemmatize(kw) in item:
                    return True
            
        return False
    
    except:
        return '1'
    
def add_categories(df):
    df['upstream'] = df['url'].apply(lambda x: match_keyword(x, up_key))
    df['downstream'] = df['url'].apply(lambda x: match_keyword(x, down_key))
    return df

In [15]:
# news1 = news.copy()[:12]
# news1 = add_categories(news1)
# display(news1)
# news1.to_csv(f'{cleaned_data_dir}/news_cat_cleaned.csv')

In [25]:
news_cat = pd.read_csv(f'{cleaned_data_dir}/news_cat_cleaned.csv')
display(news_cat)
print("Upstream count:", len(news_cat[news_cat['upstream'] == 'True']))
print("Downstream count:", len(news_cat[news_cat['downstream'] == 'True']))

Unnamed: 0,DATE,url,tone,upstream,downstream
0,2017-05-02 13:30:00,https://www.insiderfinancial.com/lithium-x-ene...,0.818554,False,False
1,2017-05-02 15:30:00,http://www.prnewswire.com/news-releases/hotter...,-1.374570,True,False
2,2017-05-03 06:00:00,http://www.einnews.com/pr_news/379071017/power...,0.000000,False,True
3,2017-05-03 11:00:00,http://www.einnews.com/pr_news/379118179/nemas...,-0.092593,True,True
4,2017-05-03 13:30:00,http://www.finanznachrichten.de/nachrichten-20...,0.207469,False,False
...,...,...,...,...,...
6681,2023-05-30 22:00:00,https://www.sandiegoreader.com/news/2023/may/3...,-4.336043,False,True
6682,2023-05-31 00:30:00,https://www.havasunews.com/nation/could-the-ru...,-3.505911,False,False
6683,2023-05-31 12:00:00,https://www.finanznachrichten.de/nachrichten-2...,-0.213447,True,False
6684,2023-05-31 13:00:00,https://www.finanznachrichten.de/nachrichten-2...,3.892028,True,True


Upstream count: 2424
Downstream count: 1312


# Entity Recognition

In [1]:
from gdeltdoc import GdeltDoc, Filters, near, repeat

### Lithium Midstream Companies

- 0TWH.LSE: Allkem LLC
- MIN.AX: Mineral Resources Ltd
- VAR1.STU: Varta AG
- 002340.SZ: GEM Co., Ltd
- 300919.SZ: CNGR Advanced Material Co Ltd
- 603799.SS: Zhejiang Huayou Cobalt Co.,Ltd
- 002125.SZ: Xiangtan Electrochemical Technology Co Ltd
- 300073.SZ: Beijing Easpring Material Technology
- 603659.SS: Shanghai Putailai New Energy Technology Co Ltd
- 600884.SS: Ningbo Shanshan Co Ltd
- 300035.SZ: Hunan Zhongke Electric
- 002812.SZ: Yunnan Energy New Material Co Ltd
- 300568.SZ: Shenzhen Senior Technology Material Co Ltd
- 002080.SZ: Sinoma Science & Technology Co Ltd
- 002709.SZ: Guangzhou Tinci Materials Technology Co Ltd
- 002407.SZ: Do-Fluoride New Materials Co Ltd
- 002759.SZ: TONZE NEW ENERGY TECHNOLOGY CO LTD

### Lithium Mining Companies

- SLI.USL: Standard Lithium Ltd
- ALB.US: Albemarle Corporation
- SQM.US: Sociedad Quimica y Minera de Chile SA
- 1772.Hk: Ganfeng Lithium Co Ltd
- 002466.SHE: Tianqi Lithium Corporation
- LTHM.US: Livent Corporation
- PLL.US: Piedmont Lithium Ltd
- LAC.US: Lithium Americas Corp
- GALXF.US: Galaxy Resources Limited
- ENS.US: EnerSys
- VUL.AU: Vulcan Energy Resources Limited
- EMHLF.US: European Metals Holdings Limited
- CRE.V: Critical Elements Corporation

### Lithium Mines

- Greenbushes Lithium Mine - Australia (owned by Talison Lithium, a joint venture between Tianqi Lithium and Albemarle)
- Salar del Hombre Muerto - Argentina (operated by Livent Corporation)
- Salar de Atacama - Chile (operated by SQM - Sociedad Química y Minera de Chile)
- Mt Cattlin Lithium Mine - Australia (owned by Galaxy Resources)
- Jiangxi Ganfeng Lithium Co., Ltd. - China (various mines)
- Bikita Minerals - Zimbabwe (operated by Bikita Minerals)
- Greenbushes Lithium Mine (Kuitpo) - Australia (owned by Mineral Resources Limited)
- Sichuan Tianqi Lithium Industries Inc. - China (various mines)
- Mibra Lithium Mine - Brazil (operated by Sigma Lithium Resources Corporation)
- Mariana Lithium Mine - Argentina (operated by Galaxy Resources)

### Lithium Battery Companies

- PCRFY.US: Japan's Panasonic is one of the world's largest producers of rechargeable lithium-ion batteries. The partnership with Tesla, in particular, has given Panasonic a significant presence in the field of electric vehicle batteries.

- 051910.KO: LG Chem is a South Korean chemical company and one of the world's largest manufacturers of lithium-ion batteries. The company provides batteries for electric vehicles, energy storage and mobility applications.

- 300750.SHE: CATL is a battery maker in China and one of the world's largest makers of batteries for electric vehicles. The company's customers include many international and Chinese electric vehicle manufacturers.

- 006400.KO: Samsung SDI, a subsidiary of South Korea's Samsung Group, is one of the world's leading manufacturers of batteries, whose products are widely used in electric vehicles, energy storage and mobile devices.

- MRAAF.US: Murata is also a producer of lithium-ion batteries, especially with a high market share in the field of small batteries. These batteries are mainly used in various mobile devices such as smartphones and laptops.

- 300014.SHE: The company's products include various types of lithium batteries, such as lithium-ion batteries, lithium-polymer batteries, lithium-iron-phosphorus batteries, etc., which are used in many fields, including consumer electronics, power tools, electric vehicles, energy storage, etc.

- ENR.US: Energizer is one of the largest battery manufacturers in the world, and its product line includes various types of batteries such as alkaline batteries, lithium batteries, rechargeable batteries, etc. Its "Energizer" and "Eveready" brands are well known around the world.

- 1211.HK: 1211.HK is the stock code of BYD Company Ltd. on the Hong Kong Stock Exchange. BYD is a world-leading high-tech company headquartered in Shenzhen, China, mainly engaged in the research and development, production and sales of automobiles, electronic equipment and new energy products.

- SONY.US: Sony is a world-renowned electronics manufacturer and entertainment company with a wide range of businesses, including the production of lithium batteries. In fact, Sony was the first company in the world to commercialize lithium-ion batteries, launching the world's first commercial lithium-ion batteries in 1991.`

### DLE (Direct Lithium Extraction) Companies

- Livent Corporation
- Sunresin New Materials
- Eramet Group
- International Battery Metals
- EnergySource Minerals
- Energy Exploration Technologies (EnergyX)
- Rio Tino
- Albemarle Corporation
- Sociedad Química y Minera (SQM)
- Compass Minerals International
- Standard Lithium
- Lilac Solutions
- Summit Nanotech
- IBC Advanced Technologies
- Controlled Thermal
- Occidential Petroleum
- Vulcan Energy Resources

In [17]:
midstream_list_clean = {
    "002340.SZ": ['Jingmen Gelinmei New Materials', 'Gelinmei New Materials', 'Gelinmei'],
    "300919.SZ": ["cngr advanced material", 'cngr'],
    "VAR1.STU":	['varta ag'],
    "603799.SS": ['Zhejiang Huayou Cobalt', 'Huayou Cobalt'],
    "002125.SZ": ['Xiangtan Electrochemical Technology', 'chinaemd', 'Xiangtan Electrochemical Scientific'],
    "300073.SZ": ['Beijing Easpring Material Tech', 'easpring'],
    "603659.SS": ['Shanghai Putailai New Energy Technology ', 'Shanghai Putailai', 'Putailai', 'Shanghai PTL'],
    "600884.SS": ['Ningbo Shanshan'],
    "300035.SZ": ['Hunan Zhongke Electric', 'zhongkeelectric', 'Zhongke Electric', 'Hunan Zhongke'],
    "002812.SZ": ['Yunnan Energy New Material'],
    "300568.SZ": ['Shenzhen Senior Technology Material', 'senior798.com'],
    "002080.SZ": ['Sinoma Science & Technology', 'Sinoma Science&Technology', 'Sinoma Science and Technology', 'sinomatech.com'],
    "002709.SZ": ['Tinci Materials', 'Guangzhou Tinci Materials Technology', 'Guangzhou Tinci'],
    "002407.SZ": ['Do-Fluoride New Materials', 'Do-Fluoride Chemicals', 'dfdchem.com'],
    "002759.SZ": ['TONZE NEW ENERGY TECHNOLOGY', 'Tonze New Energy', 'Tonze.com'],
    '0TWH.LSE':	['allkem', 'Orocobre', 'Allkem.co'],
    'MIN.AX': ['Mineral Resources Ltd', 'MinRes', 'ASX:MIN', 'Mineral Resources Limited', 'MIN.AU', 'MIN.AX'],
}


mining_co_list_clean = {
    "SLI.US": ["standard lithium"],
    "ALB.US": ["albemarle corp"],
    'SQM.US': ["sociedad quimica y minera de chile sa"],
    '1772.Hk': ["ganfeng lithium"],
    '002466.SHE': ["tianqi lithium"],
    'LTHM.US': ["livent corp"],
    'PLL.US': ["piedmont lithium"],
    'LAC.US': ["lithium americas"],
    'GALXF.US': [],
    'ENS.US': ["enersys"],
    'VUL.AU': ["vulcan energy resources"],
    'EMHLF.US': ["european metals holdings"],
    'CRE.V': ["critical elements"], 
}

lithium_mines_clean = {
    'Greenbushes Lithium Mine': ['greenbushes mine', 'greenbushes lithium', 'greenbushes project'],
    'Salar del Hombre Muerto': ['Salar del Hombre Muerto'],
    'Salar de Atacama': ['Salar de Atacama'],
    'Mt Cattlin Lithium Mine': ['Mt Cattlin Lithium Mine', 'Mt Cattlin Mine'],
    'Bikita Minerals': ['Bikita Minerals'],
    'Mibra Lithium Mine': ['Mibra lithium Mine', 'Mimbra Mine'],
    'Mariana Lithium Mine': ['Mariana Lithium'],
}

battery_co_list_clean = {
    'PCRFY.US': ['Panasonic'],
    '051910.KO': ['LG Chem'],
    '300750.SHE': ['contemporary amperex technology'],
    '006400.KO': ['Samsung'],
    'MRAAF.US': ['Murata'],
    '300014.SHE': ['EVE energy'],
    'ENR.US': ['Energizer'],
    '1211.HK': ['byd'],
    'SONY.US': ['sony'],
}

DLE_clean = {
	"LTHM": ["Livent"],
	"300487.SZ": ["Sunresin New Materials"],
	"": ["Eramet"],
	"IBAT.CN": ["International Battery Metals"],
	"": ["EnergySource Minerals"],
	"": ["EnergyX"],
	"RIO.AX": ["Rio Tinto"],
	"": ["Albemarle"],
	"": ["Sociedad Química y Minera", "SQM"],
	"CMP": ["Compass Minerals International"],
	"": ["Standard Lithium"],
	"": ["Lilac Solutions"],
	"": ["Summit Nanotech"],
	"": ["IBC Advanced Technologies"],
	"": ["Controlled Thermal"],
	"": ["Occidental Petroleum"],
	"VUL.AX": ["Vulcan Energy Resources"],
}


In [18]:
refining = [
    'Tianqi Lithium',
    'Ganfeng Lithium',
    'Mineral Resources Limited',
    'Pilbara Minerals',
    'Allkem Limited',
    'Lithium Americas',
    'Sichuan Yahua Group',
    'Livent',
    'Jiangxi Special Electric Motor',
    'Yongxing Special Materials Technology',
    'Sinomine Resource',
    'Altura Mining Limited',
    'Critical Elements Lithium Corporation',
    'Eramet SA',
    'Galaxy Resources Limited',
    'Infinity Lithium Corporation Limited',
    'Lithium Chile Inc.',
    'Lithium Power International Limited',
    'Lithium South Development Corporation',
    'Lithium Werks',
    'Lithium Energy Products',
    'Lithium Exploration Group',
    'Lithium Corporation',
    'LithiumOre Corp.',
    'Lithium X',
    'Lithium Energi Exploration',
    'Millennial Lithium Corp.',
    'Nemaska Lithium Inc.',
    'Neo Lithium Corp.',
    'North American Lithium Inc.',
    'Piedmont Lithium Limited',
    'Power Metals Corp.',
    'Pure Energy Minerals Limited',
    'QMC Quantum Minerals Corp.',
    'Rare Earth Salts',
    'Sigma Lithium Resources Corporation',
    'Sonora Lithium Ltd.',
]


mining = [
    'Lithium Americas',
    'Sichuan Yahua Group',
    'Allkem Limited',
    'Livent',
    'Mineral Resources Limited',
    'Pilbara Minerals',
    'Jiangxi Special Electric Motor',
    'Yongxing Special Materials Technology',
    'Sinomine Resource',
    'Altura Mining Limited',
    'Critical Elements Lithium Corporation',
    'Eramet SA',
    'Galaxy Resources Limited',
    'Infinity Lithium Corporation Limited',
    'International Lithium Corp.',
    'Lithium Chile Inc.',
    'Lithium South Development Corporation',
    'Lithium Werks',
    'Lithium Corporation',
    'Lithium X',
    'Lithium Energy Japan',
    'Lithium Energy Limited',
    'Lithium Urban Technologies',
    'Lithium Valley Technology',
    'LSC Lithium Corporation',
    'MGX Minerals Inc.',
    'QMC Quantum Minerals Corp.',
    'Sigma Lithium Resources Corporation',
    'Lithium Australia NL',
    'Lithium Ionic Corp.',
    'Arena Minerals Inc.',
    'Rock Tech Lithium Inc.',
    'American Lithium',
    'Wealth Minerals Ltd.',
    'Zadar Ventures Ltd.',
    'Lithium Power International',
    'Bacanora Lithium Ltd',
    'RB Energy',
    'Tianqi Lithium',
    'Ganfeng Lithium',
]

midstream = [
    'Ganfeng Lithium',
    'Livent',
    'General Lithium',
    'Neometals',
    'Eramet',
    'Nemaska Lithium',
    'Galaxy Resources',
    'Orocobre',
    'Piedmont Lithium',
    'Lithium Americas',
    'POSCO',
    'Tianqi Lithium',
    'Beta Hunt',
    'Sayona Mining',
    'Pilbara Minerals',
    'Altura Mining',
    'Kidman Resources',
    'Neo Lithium',
    'Bacanora Lithium',
    'Core Lithium',
    'European Metals',
    'Plateau Energy Metals',
    'Millennial Lithium',
    'Lake Resources',
]

In [20]:
redundant = ['co', 'plc', 'ltd', '&', 'inc', 'company', 'corp', 'corporation', 'limited']
redundant = redundant + [x + '.' for x in redundant] + ['.' + x for x in redundant]

def clean_bot(x):
    """
    The actual robot which is gonna clean the company suffix for every company.
    :type x: str
    :rtype: str
    """
    if x in ['lithium corporation']:
        return x
    x = x.split(' ')
    while len(x) > 0 and x[-1] in redundant:
        del x[-1]
    return ' '.join(x)

def compile_entity_names(dict_list):
    res = []
    for dict in dict_list:
        res = res + sum(list(dict.values()), [])
    res = map(lambda x: clean_bot(x.lower()), res)
    res = list(set(res))
    return sorted(list(set(res)))

def compile_entity_names2(listoflist):
    res = []
    for lst in listoflist:
        res = res + lst
    res = list(map(lambda x: clean_bot(x.lower()), res))
    return sorted(list(set(res)))

def company_in_string(str, mask):
    for m in mask:
        if m.lower() in str:
            return True
    return False

ALL_COMPANY_NAMES1 = compile_entity_names([mining_co_list_clean, lithium_mines_clean, DLE_clean])
ALL_COMPANY_NAMES2 = compile_entity_names2([refining, mining])

ALL_COMPANY_NAMES = sorted(list(set(ALL_COMPANY_NAMES1 + ALL_COMPANY_NAMES2)))
for company in ALL_COMPANY_NAMES:
    # print(f'    OR LOWER(V2Organizations) LIKE "%{company}%"')
    print(company)

print(len(ALL_COMPANY_NAMES))

albemarle
allkem
altura mining
american lithium
arena minerals
bacanora lithium
bikita minerals
compass minerals international
critical elements
critical elements lithium
enersys
eramet sa
european metals holdings
galaxy resources
ganfeng lithium
greenbushes lithium
greenbushes mine
greenbushes project
infinity lithium
international battery metals
international lithium
jiangxi special electric motor
lithium americas
lithium australia nl
lithium chile
lithium corporation
lithium energi exploration
lithium energy
lithium energy japan
lithium energy products
lithium exploration group
lithium ionic
lithium power international
lithium south development
lithium urban technologies
lithium valley technology
lithium werks
lithium x
lithiumore
livent
lsc lithium
mariana lithium
mgx minerals
mibra lithium mine
millennial lithium
mimbra mine
mineral resources
mt cattlin lithium mine
mt cattlin mine
nemaska lithium
neo lithium
north american lithium
occidental petroleum
piedmont lithium
pilbara min