In [3]:
import spacy

#!pip install -U pandas
import pandas as pd

Collecting pandas
  Downloading pandas-1.4.3-cp39-cp39-win_amd64.whl (10.6 MB)
Collecting pytz>=2020.1
  Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.4.3 pytz-2022.1


In [42]:
stocks_df = pd.read_csv('data/stocks.tsv', sep = '\t')
stocks_df.head()

index_df = pd.read_csv("data/indexes.tsv", sep = '\t')
index_df.head()

exchanges_df = pd.read_csv("data/stock_exchanges.tsv", sep = '\t')
exchanges_df.head()

Unnamed: 0,BloombergExchangeCode,BloombergCompositeCode,Country,Description,ISOMIC,Google Prefix,EODcode,NumStocks
0,AF,AR,Argentina,Bolsa de Comercio de Buenos Aires,XBUE,,BA,12
1,AO,AU,Australia,National Stock Exchange of Australia,XNEC,,,1
2,AT,AU,Australia,Asx - All Markets,XASX,ASX,AU,875
3,AV,,Austria,Wiener Boerse Ag,XWBO,VIE,VI,38
4,BI,,Bahrain,Bahrain Bourse,XBAH,,,4


In [44]:
#Change them to lists
symbols = stocks_df.Symbol.tolist()
companies = stocks_df.CompanyName.tolist()
indexes = index_df.IndexName.tolist()
index_symbols = index_df.IndexSymbol.tolist()
exchanges = exchanges_df.ISOMIC.tolist()+exchanges_df["Google Prefix"].tolist()+exchanges_df.Description.tolist()

In [45]:
stops = ["two"] #list of false positives basically
nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")

#Create a long list of patterns of STOCK:Symbol and COMPANY:CompanyName
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
patterns = []
for symbol in symbols:
    patterns.append({"label": "STOCK", "pattern": symbol})
    for l in letters: #Now able to capture things like "APPL.O
        patterns.append({"label": "STOCK", "pattern": symbol+f".{l}"})

for company in companies:
    if company not in stops:
        patterns.append({"label": "COMPANY", "pattern": company})

for index in indexes:
    patterns.append({"label": "INDEX", "pattern": index})
    words = index.split()
    patterns.append({"label": "INDEX", "pattern": " ".join(words[:2])}) #Case where 'S&P500' matches to S&P500 index

for index in index_symbols:
    patterns.append({"label": "INDEX", "pattern": index})

for e in exchanges:
    patterns.append({"label": "STOCK_EXCHANGE", "pattern": e})

ruler.add_patterns(patterns)

In [20]:
#source: https://www.reuters.com/business/futures-rise-after-biden-xi-call-oil-bounce-2021-09-10/
text = '''
Sept 10 (Reuters) - Wall Street's main indexes were subdued on Friday as signs of higher inflation and a drop in Apple shares following an unfavorable court ruling offset expectations of an easing in U.S.-China tensions.

Data earlier in the day showed U.S. producer prices rose solidly in August, leading to the biggest annual gain in nearly 11 years and indicating that high inflation was likely to persist as the pandemic pressures supply chains. read more .

"Today's data on wholesale prices should be eye-opening for the Federal Reserve, as inflation pressures still don't appear to be easing and will likely continue to be felt by the consumer in the coming months," said Charlie Ripley, senior investment strategist for Allianz Investment Management.

Apple Inc (AAPL.O) fell 2.7% following a U.S. court ruling in "Fortnite" creator Epic Games' antitrust lawsuit that stroke down some of the iPhone maker's restrictions on how developers can collect payments in apps.


Sponsored by Advertising Partner
Sponsored Video
Watch to learn more
Report ad
Apple shares were set for their worst single-day fall since May this year, weighing on the Nasdaq (.IXIC) and the S&P 500 technology sub-index (.SPLRCT), which fell 0.1%.

Sentiment also took a hit from Cleveland Federal Reserve Bank President Loretta Mester's comments that she would still like the central bank to begin tapering asset purchases this year despite the weak August jobs report. read more

Investors have paid keen attention to the labor market and data hinting towards higher inflation recently for hints on a timeline for the Federal Reserve to begin tapering its massive bond-buying program.

The S&P 500 has risen around 19% so far this year on support from dovish central bank policies and re-opening optimism, but concerns over rising coronavirus infections and accelerating inflation have lately stalled its advance.


Report ad
The three main U.S. indexes got some support on Friday from news of a phone call between U.S. President Joe Biden and Chinese leader Xi Jinping that was taken as a positive sign which could bring a thaw in ties between the world's two most important trading partners.

At 1:01 p.m. ET, the Dow Jones Industrial Average (.DJI) was up 12.24 points, or 0.04%, at 34,891.62, the S&P 500 (.SPX) was up 2.83 points, or 0.06%, at 4,496.11, and the Nasdaq Composite (.IXIC) was up 12.85 points, or 0.08%, at 15,261.11.

Six of the eleven S&P 500 sub-indexes gained, with energy (.SPNY), materials (.SPLRCM) and consumer discretionary stocks (.SPLRCD) rising the most.

U.S.-listed Chinese e-commerce companies Alibaba and JD.com , music streaming company Tencent Music (TME.N) and electric car maker Nio Inc (NIO.N) all gained between 0.7% and 1.4%


Report ad
Grocer Kroger Co (KR.N) dropped 7.1% after it said global supply chain disruptions, freight costs, discounts and wastage would hit its profit margins.

Advancing issues outnumbered decliners by a 1.12-to-1 ratio on the NYSE and by a 1.02-to-1 ratio on the Nasdaq.

The S&P index recorded 14 new 52-week highs and three new lows, while the Nasdaq recorded 49 new highs and 38 new lows.
'''

In [46]:
doc = nlp(text)
for ent in doc.ents:
    print(ent.text, ent.label_)

Apple COMPANY
Apple COMPANY
AAPL.O STOCK
Apple COMPANY
Nasdaq COMPANY
S&P 500 INDEX
S&P 500 INDEX
ET STOCK
Dow Jones Industrial Average INDEX
S&P 500 INDEX
Nasdaq COMPANY
S&P 500 INDEX
JD.com COMPANY
TME.N STOCK
NIO.N STOCK
Kroger COMPANY
KR.N STOCK
NYSE STOCK_EXCHANGE
Nasdaq COMPANY
Nasdaq COMPANY


In [47]:
from spacy import displacy
colors = {'company': "#85C1E9", "stock": "#ff6961", "index": "#00FF00", "stock_exchange":"#800080"}
options = {"ents": ['company', 'stock', 'index', 'stock_exchange'], "colors": colors}

displacy.render(doc, style='ent',options=options)