In [1]:
import json
from pathlib import Path
from functools import reduce

import pandas as pd

from IPython.display import display

In [2]:
ARTICLES_DIR = "../../data/test/user_study/articles/"
STOCKS_PATH = "../../data/processed/stocks/revolut.2021-07-05.complete.stocks.jsonl"

###  Sorting out paths

In [3]:
def key(path):
    stem = path.stem
    if stem.startswith("oos"):
        return int(stem[-1])
    else:
        return int(stem)

In [4]:
# Sorting out paths
article_paths = sorted(list(Path(ARTICLES_DIR).glob("*.json")), key=key)
article_paths

[PosixPath('../../data/test/user_study/articles/0.json'),
 PosixPath('../../data/test/user_study/articles/oos_0.json'),
 PosixPath('../../data/test/user_study/articles/1.json'),
 PosixPath('../../data/test/user_study/articles/oos_1.json'),
 PosixPath('../../data/test/user_study/articles/oos_2.json'),
 PosixPath('../../data/test/user_study/articles/3.json'),
 PosixPath('../../data/test/user_study/articles/oos_3.json'),
 PosixPath('../../data/test/user_study/articles/4.json'),
 PosixPath('../../data/test/user_study/articles/oos_4.json'),
 PosixPath('../../data/test/user_study/articles/oos_5.json'),
 PosixPath('../../data/test/user_study/articles/5.json'),
 PosixPath('../../data/test/user_study/articles/oos_6.json'),
 PosixPath('../../data/test/user_study/articles/6.json'),
 PosixPath('../../data/test/user_study/articles/oos_7.json'),
 PosixPath('../../data/test/user_study/articles/7.json'),
 PosixPath('../../data/test/user_study/articles/8.json'),
 PosixPath('../../data/test/user_study/a

### Inverting articles dict for relevant articles

In [5]:
def read_json(path):
    with open(path, "r") as fp:
        return json.load(fp)


def read_jsonl(path):
    with open(path, "r") as fp:
        for line in fp:
            yield json.loads(line)

In [6]:
stocks

NameError: name 'stocks' is not defined

In [8]:
articles = dict()
stocks = dict()

for path in article_paths:
    article = read_json(path)
    articles[article["doc_id"]] = article
    for stock in article["stock_matches"]:
        ticker = stock["ticker_symbol"]
        if ticker not in stocks.keys():
            stocks[ticker] = stock
            stocks[ticker]["in_articles"] = list()
        stocks[ticker]["in_articles"].append({k:v for k,v in article.items()
                                              if k in ["doc_id", "title", "url", "summary"]})

len(stocks.keys())

246

In [10]:
stocks["LUMN"]

{'index': 439,
 'stock_name': 'Lumen Technologies',
 'ticker_symbol': 'LUMN',
 'sector': 'Communications',
 'industry': 'Specialty Telecommunications',
 'comment': 'Lumen Technologies (formerly CenturyLink) is an American telecommunications company headquartered in Monroe, Louisiana, that offers communications, network services, security, cloud solutions, voice, and managed services. The company is a member of the S&P 500 index and the Fortune 500. Its communications services include local and long-distance voice, broadband, Multi-Protocol Label Switching (MPLS), private line (including special access), Ethernet, hosting (including cloud hosting and managed hosting), data integration, video, network, public access, Voice over Internet Protocol (VoIP), information technology, and other ancillary services. Lumen also serves global enterprise customers across North America, Latin America, EMEA (Europe, Middle East, and Africa), and Asia Pacific.',
 'score': 0.7538793208249759,
 'in_articl

In [12]:
stocks_df = pd.DataFrame.from_dict(stocks, orient="index")
stock_matches_counts = {k:len(v["in_articles"]) for k, v in stocks.items()}

In [13]:
stocks_df["in_articles_count"] = stocks_df.in_articles.apply(len)

In [14]:
with pd.option_context("display.max_rows", None):
    display(stocks_df.groupby(["sector", "industry"]).in_articles_count.sum())

sector                  industry                        
Commercial Services     Advertising/Marketing Services       1
                        Financial Publishing/Services        1
Communications          Major Telecommunications             1
                        Specialty Telecommunications         2
                        Telecommunications Equipment         2
                        Wireless Telecommunications          1
Consumer Durables       Electronics/Appliances               2
                        Homebuilding                         1
                        Motor Vehicles                      15
                        Recreational Products                1
Consumer Non-Durables   Apparel/Footwear                     4
                        Beverages: Alcoholic                 4
                        Beverages: Non-Alcoholic             1
                        Food: Major Diversified              2
                        Household/Personal Care              

In [15]:
test_articles_top_tickers = reduce(set.union, [set(v["ticker_top"]) for v in articles.values() 
                                               if "ticker_top" in v.keys()])
test_articles_top_tickers

{'AAL',
 'AAPL',
 'ABEV',
 'AMC',
 'AMD',
 'BABA',
 'BBD',
 'C',
 'CCL',
 'CVX',
 'DIDI',
 'EDU',
 'ET',
 'F',
 'FCX',
 'INTC',
 'IQ',
 'ITUB',
 'KSS',
 'M',
 'MRNA',
 'NCLH',
 'NIO',
 'NVDA',
 'PBR',
 'PFE',
 'PLUG',
 'SOFI',
 'T',
 'TME',
 'TSLA',
 'TSM',
 'UBER',
 'VALE',
 'VIPS',
 'X',
 'XOM'}

In [16]:
stocks_matches = dict()
keys = ["stock_name", "ticker_symbol", "sector", "industry", "comment"]
for i, stock_ in enumerate(read_jsonl(STOCKS_PATH)):
    ticker_symbol_ = stock_["ticker_symbol"]
    if ticker_symbol_ in test_articles_top_tickers:
        stocks_matches[ticker_symbol_] = dict()
        stocks_matches[ticker_symbol_]["index"] = i
        stocks_matches[ticker_symbol_].update({k:v for k,v in stock_.items()
                                               if k in keys})
stocks_matches

{'AMD': {'index': 11,
  'stock_name': 'Advanced Micro Devices',
  'ticker_symbol': 'AMD',
  'sector': 'Electronic Technology',
  'industry': 'Semiconductors',
  'comment': "Advanced Micro Devices, Inc. (AMD) is an American multinational semiconductor company based in Santa Clara, California, that develops computer processors and related technologies for business and consumer markets. While it initially manufactured its own processors, the company later outsourced its manufacturing, a practice known as going fabless, after GlobalFoundries was spun off in 2009. AMD's main products include microprocessors, motherboard chipsets, embedded processors and graphics processors for servers, workstations, personal computers and embedded system applications."},
 'BABA': {'index': 23,
  'stock_name': 'Alibaba',
  'ticker_symbol': 'BABA',
  'sector': 'Retail Trade',
  'industry': 'Internet Retail',
  'comment': 'Alibaba Group Holding Limited, also known as Alibaba Group and Alibaba.com, is a Chinese

In [14]:
top_sectors_industries = [(stock["sector"], stock["industry"]) for stock in stocks_matches.values()
                          if stock["ticker_symbol"] in test_articles_top_tickers]

print(len(top_sectors_industries))
print(len(set(top_sectors_industries)))

top_sectors_industries = set(top_sectors_industries)
top_sectors_industries

37
23


{('Communications', 'Major Telecommunications'),
 ('Consumer Durables', 'Motor Vehicles'),
 ('Consumer Non-Durables', 'Beverages: Alcoholic'),
 ('Consumer Services', 'Hotels/Resorts/Cruise lines'),
 ('Consumer Services', 'Movies/Entertainment'),
 ('Consumer Services', 'Other Consumer Services'),
 ('Electronic Technology', 'Electronic Components'),
 ('Electronic Technology', 'Semiconductors'),
 ('Electronic Technology', 'Telecommunications Equipment'),
 ('Energy Minerals', 'Integrated Oil'),
 ('Finance', 'Finance/Rental/Leasing'),
 ('Finance', 'Financial Conglomerates'),
 ('Finance', 'Major Banks'),
 ('Health Technology', 'Biotechnology'),
 ('Health Technology', 'Pharmaceuticals: Major'),
 ('Industrial Services', 'Oil & Gas Pipelines'),
 ('Non-Energy Minerals', 'Other Metals/Minerals'),
 ('Non-Energy Minerals', 'Steel'),
 ('Retail Trade', 'Department Stores'),
 ('Retail Trade', 'Internet Retail'),
 ('Technology Services', 'Internet Software/Services'),
 ('Technology Services', 'Packaged

In [15]:
sectors_industries = stocks_df.groupby(["sector", "industry"])
sect_ind_not_present = set(sectors_industries.indices.keys()) - top_sectors_industries

print(len(sect_ind_not_present))
sect_ind_not_present

51


{('Commercial Services', 'Advertising/Marketing Services'),
 ('Commercial Services', 'Financial Publishing/Services'),
 ('Communications', 'Specialty Telecommunications'),
 ('Communications', 'Telecommunications Equipment'),
 ('Communications', 'Wireless Telecommunications'),
 ('Consumer Durables', 'Electronics/Appliances'),
 ('Consumer Durables', 'Homebuilding'),
 ('Consumer Durables', 'Recreational Products'),
 ('Consumer Non-Durables', 'Apparel/Footwear'),
 ('Consumer Non-Durables', 'Beverages: Non-Alcoholic'),
 ('Consumer Non-Durables', 'Food: Major Diversified'),
 ('Consumer Non-Durables', 'Household/Personal Care'),
 ('Consumer Services', 'Broadcasting'),
 ('Consumer Services', 'Cable/Satellite TV'),
 ('Consumer Services', 'Restaurants'),
 ('Electronic Technology', 'Aerospace & Defense'),
 ('Electronic Technology', 'Computer Communications'),
 ('Electronic Technology', 'Computer Peripherals'),
 ('Electronic Technology', 'Computer Processing Hardware'),
 ('Electronic Technology', 

In [16]:
for key, group in sectors_industries:
    if key in sect_ind_not_present:
        group = group.sort_values("in_articles_count", ascending=False)
        print(key)
        display(group)
        top_match = group.head(1).to_dict(orient="index")
        top_match.pop("in_articles_count", None)
        
        stocks_matches.update(top_match)

len(stocks_matches)

('Commercial Services', 'Advertising/Marketing Services')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
GRPN,326,Groupon,GRPN,Commercial Services,Advertising/Marketing Services,Groupon is an American global e-commerce marke...,0.726922,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1


('Commercial Services', 'Financial Publishing/Services')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
MCO,476,Moody’s,MCO,Commercial Services,Financial Publishing/Services,"Moody's Corporation, often referred to as Mood...",0.675675,"[{'doc_id': 23, 'title': 'Citigroup floats $23...",1


('Communications', 'Specialty Telecommunications')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
LUMN,439,Lumen Technologies,LUMN,Communications,Specialty Telecommunications,Lumen Technologies (formerly CenturyLink) is a...,0.753879,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
VG,734,Vonage Holdings,VG,Communications,Specialty Telecommunications,"Vonage (/ˈvɒnɪdʒ/, legal name Vonage Holdings ...",0.751131,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1


('Communications', 'Telecommunications Equipment')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
CHL,161,China Mobile,CHL,Communications,Telecommunications Equipment,China Mobile is the trade name of both China M...,0.720579,"[{'doc_id': 39, 'title': 'Alibaba stock tumble...",2


('Communications', 'Wireless Telecommunications')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
MBT,469,Mobile TeleSystems PJSC,MBT,Communications,Wireless Telecommunications,"MTS (Russian: Мобильные ТелеСистемы, МТС, ""Mob...",0.691943,"[{'doc_id': 39, 'title': 'Alibaba stock tumble...",1


('Consumer Durables', 'Electronics/Appliances')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
ROKU,596,Roku,ROKU,Consumer Durables,Electronics/Appliances,"Roku, Inc. (/ˈroʊkuː/ ROH-koo) is an American ...",0.735365,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
GPRO,322,GoPro,GPRO,Consumer Durables,Electronics/Appliances,"GoPro, Inc. (marketed as GoPro and sometimes s...",0.687383,"[{'doc_id': 12, 'title': 'New Benchmark Leak R...",1


('Consumer Durables', 'Homebuilding')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
LEN,425,Lennar,LEN,Consumer Durables,Homebuilding,Lennar Corporation is a home construction and ...,0.670334,"[{'doc_id': 23, 'title': 'Citigroup floats $23...",1


('Consumer Durables', 'Recreational Products')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
ATVI,8,Activision Blizzard,ATVI,Consumer Durables,Recreational Products,"Activision Blizzard, Inc. is an American video...",0.692776,"[{'doc_id': 17, 'title': 'In a huge blow, judg...",1


('Consumer Non-Durables', 'Apparel/Footwear')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
VFC,726,VF Corporation,VFC,Consumer Non-Durables,Apparel/Footwear,VF Corporation (formerly Vanity Fair Mills unt...,0.578919,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",2
LEVI,427,Levi Strauss & Co,LEVI,Consumer Non-Durables,Apparel/Footwear,Levi Strauss & Co. (/ˈliːvaɪ ˈstraʊs/) is an A...,0.675879,"[{'doc_id': 'oos_8', 'title': 'How Fast Fashio...",1
HBI,332,Hanesbrands,HBI,Consumer Non-Durables,Apparel/Footwear,Hanesbrands Inc. is an American multinational ...,0.667513,"[{'doc_id': 41, 'title': 'Why Vipshop Stock Cr...",1


('Consumer Non-Durables', 'Beverages: Non-Alcoholic')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
KDP,406,Keurig Dr Pepper,KDP,Consumer Non-Durables,Beverages: Non-Alcoholic,Dr Pepper Snapple Group (also called Dr. Peppe...,0.712538,"[{'doc_id': 5, 'title': 'Better Beer Stock: Am...",1


('Consumer Non-Durables', 'Food: Major Diversified')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
KHC,416,Kraft Heinz,KHC,Consumer Non-Durables,Food: Major Diversified,"The Kraft Heinz Company (KHC), commonly known ...",0.70923,"[{'doc_id': 5, 'title': 'Better Beer Stock: Am...",1
OTLY,520,Oatly,OTLY,Consumer Non-Durables,Food: Major Diversified,Oatly Group AB is a Swedish food company that ...,0.649441,"[{'doc_id': 50, 'title': 'Uber drivers to stag...",1


('Consumer Non-Durables', 'Household/Personal Care')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
COTY,201,Coty,COTY,Consumer Non-Durables,Household/Personal Care,Coty Inc. is an American multinational beauty ...,0.660185,"[{'doc_id': 36, 'title': 'Kohls CEO talks Amaz...",2
EL,270,Estee Lauder,EL,Consumer Non-Durables,Household/Personal Care,The Estée Lauder Companies Inc. (/ˈɛsteɪ ˈlɔːd...,0.697228,"[{'doc_id': 37, 'title': 'Macy’s And Kohl’s De...",1


('Consumer Services', 'Broadcasting')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
VIACA,727,ViacomCBS,VIACA,Consumer Services,Broadcasting,ViacomCBS Inc. is an American diversified mult...,0.737592,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1


('Consumer Services', 'Cable/Satellite TV')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
CMCSA,183,Comcast,CMCSA,Consumer Services,Cable/Satellite TV,Comcast Corporation (formerly registered as Co...,0.747331,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
CABO,126,Cable One,CABO,Consumer Services,Cable/Satellite TV,"Cable One, Inc. (NYSE: CABO) is an American br...",0.741331,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
ATUS,31,Altice USA,ATUS,Consumer Services,Cable/Satellite TV,"Altice USA, Inc., commonly known as Altice, is...",0.732868,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
CHTR,152,Charter Communications,CHTR,Consumer Services,Cable/Satellite TV,"Charter Communications, Inc., is an American t...",0.729749,"[{'doc_id': 0, 'title': 'AT&T CFO Pascal Desro...",1
NFLX,490,Netflix,NFLX,Consumer Services,Cable/Satellite TV,"Netflix, Inc. is an American over-the-top cont...",0.696775,"[{'doc_id': 43, 'title': 'iQIYI romantic drama...",1


('Consumer Services', 'Restaurants')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
YUMC,768,Yum!,YUMC,Consumer Services,Restaurants,"Yum China Holdings, Inc. (Chinese: 百胜中国; pinyi...",0.579082,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1
DRI,215,Darden Restaurants,DRI,Consumer Services,Restaurants,"Darden Restaurants, Inc. is an American multi-...",0.64589,"[{'doc_id': 41, 'title': 'Why Vipshop Stock Cr...",1


('Electronic Technology', 'Aerospace & Defense')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
SPCE,731,Virgin Galactic,SPCE,Electronic Technology,Aerospace & Defense,Virgin Galactic (VG) is an American spacefligh...,0.715752,"[{'doc_id': 'oos_0', 'title': 'Bezos’ Blue Ori...",2
LMT,434,Lockheed Martin,LMT,Electronic Technology,Aerospace & Defense,Lockheed Martin Corporation is an American aer...,0.695841,"[{'doc_id': 'oos_0', 'title': 'Bezos’ Blue Ori...",2
KTOS,417,Kratos,KTOS,Electronic Technology,Aerospace & Defense,"Kratos Defense & Security Solutions, Inc, (Nas...",0.678396,"[{'doc_id': 'oos_0', 'title': 'Bezos’ Blue Ori...",1
NOC,507,Northrop Grumman,NOC,Electronic Technology,Aerospace & Defense,Northrop Grumman Corporation (NYSE: NOC) is an...,0.642617,"[{'doc_id': 'oos_7', 'title': 'Human space tra...",1
HEI,339,Heico,HEI,Electronic Technology,Aerospace & Defense,HEICO Corporation is an aerospace and electron...,0.635839,"[{'doc_id': 'oos_7', 'title': 'Human space tra...",1


('Electronic Technology', 'Computer Communications')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
PANW,538,Palo Alto Networks,PANW,Electronic Technology,Computer Communications,"Palo Alto Networks, Inc. (NYSE: PANW) is an Am...",0.651928,"[{'doc_id': 'oos_9', 'title': 'The privacy par...",1
ANET,66,Arista Networks,ANET,Electronic Technology,Computer Communications,Arista Networks (formerly Arastra) is an Ameri...,0.686179,"[{'doc_id': 12, 'title': 'New Benchmark Leak R...",1


('Electronic Technology', 'Computer Peripherals')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
WDC,747,Western Digital,WDC,Electronic Technology,Computer Peripherals,"Western Digital Corporation (WDC, commonly kno...",0.65673,"[{'doc_id': 'oos_9', 'title': 'The privacy par...",3
NTAP,488,NetApp,NTAP,Electronic Technology,Computer Peripherals,"NetApp, Inc. is an American hybrid cloud data ...",0.660757,"[{'doc_id': 'oos_9', 'title': 'The privacy par...",2
STX,610,Seagate Technology,STX,Electronic Technology,Computer Peripherals,"Seagate Technology Holdings plc, an Irish publ...",0.673174,"[{'doc_id': 11, 'title': 'AMD CEO Sees Chip Sh...",2


('Electronic Technology', 'Computer Processing Hardware')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
HPE,343,Hewlett Packard,HPE,Electronic Technology,Computer Processing Hardware,The Hewlett Packard Enterprise Company (HPE) i...,0.584865,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1


('Electronic Technology', 'Electronic Equipment/Instruments')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
FCEL,303,FuelCell Energy,FCEL,Electronic Technology,Electronic Equipment/Instruments,"FuelCell Energy, Inc. is an American fuel cell...",0.722388,"[{'doc_id': 10, 'title': 'Fresno County Will S...",2


('Energy Minerals', 'Oil & Gas Production')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
HES,342,Hess Corporation,HES,Energy Minerals,Oil & Gas Production,Hess Corporation (formerly Amerada Hess Corpor...,0.654307,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",3
AR,54,Antero Resources Corporation,AR,Energy Minerals,Oil & Gas Production,Antero Resources Corporation is a company enga...,0.645964,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",2
COP,193,ConocoPhillips,COP,Energy Minerals,Oil & Gas Production,ConocoPhillips is an American multinational co...,0.636313,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",2
CVE,145,Cenovus Energy Inc,CVE,Energy Minerals,Oil & Gas Production,Cenovus Energy Inc. (pronounced se-nō-vus) is ...,0.631413,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",2
OVV,258,Encana Corporation,OVV,Energy Minerals,Oil & Gas Production,"Ovintiv Inc., formerly Encana Corporation, is ...",0.737315,"[{'doc_id': 20, 'title': 'Petrobras (PBR) Aims...",2
FANG,225,Diamondback Energy Inc,FANG,Energy Minerals,Oil & Gas Production,Diamondback Energy is a company engaged in hyd...,0.71219,"[{'doc_id': 19, 'title': 'Why Chevron And Exxo...",1
EQT,265,EQT Corporation,EQT,Energy Minerals,Oil & Gas Production,EQT Corporation is an American energy company ...,0.755546,"[{'doc_id': 29, 'title': 'Energy Transfer earn...",1


('Energy Minerals', 'Oil Refining/Marketing')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
MPC,444,Marathon Petroleum,MPC,Energy Minerals,Oil Refining/Marketing,Marathon Petroleum Corporation is an American ...,0.646949,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",2
PBF,544,PBF Energy,PBF,Energy Minerals,Oil Refining/Marketing,PBF Energy Inc. is a petroleum refiner and sup...,0.748783,"[{'doc_id': 20, 'title': 'Petrobras (PBR) Aims...",2
VLO,717,Valero,VLO,Energy Minerals,Oil Refining/Marketing,Valero Energy Corporation is a Fortune 500 int...,0.716426,"[{'doc_id': 19, 'title': 'Why Chevron And Exxo...",1


('Finance', 'Investment Banks/Brokers')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
NDAQ,482,Nasdaq OMX Group,NDAQ,Finance,Investment Banks/Brokers,"Nasdaq, Inc. is an American multinational fina...",0.713604,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",5
EVR,273,Evercore,EVR,Finance,Investment Banks/Brokers,"Evercore Inc., formerly known as Evercore Part...",0.7457,"[{'doc_id': 26, 'title': 'Itaú Unibanco Holdin...",3
NMR,503,Nomura,NMR,Finance,Investment Banks/Brokers,"Nomura Holdings, Inc. (野村ホールディングス株式会社, Nomura ...",0.649057,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",2
CME,175,CME Group (Class A),CME,Finance,Investment Banks/Brokers,"CME Group Inc. (Chicago Mercantile Exchange, C...",0.660388,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",1
GS,320,Goldman Sachs,GS,Finance,Investment Banks/Brokers,"The Goldman Sachs Group, Inc., (/ˈsæks/) is an...",0.701736,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",1
MS,477,Morgan Stanley,MS,Finance,Investment Banks/Brokers,Morgan Stanley is an American multinational in...,0.698567,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",1
IBKR,376,Interactive Brokers,IBKR,Finance,Investment Banks/Brokers,Interactive Brokers LLC (IB) is an American mu...,0.69105,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",1


('Finance', 'Investment Managers')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
KKR,413,KKR & Co,KKR,Finance,Investment Managers,KKR & Co. Inc. (formerly known as Kohlberg Kra...,0.644591,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",2
BLK,106,BlackRock,BLK,Finance,Investment Managers,"BlackRock, Inc. is an American multinational i...",0.733482,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",1
IVZ,385,Invesco,IVZ,Finance,Investment Managers,Invesco Ltd. is an American independent invest...,0.740024,"[{'doc_id': 26, 'title': 'Itaú Unibanco Holdin...",1


('Finance', 'Life/Health Insurance')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
MET,461,Metlife,MET,Finance,Life/Health Insurance,"MetLife, Inc. is the holding corporation for t...",0.671407,"[{'doc_id': 23, 'title': 'Citigroup floats $23...",1


('Finance', 'Real Estate Development')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
HGV,345,Hilton Grand Vacations,HGV,Finance,Real Estate Development,Hilton Grand Vacations Inc. is based in Orland...,0.692052,"[{'doc_id': 51, 'title': 'American Airlines vs...",1


('Finance', 'Real Estate Investment Trusts')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
INVH,387,Invitation Homes,INVH,Finance,Real Estate Investment Trusts,Invitation Homes Inc. is the largest owner of ...,0.717226,"[{'doc_id': 23, 'title': 'Citigroup floats $23...",1
SKT,663,Tanger Factory Outlet Centers,SKT,Finance,Real Estate Investment Trusts,"Tanger Factory Outlet Centers, Inc. (/ˈtæŋər/ ...",0.664196,"[{'doc_id': 36, 'title': 'Kohls CEO talks Amaz...",1


('Finance', 'Regional Banks')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
BSBR,86,Banco Santander (Brasil),BSBR,Finance,Regional Banks,Banco Santander (Brasil) S.A. is the Brazilian...,0.702378,"[{'doc_id': 22, 'title': 'Will SoFi Technologi...",3
BMA,85,Banco Macro,BMA,Finance,Regional Banks,Banco Macro is the second largest domestically...,0.762004,"[{'doc_id': 24, 'title': 'Banco Bradesco S.A. ...",2
ALLY,27,Ally Financial,ALLY,Finance,Regional Banks,Ally Financial is a bank holding company organ...,0.676079,"[{'doc_id': 23, 'title': 'Citigroup floats $23...",1
GGAL,327,Grupo Financiero Galicia,GGAL,Finance,Regional Banks,Grupo Financiero Galicia is a financial servic...,0.721772,"[{'doc_id': 24, 'title': 'Banco Bradesco S.A. ...",1
STT,644,State Street,STT,Finance,Regional Banks,State Street Corporation is an American financ...,0.734127,"[{'doc_id': 46, 'title': 'Does Didi's crash po...",1


('Health Technology', 'Medical Specialties')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
INO,374,Inovio Pharmaceuticals In,INO,Health Technology,Medical Specialties,Inovio Pharmaceuticals is an American biotechn...,0.701502,"[{'doc_id': 27, 'title': 'Moderna CEO Predicts...",2
EW,250,Edwards Lifesciences,EW,Health Technology,Medical Specialties,Edwards Lifesciences is an American medical te...,0.665758,"[{'doc_id': 28, 'title': 'US panel backs COVID...",1


('Health Technology', 'Pharmaceuticals: Generic')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
TEVA,677,Teva Pharmaceutical Industries,TEVA,Health Technology,Pharmaceuticals: Generic,"Teva Pharmaceutical Industries Ltd., also know...",0.651545,"[{'doc_id': 'oos_5', 'title': 'COVID-19: What ...",1
ZTS,776,Zoetis,ZTS,Health Technology,Pharmaceuticals: Generic,Zoetis Inc. (/zō-EH-tis/) is an American drug ...,0.706593,"[{'doc_id': 27, 'title': 'Moderna CEO Predicts...",1


('Industrial Services', 'Environmental Services')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
RSG,586,Republic Services,RSG,Industrial Services,Environmental Services,"Republic Services, Inc is the second largest p...",0.707574,"[{'doc_id': 10, 'title': 'Fresno County Will S...",1


('Industrial Services', 'Oilfield Services/Equipment')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
NOV,483,National Oilwell Varco,NOV,Industrial Services,Oilfield Services/Equipment,NOV Inc. is an American multinational corporat...,0.645415,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",2
HAL,331,Halliburton,HAL,Industrial Services,Oilfield Services/Equipment,Halliburton Company is an American multination...,0.576521,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1


('Non-Energy Minerals', 'Aluminum')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
ARNC,64,Arconic,ARNC,Non-Energy Minerals,Aluminum,Arconic Corporation is an American industrial ...,0.578776,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1


('Non-Energy Minerals', 'Precious Metals')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
IAG,364,Iamgold Corp,IAG,Non-Energy Minerals,Precious Metals,Iamgold Corporation (formerly Iamgold Internat...,0.736481,"[{'doc_id': 32, 'title': 'Freeport-McMoRan - A...",2
AUY,764,Yamana Gold,AUY,Non-Energy Minerals,Precious Metals,Inc. is a Canadian company that owns and opera...,0.723659,"[{'doc_id': 32, 'title': 'Freeport-McMoRan - A...",2
NEM,496,Newmont Mining,NEM,Non-Energy Minerals,Precious Metals,"Newmont, based in Greenwood Village, Colorado,...",0.716865,"[{'doc_id': 32, 'title': 'Freeport-McMoRan - A...",2
KGC,412,Kinross Gold Corporation,KGC,Non-Energy Minerals,Precious Metals,Kinross Gold Corporation is a Canadian-based g...,0.716297,"[{'doc_id': 32, 'title': 'Freeport-McMoRan - A...",2
AG,288,First Majestic Silver Corp,AG,Non-Energy Minerals,Precious Metals,First Majestic Silver Corp. is a Canadian silv...,0.714334,"[{'doc_id': 32, 'title': 'Freeport-McMoRan - A...",2
EGO,252,Eldorado Gold Corporation,EGO,Non-Energy Minerals,Precious Metals,Eldorado Gold Corporation is a Canadian compan...,0.689405,"[{'doc_id': 35, 'title': 'Mining Firm Responsi...",1


('Process Industries', 'Chemicals: Major Diversified')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
ESI,254,Element Solutions,ESI,Process Industries,Chemicals: Major Diversified,Element Solutions Inc (formerly Platform Speci...,0.631285,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",1


('Process Industries', 'Chemicals: Specialty')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
DQ,214,Daqo New Energy Corp,DQ,Process Industries,Chemicals: Specialty,Daqo New Energy Corp. is a Chinese company eng...,0.671857,"[{'doc_id': 11, 'title': 'AMD CEO Sees Chip Sh...",1


('Process Industries', 'Containers/Packaging')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
IP,381,International Paper Company,IP,Process Industries,Containers/Packaging,The International Paper Company (NYSE: IP) is ...,0.640076,"[{'doc_id': 'oos_8', 'title': 'How Fast Fashio...",1


('Process Industries', 'Industrial Specialties')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
OLN,523,Olin,OLN,Process Industries,Industrial Specialties,Olin Corporation is an American manufacturer o...,0.575475,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1


('Producer Manufacturing', 'Auto Parts: OEM')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
LI,430,Li Auto Inc.,LI,Producer Manufacturing,Auto Parts: OEM,"Li Auto Inc., also known as Li Xiang, is a Chi...",0.715414,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",4


('Producer Manufacturing', 'Electrical Products')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
SPWR,652,SunPower,SPWR,Producer Manufacturing,Electrical Products,SunPower Corporation (NASDAQ:SPWR) is an Ameri...,0.689248,"[{'doc_id': 15, 'title': 'Can India’s proposed...",1


('Producer Manufacturing', 'Miscellaneous Manufacturing')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
YETI,765,YETI Holdings Inc,YETI,Producer Manufacturing,Miscellaneous Manufacturing,YETI is an American outdoor manufacturer compa...,0.581991,"[{'doc_id': 'oos_6', 'title': 'When the Taliba...",1


('Retail Trade', 'Apparel/Footwear Retail')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
RL,577,Ralph Lauren,RL,Retail Trade,Apparel/Footwear Retail,Ralph Lauren Corporation is an American public...,0.668209,"[{'doc_id': 'oos_8', 'title': 'How Fast Fashio...",2
URBN,714,Urban Outfitters,URBN,Retail Trade,Apparel/Footwear Retail,"Urban Outfitters, Inc. (URBN) is a multination...",0.643979,"[{'doc_id': 'oos_8', 'title': 'How Fast Fashio...",1
GPS,306,GAP,GPS,Retail Trade,Apparel/Footwear Retail,"The Gap, Inc., commonly known as Gap Inc. or G...",0.712795,"[{'doc_id': 37, 'title': 'Macy’s And Kohl’s De...",1
ANF,6,Abercrombie & Fitch,ANF,Retail Trade,Apparel/Footwear Retail,Abercrombie & Fitch (A&F) is an American lifes...,0.696875,"[{'doc_id': 37, 'title': 'Macy’s And Kohl’s De...",1
CHS,160,Chico’s FAS,CHS,Retail Trade,Apparel/Footwear Retail,Chico's FAS is an American women's clothing an...,0.6932,"[{'doc_id': 37, 'title': 'Macy’s And Kohl’s De...",1


('Retail Trade', 'Drugstore Chains')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
RAD,592,Rite Aid,RAD,Retail Trade,Drugstore Chains,Rite Aid Corporation is an American drugstore ...,0.667155,"[{'doc_id': 36, 'title': 'Kohls CEO talks Amaz...",1


('Retail Trade', 'Electronics/Appliance Stores')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
GME,305,GameStop,GME,Retail Trade,Electronics/Appliance Stores,"GameStop Corp. is an American video game, cons...",0.7149,"[{'doc_id': 'oos_3', 'title': 'The Reddit revo...",1


('Retail Trade', 'Specialty Stores')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
CVNA,140,Carvana Co.,CVNA,Retail Trade,Specialty Stores,"Carvana, based in Tempe, Arizona, is a leading...",0.712581,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",1
CHPT,150,ChargePoint Holdings,CHPT,Retail Trade,Specialty Stores,ChargePoint (formerly Coulomb Technologies) is...,0.693346,"[{'doc_id': 10, 'title': 'Fresno County Will S...",1
ULTA,702,Ulta Beauty,ULTA,Retail Trade,Specialty Stores,"Ulta Beauty, Inc., formerly known as Ulta Salo...",0.68827,"[{'doc_id': 36, 'title': 'Kohls CEO talks Amaz...",1
TGT,666,Target,TGT,Retail Trade,Specialty Stores,Target Corporation is an American retail corpo...,0.697793,"[{'doc_id': 37, 'title': 'Macy’s And Kohl’s De...",1


('Technology Services', 'Data Processing Services')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
DXC,246,DXC Technology,DXC,Technology Services,Data Processing Services,DXC Technology is an American multinational co...,0.648068,"[{'doc_id': 'oos_5', 'title': 'COVID-19: What ...",1
PYPL,543,PayPal,PYPL,Technology Services,Data Processing Services,"PayPal Holdings, Inc. is an American multinati...",0.680379,"[{'doc_id': 17, 'title': 'In a huge blow, judg...",1
FISV,292,Fiserv,FISV,Technology Services,Data Processing Services,"Fiserv, Inc. (/faɪˈsərv/) is an American multi...",0.73917,"[{'doc_id': 26, 'title': 'Itaú Unibanco Holdin...",1


('Technology Services', 'Information Technology Services')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
SSNC,641,SS&C Technologies Holdings,SSNC,Technology Services,Information Technology Services,"SS&C Technologies Holdings, Inc. (known as SS&...",0.699369,"[{'doc_id': 22, 'title': 'Will SoFi Technologi...",2
BOX,114,Box,BOX,Technology Services,Information Technology Services,"Box, Inc. (formerly Box.net), is an American i...",0.69027,"[{'doc_id': 22, 'title': 'Will SoFi Technologi...",2
MOMO,471,Momo,MOMO,Technology Services,Information Technology Services,Momo (Chinese: 陌陌; pinyin: mò mò) is a free so...,0.682821,"[{'doc_id': 39, 'title': 'Alibaba stock tumble...",2
VMW,733,VMware,VMW,Technology Services,Information Technology Services,"VMware, Inc. is an American cloud computing an...",0.654105,"[{'doc_id': 'oos_9', 'title': 'The privacy par...",1
SQSP,640,Squarespace,SQSP,Technology Services,Information Technology Services,"Squarespace, Inc. is an American website build...",0.687037,"[{'doc_id': 22, 'title': 'Will SoFi Technologi...",1
WDAY,757,Workday,WDAY,Technology Services,Information Technology Services,"Workday, Inc., is an American on‑demand (cloud...",0.662156,"[{'doc_id': 36, 'title': 'Kohls CEO talks Amaz...",1


('Transportation', 'Railroads')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
UNP,706,Union Pacific,UNP,Transportation,Railroads,The Union Pacific Corporation (Union Pacific) ...,0.664005,"[{'doc_id': 34, 'title': 'US Steel Output Spik...",1


('Utilities', 'Alternative Power Generation')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
BIP,121,Brookfield Infrastructure Partners,BIP,Utilities,Alternative Power Generation,Brookfield Infrastructure Partners L.P. is a p...,0.707267,"[{'doc_id': 19, 'title': 'Why Chevron And Exxo...",2
RUN,653,Sunrun,RUN,Utilities,Alternative Power Generation,Sunrun Inc. is an American provider of residen...,0.706288,"[{'doc_id': 19, 'title': 'Why Chevron And Exxo...",1


('Utilities', 'Electric Utilities')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
NRG,513,NRG Energy,NRG,Utilities,Electric Utilities,"NRG Energy, Inc. is a large American energy co...",0.671679,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",5
SO,631,Southern Company,SO,Utilities,Electric Utilities,Southern Company is an American gas and electr...,0.664255,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",2
AEP,39,American Electric Power,AEP,Utilities,Electric Utilities,"American Electric Power (AEP), (railcar report...",0.652692,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",2
BEP,122,Brookfield Renewable Partners,BEP,Utilities,Electric Utilities,Brookfield Renewable Partners L.P. is a public...,0.653475,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",2
DTE,242,DTE Energy Co,DTE,Utilities,Electric Utilities,DTE Energy (formerly Detroit Edison until 1996...,0.658557,"[{'doc_id': 1, 'title': 'Ford to build $11.4 b...",1
CNP,147,CenterPoint Energy,CNP,Utilities,Electric Utilities,"CenterPoint Energy, Inc. is an American Fortun...",0.665088,"[{'doc_id': 'oos_1', 'title': 'Oil recovers fr...",1
PCG,550,PG&E Corporation,PCG,Utilities,Electric Utilities,The Pacific Gas and Electric Company (PG&E) is...,0.621199,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",1
PEG,568,Public Service Enterprise Group,PEG,Utilities,Electric Utilities,The Public Service Enterprise Group (PSEG) is ...,0.727483,"[{'doc_id': 29, 'title': 'Energy Transfer earn...",1


('Utilities', 'Gas Distributors')


Unnamed: 0,index,stock_name,ticker_symbol,sector,industry,comment,score,in_articles,in_articles_count
SRE,611,Sempra Energy,SRE,Utilities,Gas Distributors,Sempra is a North American energy infrastructu...,0.625225,"[{'doc_id': 'oos_2', 'title': 'COP26 Climate S...",2


88

In [17]:
for stock in stocks_matches.values():
    print(stock)

{'index': 11, 'stock_name': 'Advanced Micro Devices', 'ticker_symbol': 'AMD', 'sector': 'Electronic Technology', 'industry': 'Semiconductors', 'comment': "Advanced Micro Devices, Inc. (AMD) is an American multinational semiconductor company based in Santa Clara, California, that develops computer processors and related technologies for business and consumer markets. While it initially manufactured its own processors, the company later outsourced its manufacturing, a practice known as going fabless, after GlobalFoundries was spun off in 2009. AMD's main products include microprocessors, motherboard chipsets, embedded processors and graphics processors for servers, workstations, personal computers and embedded system applications."}
{'index': 23, 'stock_name': 'Alibaba', 'ticker_symbol': 'BABA', 'sector': 'Retail Trade', 'industry': 'Internet Retail', 'comment': 'Alibaba Group Holding Limited, also known as Alibaba Group and Alibaba.com, is a Chinese multinational technology company spec

In [19]:
for stock in stocks_matches.values():
    stock["in_articles"] = list()
    stock["not_in_articles"] = list()
    for _, article in articles.items():
        tickers_present = [sm["ticker_symbol"] for sm in article["stock_matches"]]
        article_ = {k:v for k, v in article.items() if k in ["doc_id", "title", "summary"]}
        if stock["ticker_symbol"] in tickers_present:
            stock["in_articles"].append(article_)
        else:
            stock["not_in_articles"].append(article_)


{'index': 11,
 'stock_name': 'Advanced Micro Devices',
 'ticker_symbol': 'AMD',
 'sector': 'Electronic Technology',
 'industry': 'Semiconductors',
 'comment': "Advanced Micro Devices, Inc. (AMD) is an American multinational semiconductor company based in Santa Clara, California, that develops computer processors and related technologies for business and consumer markets. While it initially manufactured its own processors, the company later outsourced its manufacturing, a practice known as going fabless, after GlobalFoundries was spun off in 2009. AMD's main products include microprocessors, motherboard chipsets, embedded processors and graphics processors for servers, workstations, personal computers and embedded system applications.",
 'in_articles': [{'doc_id': 11,
   'title': 'AMD CEO Sees Chip Shortage Easing in 2022',
   'summary': 'AMD CEO Sees Chip Shortage Easing in 2022. The semiconductor shortage will begin to ease in 2022, Advanced Micro Devices CEO Lisa Su said at a session

In [21]:
stocks_matches["AMD"]["in_articles"]

[{'doc_id': 11,
  'title': 'AMD CEO Sees Chip Shortage Easing in 2022',
  'summary': 'AMD CEO Sees Chip Shortage Easing in 2022. The semiconductor shortage will begin to ease in 2022, Advanced Micro Devices CEO Lisa Su said at a session Monday at the Code Conference in Beverly Hills. In an on-stage interview with Code Conference producer Kara Swisher and CNBC tech reporter Jon Fortt, the AMD (ticker: AMD) chief said that the semiconductor industry is investing aggressively in new capacity, with 20 new factories coming online this year and a similar number expected to come online in 2022.'},
 {'doc_id': 12,
  'title': "New Benchmark Leak Reveals Impressive Performance Of Intel's 12900K, 12700K And 12600K Alder Lake Processors",
  'summary': "New Benchmark Leak Reveals Impressive Performance Of Intel's 12900K, 12700K And 12600K Alder Lake Processors. Benchmarks have leaked supposedly revealing the performance of Intel's Core i9-12900K, Core i7-12700K and Core i5-12600K Alder Lake CPUs, c

In [22]:
stocks["AMD"]["in_articles"]

[{'doc_id': 11,
  'title': 'AMD CEO Sees Chip Shortage Easing in 2022',
  'url': 'https://www.barrons.com/articles/amd-ceo-chip-shortage-51632784863',
  'summary': 'AMD CEO Sees Chip Shortage Easing in 2022. The semiconductor shortage will begin to ease in 2022, Advanced Micro Devices CEO Lisa Su said at a session Monday at the Code Conference in Beverly Hills. In an on-stage interview with Code Conference producer Kara Swisher and CNBC tech reporter Jon Fortt, the AMD (ticker: AMD) chief said that the semiconductor industry is investing aggressively in new capacity, with 20 new factories coming online this year and a similar number expected to come online in 2022.'},
 {'doc_id': 12,
  'title': "New Benchmark Leak Reveals Impressive Performance Of Intel's 12900K, 12700K And 12600K Alder Lake Processors",
  'url': 'https://www.forbes.com/sites/antonyleather/2021/09/27/new-benchmark-leak-reveals-impressive-performance-of-intels-12900k-12700k-and-12600k-alder-lake-processors/',
  'summary

### Saving to file

In [31]:
with open("../../data/test/user_study/stocks/stocks.json", "w") as fp:
    json.dump(stocks_matches, fp, ensure_ascii=False, indent="\t")

In [30]:
stocks_matches["T"]

{'index': 72,
 'stock_name': 'AT&T',
 'ticker_symbol': 'T',
 'sector': 'Communications',
 'industry': 'Major Telecommunications',
 'comment': 'AT&T Inc. is an American multinational conglomerate holding company that is Delaware-registered but headquartered at Whitacre Tower in Downtown Dallas, Texas. It is the world\'s largest telecommunications company, it is also the largest provider of mobile telephone services in the U.S. As of 2020, AT&T was ranked 9th on the Fortune 500 rankings of the largest United States corporations, with revenues of $181 billion. The current AT&T reconstitutes much of the former Bell System, and includes four of the seven "Baby Bells" along with the original AT&T Corp., including the long-distance division.',
 'in_articles': [],
 'not_in_articles': [{'doc_id': 0,
   'title': 'AT&T CFO Pascal Desroches Updates Shareholders',
   'summary': 'AT&T CFO Pascal Desroches Updates Shareholders. DALLAS, September 15, 2021--(BUSINESS WIRE)--Pascal Desroches, senior exe