In [1]:
from solr_manager import SolrManager

url = 'http://localhost:8983/solr/'
core = 'articles'
db = 'sqlite:///../data/articles.db'
solr = SolrManager(url, core, db)

Container pri-solr-1  Created
Container pri-solr-1  Starting
Container pri-solr-1  Started


Solr container with core articles started successfully.


In [2]:
# run if you want to change an existent schema (in case of error, wait for core to load)
solr.reload_core()

Core articles unloaded successfully.


Container pri-solr-1  Stopping
Container pri-solr-1  Stopping
Container pri-solr-1  Stopped
Container pri-solr-1  Removing
Container pri-solr-1  Removed
Network pri_default  Removing
Network pri_default  Removed


Solr container with core articles stopped successfully.


Network pri_default  Creating
Network pri_default  Created
Container pri-solr-1  Creating
Container pri-solr-1  Created
Container pri-solr-1  Starting
Container pri-solr-1  Started


Solr container with core articles started successfully.


In [3]:
# run if you want to delete all documents in the core (in case of error, wait for core to load)
solr.clear_documents()

Data cleared successfully.


In [4]:
# in case of error, wait for core to load
schema = 'schema.json'
solr.submit_schema(schema)
solr.apply_stopwords('stopwords.txt')
solr.apply_synonyms('synonyms.txt')

Schema uploaded successfully.
Stopwords stored successfully.
Synonyms stored successfully.


In [5]:
solr.index_articles()

Indexing article 1...
Indexing article 2...
Indexing article 3...
Indexing article 4...
Indexing article 5...
Indexing article 6...
Indexing article 7...
Indexing article 8...
Indexing article 9...
Indexing article 10...
Articles indexed successfully.


In [6]:
# search for articles and their companies 
query = 'doc_type:article'
fields = 'id, article_companies, [child], company_tag'
params = {'fl': fields, 'rows': 100}

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for result in results.docs:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Companies:', result.get('article_companies'))

Number of results: 10
--------------------------------------------------
Article Id: 1
Article Companies: [{'company_tag': 'FTNT', 'id': '1/article_companies#0'}, {'company_tag': 'FANG', 'id': '1/article_companies#1'}, {'company_tag': 'AVGO', 'id': '1/article_companies#2'}, {'company_tag': 'NVDA', 'id': '1/article_companies#3'}, {'company_tag': 'TSLA', 'id': '1/article_companies#4'}]
--------------------------------------------------
Article Id: 2
Article Companies: [{'company_tag': 'MU', 'id': '2/article_companies#0'}, {'company_tag': 'AMD', 'id': '2/article_companies#1'}, {'company_tag': 'MRNA', 'id': '2/article_companies#2'}, {'company_tag': 'FTNT', 'id': '2/article_companies#3'}, {'company_tag': 'GEHC', 'id': '2/article_companies#4'}, {'company_tag': 'TSLA', 'id': '2/article_companies#5'}]
--------------------------------------------------
Article Id: 3
Article Companies: [{'company_tag': 'WBA', 'id': '3/article_companies#0'}, {'company_tag': 'AVGO', 'id': '3/article_companies#1'},

In [7]:
# search articles where a specific company is in the text
query = "article_text:(Tesla)"
fields = 'id, article_title, article_date'
params = {
    'defType': 'edismax',
    'fl': fields, 
    'rows': 10,
    'bf': 'recip(ms(NOW,article_date),1.65e-9,1,1)'
}

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Title:', result.get('article_title'))
    print('Article Date:', result.get('article_date'))

Number of results: 0


In [8]:
# search articles where a specific company is in the text
query = "article_text:(Tesla company)"
fields = 'id, article_title, article_date, article_link'
params = {
    'defType': 'edismax',
    'fl': fields, 
    'rows': 10,
    'mm': '75%',
}

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Title:', result.get('article_title'))
    print('Article Date:', result.get('article_date'))
    print('Article Link:', result.get('article_link'))

Number of results: 10
--------------------------------------------------
Article Id: 4
Article Title: Stocks making the biggest moves after hours: Snap, Electronic Arts, Advanced Micro Devices and more
Article Date: 2023-01-31T22:07:19Z
Article Link: https://www.cnbc.com/2023/01/31/stocks-moving-big-after-hours-snap-ea-amd-wdc.html
--------------------------------------------------
Article Id: 5
Article Title: Stocks making the biggest moves midday: GM, McDonald's, UPS, Pulte, International Paper and more
Article Date: 2023-01-31T17:07:23Z
Article Link: https://www.cnbc.com/2023/01/31/stocks-making-the-biggest-moves-midday-pentair-ups-and-more.html
--------------------------------------------------
Article Id: 2
Article Title: Stocks making the biggest moves midday: Carvana, Colgate-Palmolive, Tesla and more
Article Date: 2023-01-30T17:28:44Z
Article Link: https://www.cnbc.com/2023/01/30/stocks-making-the-biggest-moves-midday-carvana-amd-and-more.html
----------------------------------

In [9]:
# search stock events on a specific day
query = "doc_type:article"
fields = 'id, article_title, article_date'
filters = "article_date:[2023-05-30T00:00:00Z TO 2023-05-30T23:59:59Z]"
params = {'fl': fields, 'fq': filters, 'rows': 10}

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Title:', result.get('article_title'))
    print('Article Date:', result.get('article_date'))

Number of results: 3
--------------------------------------------------
Article Id: 1
Article Title: Stocks making the biggest moves midday: Nvidia, Tesla, Coinbase and more 
Article Date: 2023-05-30T16:37:55Z
--------------------------------------------------
Article Id: 7
Article Title: Stocks making the biggest premarket moves: ChargePoint, Ford, Nvidia, Tesla and more
Article Date: 2023-05-30T12:02:11Z
--------------------------------------------------
Article Id: 8
Article Title: Stocks making the biggest moves after hours: Ambarella, Box, HP and more
Article Date: 2023-05-30T21:10:25Z


In [10]:
# search for companies of a specific sector
query = "company_keywords:semiconductor"
fields = 'company_name, company_description'
params = {'fl': fields, 'rows': 10}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Company Name:', result.get('company_name'))
    print('Company Description:', result.get('company_description'))

Number of results: 7
--------------------------------------------------
Company Name: KLA Corp
Company Description: KLA Corporation is a supplier of process control and yield management solutions and services for the semiconductor and related electronics industries. The Company offers a portfolio of inspection and metrology products, and related services, software and other offerings, supports research and development (R&D) and manufacturing of integrated circuits (ICs), wafers and reticles. Its segments include Semiconductor Process Control, Specialty Semiconductor Process and PCB, Display and Component Inspection. Semiconductor Process Control segment provides comprehensive portfolio of inspection, metrology and software products, and related services, helping IC, wafer, reticle/mask and chemical/materials manufacturers achieve target yields throughout the entire fabrication process, from R&D to final volume production. The Specialty Semiconductor Process segment develops and sells a

In [11]:
# search for articles with companies of a specific sector
query = "{!parent which='doc_type:article'}company_keywords:cybersecurity"
fields = 'id, article_title, article_companies, [child], company_tag'
params = {'fl': fields, 'rows': 100}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Title:', result.get('article_title'))
    print('Article Companies:', result.get('article_companies'))


Number of results: 3
--------------------------------------------------
Article Id: 1
Article Title: Stocks making the biggest moves midday: Nvidia, Tesla, Coinbase and more 
Article Companies: [{'company_tag': 'FTNT', 'id': '1/article_companies#0'}, {'company_tag': 'FANG', 'id': '1/article_companies#1'}, {'company_tag': 'AVGO', 'id': '1/article_companies#2'}, {'company_tag': 'NVDA', 'id': '1/article_companies#3'}, {'company_tag': 'TSLA', 'id': '1/article_companies#4'}]
--------------------------------------------------
Article Id: 2
Article Title: Stocks making the biggest moves midday: Carvana, Colgate-Palmolive, Tesla and more
Article Companies: [{'company_tag': 'MU', 'id': '2/article_companies#0'}, {'company_tag': 'AMD', 'id': '2/article_companies#1'}, {'company_tag': 'MRNA', 'id': '2/article_companies#2'}, {'company_tag': 'FTNT', 'id': '2/article_companies#3'}, {'company_tag': 'GEHC', 'id': '2/article_companies#4'}, {'company_tag': 'TSLA', 'id': '2/article_companies#5'}]
---------

In [18]:
# search for stock ups for Tesla
query = "doc_type:article AND article_text:(Tesla gained)"
fields = 'id, article_text'
params = {'fl': fields, 'rows': 10, 'hl': 'true', 'hl.fl':'article_text'}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Text:', result.get('article_text'))

Number of results: 5
--------------------------------------------------
Article Id: 7
Article Text: Check out the companies making some of the biggest moves in premarket trading. ChargePoint - Shares of the electric vehicle charging station company jumped 5% premarket after Bank of America upgraded the stock to buy. The Wall Street firm called ChargePoint a best-in-class way to play the EV charging theme, highlighting the company's scale and diversity as keys to sustainable growth. Ford Motor - Shares of the automaker rose more than 2% after Jefferies upgraded the stock and said the automaker has a strong plan and management that can help it close the gap with rivals. The analyst also raised his price target on the shares, implying they could rally more than 30%. Tesla - Shares gained 3% premarket. On Monday, Reuters reported a private jet used by CEO Elon Musk arrived in China. Musk is expected to meet with senior Chinese officials and visit Tesla's Shanghai plant, Reuters said. Last 

In [19]:
# search for stock downs for Tesla
query = "doc_type:article AND article_text:(Tesla fell)"
fields = 'id, article_text'
params = {'fl': fields, 'rows': 10}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Text:', result.get('article_text'))

Number of results: 8
--------------------------------------------------
Article Id: 2
Article Text: Check out the companies making headlines in midday trading Monday. Colgate-Palmolive - Shares gained 2. 8% after Morgan Stanley upgraded the stock to overweight from equal weight and named it the top pick in the household and personal care industry. The firm said the stock was at a good price point after a recent selloff. Tesla - Shares dropped 2. 8% after Berenberg lowered its earnings estimate for Tesla by around 25% for 2023 following the company's price cuts for its electric vehicles. However, the firm upgraded the stock to buy from hold. GE HealthCare Technologies - The stock rose 4% after the company reported its first earnings after being spun off as a public company from General Electric. GE Healthcare's revenue came in at $4. 9 billion, an 8% year-over-year increase, and its fourth-quarter adjusted EPS was $1. 31. Ford Motor Company - Shares fell nearly 1. 4% after the company a

In [22]:
#search for companies that provide software solutions
query = 'doc_type:company AND company_description:(software)'
fields = 'id, company_description'
params = {'fl': fields, 'rows': 10}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Company id:', result.get('id'))
    print('company_description:', result.get('company_description'))



Number of results: 10
--------------------------------------------------
Company id: 1/article_companies#0
company_description: Fortinet, Inc. is engaged in offering cybersecurity and networking solutions. The Company provides cyber security solutions to a variety of organizations, including enterprises, communication service providers, security service providers, government organizations and small businesses. The Companyâ€™s product offerings consist of its core platform network security products and its enhanced platform technology products, which are offered in a range of forms spanning physical appliances, virtual appliances, software, and cloud-hosted services. Its Secure Networking solutions enable the convergence of networking and security across all edges to provide next generation firewall, software-defined wide area network (SD- WAN), and secure access service edge (SASE). Its Zero Trust Access solutions enable customers to know and control who and what is on their network. I

In [15]:
#solr.close()