In [None]:
from solr_manager import SolrManager

# Connection settings for this notebook.
# NOTE(review): the meaning of each argument is inferred from the variable
# names (Solr base URL, core name, SQLAlchemy-style SQLite URL) -- confirm
# against SolrManager's constructor.
url = 'http://localhost:8983/solr/'
core = 'articles'
db = 'sqlite:///../data/articles.db'
# Shared manager object used by every cell below.
solr = SolrManager(url, core, db)

In [None]:
# Run this cell if you want to change an existing schema.
# If it raises an error, wait for the core to finish loading and try again.
solr.reload_core()

In [None]:
# Run this cell if you want to delete all documents in the core.
# If it raises an error, wait for the core to finish loading and try again.
solr.clear_documents()

In [None]:
# Submit the schema file, then apply the stopword and synonym files.
# If any call raises an error, wait for the core to finish loading and retry.
# NOTE(review): the file names are relative paths -- presumably resolved
# against the notebook's working directory; confirm in SolrManager.
schema = 'schema.json'
solr.submit_schema(schema)
solr.apply_stopwords('stopwords.txt')
solr.apply_synonyms('synonyms.txt')

In [None]:
# Index the articles into the core.
# NOTE(review): presumably reads them from the SQLite database passed to
# SolrManager -- confirm in SolrManager.index_articles.
solr.index_articles()

In [None]:
# List article documents together with the companies attached to them.
# The `[child]` entry in the field list asks Solr to return each article's
# nested child documents as well.
query = 'doc_type:article'
fields = 'id, article_companies, [child], company_tag'
params = {
    'fl': fields,
    'rows': 100,
}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for doc in results.docs:
    print('--------------------------------------------------')
    # (label, document key) pairs, printed in this order for every article.
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Companies:', 'article_companies'),
    ):
        print(label, doc.get(key))

In [None]:
# Search articles whose text mentions a specific company, boosting recent ones.
query = "article_text:(Tesla)"
fields = 'id, article_title, article_date'

# Additive edismax boost function: recip(x, m, a, b) = a / (m * x + b), where
# x is the article's age in milliseconds. m = 1.65e-9 is roughly
# 1 / (one week in ms), so the boost is 1 for a brand-new article and about
# 0.5 for a week-old one.
recency_boost = 'recip(ms(NOW,article_date),1.65e-9,1,1)'
params = dict(
    defType='edismax',
    fl=fields,
    rows=10,
    bf=recency_boost,
)

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Title:', 'article_title'),
        ('Article Date:', 'article_date'),
    ):
        print(label, doc.get(key))

In [None]:
# Search article text for several terms at once, using edismax with a
# minimum-should-match ("mm") setting of 75%.
query = "article_text:(Tesla company)"
fields = 'id, article_title, article_date, article_link'
params = dict(
    defType='edismax',
    fl=fields,
    rows=10,
    mm='75%',
)

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Title:', 'article_title'),
        ('Article Date:', 'article_date'),
        ('Article Link:', 'article_link'),
    ):
        print(label, doc.get(key))

In [None]:
# Search for articles published on a specific day.
query = "doc_type:article"
fields = 'id, article_title, article_date'
# Half-open range: '[' includes the start of the day, '}' excludes the start
# of the next day. An inclusive upper bound of 23:59:59Z would miss any
# timestamp in the final second of the day (e.g. 23:59:59.500Z).
filters = "article_date:[2023-05-30T00:00:00Z TO 2023-05-31T00:00:00Z}"
params = {'fl': fields, 'fq': filters, 'rows': 10}

results = solr.solr.search(query, **params)
print("Number of results:", len(results))
for result in results:
    print('--------------------------------------------------')
    print('Article Id:', result.get('id'))
    print('Article Title:', result.get('article_title'))
    print('Article Date:', result.get('article_date'))

In [None]:
# Find company documents whose keywords mention a given sector.
query = "company_keywords:semiconductor"
fields = 'company_name, company_description'
params = {
    'fl': fields,
    'rows': 10,
}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Company Name:', 'company_name'),
        ('Company Description:', 'company_description'),
    ):
        print(label, doc.get(key))

In [None]:
# Find articles that have at least one company of a given sector.
# The {!parent} block-join parser returns the parent documents (those matching
# `which`) whose child documents match the query that follows it.
query = "{!parent which='doc_type:article'}company_keywords:cybersecurity"
fields = 'id, article_title, article_companies, [child], company_tag'
params = {
    'fl': fields,
    'rows': 100,
}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Title:', 'article_title'),
        ('Article Companies:', 'article_companies'),
    ):
        print(label, doc.get(key))


In [None]:
# Look for articles reporting a rise in Tesla's stock: a proximity search for
# "Tesla" and "gained" with a slop of 3 (the ~3 suffix on the phrase).
query = 'doc_type:article AND article_text:"Tesla gained"~3'
fields = 'id, article_text'
params = {
    'fl': fields,
    'rows': 10,
}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Text:', 'article_text'),
    ):
        print(label, doc.get(key))

In [None]:
# Look for articles reporting a drop in Tesla's stock: a proximity search for
# "Tesla" and "fell" with a slop of 3 (the ~3 suffix on the phrase).
query = 'doc_type:article AND article_text:"Tesla fell"~3'
fields = 'id, article_text'
params = {
    'fl': fields,
    'rows': 10,
}
results = solr.solr.search(query, **params)

print("Number of results:", len(results))
for doc in results:
    print('--------------------------------------------------')
    for label, key in (
        ('Article Id:', 'id'),
        ('Article Text:', 'article_text'),
    ):
        print(label, doc.get(key))

In [None]:
# Trending companies: facet counts per company name over articles dated
# 2023-01-01 or later.
query = "doc_type:article"
filters = "article_date:[2023-01-01T00:00:00Z TO *]"

# Field to facet on, and the maximum number of facet values to return
# (adjust the limit as needed).
facet_field = "company_name"
facet_limit = 10

# Fields returned for each matching document.
fields = 'id, company_name, article_companies, [child]'

params = {
    'fl': fields,
    'fq': filters,
    'facet': 'true',
    'facet.field': facet_field,
    'facet.limit': facet_limit,
}

# NOTE(review): the query matches parent (article) documents, while the facet
# field is described as living on the child (company) documents. If so, the
# facet counts may come back empty -- confirm against the schema, and consider
# faceting on a parent-level field or using a child-domain facet instead.
results = solr.solr.search(query, **params)

# Print the raw facet results.
print(results.facets)


In [None]:
# Close the manager once all queries are done.
# NOTE(review): presumably releases the Solr and database connections --
# confirm in SolrManager.close.
solr.close()