In [1]:
from solr_manager import SolrManager

# Connection settings handed to SolrManager.
url = 'http://localhost:8983/solr/'  # base URL of the Solr instance on localhost
core = 'articles'  # name of the Solr core used throughout this notebook
db = 'sqlite:///../data/articles.db'  # SQLite database URL (relative path); presumably the article source — verify in solr_manager
# The recorded output of this cell reports that a Solr container with this core was started.
solr = SolrManager(url, core, db)

Solr container with core articles started successfully.


In [None]:
# Run this cell only if you want to change an existing schema.
# If it raises an error, the core may still be loading: wait a moment and run it again.
solr.reload_core()

In [None]:
# Run this cell only if you want to delete all documents in the core.
# If it raises an error, the core may still be loading: wait a moment and run it again.
solr.clear_documents()

In [None]:
# Submit the schema file, then apply the stop-word and synonym files, via SolrManager.
# If a call raises an error, the core may still be loading: wait a moment and run the cell again.
schema = 'schema.json'
solr.submit_schema(schema)
solr.apply_stopwords('stopwords.txt')
solr.apply_synonyms('synonyms.txt')

In [None]:
# Index the articles into the core.
# NOTE(review): presumably reads them from the SQLite database passed to SolrManager — verify in solr_manager.
solr.index_articles()

In [17]:
# Search for articles whose text references AMD ("advanced micro devices") stocks,
# then dump the query, its settings and every hit to results.txt.
query = "doc_type:article AND article_text:(advanced micro devices stocks)"
fields = 'article_title, article_text'
params = dict(fl=fields, rows=100)
results = solr.solr.search(query, **params)

separator = '--------------------------------------------------\n'
# NOTE(review): len(results) presumably counts only the documents returned
# (at most `rows`), not the total number of matches — confirm.
header = [
    f'Query: {query}\n',
    f'Fields: {fields}\n',
    f'Parameters: {params}\n\n',
    separator,
    f'Number of results: {len(results)}\n',
]

with open('results.txt', mode='w', encoding='utf-8') as out:
    out.writelines(header)
    for doc in results:
        # One separator + blank line, the title, a blank line, then the article body.
        out.write(separator + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

results.txt updated


In [6]:
# Search for articles related to Jira Align solutions.
# The block-join parent parser selects parent documents (doc_type:article) whose
# child documents match company_keywords; `[child]` in the field list asks Solr to
# return the child documents together with each parent.
query = "{!parent which='doc_type:article'}company_keywords:(jira align)"
fields = 'article_title, article_companies, [child], company_description'
params = {'fl': fields, 'rows': 100}
results = solr.solr.search(query, **params)

separator = '--------------------------------------------------\n'

with open('results.txt', 'w', encoding='utf-8') as file:
    file.write('Query: ' + query + '\n')
    file.write('Fields: ' + fields + '\n')
    file.write('Parameters: ' + str(params) + '\n\n')
    file.write(separator)
    # NOTE(review): len(results) presumably counts only the documents returned
    # (at most `rows`), not the total number of matches — confirm.
    file.write(f'Number of results: {len(results)}\n')
    for result in results:
        file.write(separator + '\n')
        file.write(str(result.get('article_title')) + '\n\n')

        # A parent may come back without nested companies (None), and a single
        # nested child may be returned as one dict instead of a list; normalise
        # both cases so the loop below never iterates over None or over dict keys.
        companies = result.get('article_companies') or []
        if isinstance(companies, dict):
            companies = [companies]

        for company in companies:
            solr.write_text(file, company.get('company_description'))
            file.write('\n')

print('results.txt updated')

results.txt updated


In [8]:
# Search for articles reporting a drop in Tesla's stock: a phrase query on
# article_text for "Tesla fell" with a proximity (slop) of 3.
# The query, its settings and every hit are dumped to results.txt.
query = "doc_type:article AND article_text:\"Tesla fell\"~3"
fields = 'article_title, article_text'
params = dict(fl=fields, rows=10)
results = solr.solr.search(query, **params)

separator = '--------------------------------------------------\n'
# NOTE(review): len(results) presumably counts only the documents returned
# (at most `rows`), not the total number of matches — confirm.
header = [
    f'Query: {query}\n',
    f'Fields: {fields}\n',
    f'Parameters: {params}\n\n',
    separator,
    f'Number of results: {len(results)}\n',
]

with open('results.txt', mode='w', encoding='utf-8') as out:
    out.writelines(header)
    for doc in results:
        # One separator + blank line, the title, a blank line, then the article body.
        out.write(separator + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

results.txt updated


In [10]:
# Search for articles about stock market predictions using the eDisMax query
# parser with a minimum-should-match (`mm`) setting of 70%.
# The query, its settings and every hit are dumped to results.txt.
query = 'doc_type:article AND article_text:(stock market predictions)'
fields = 'article_title, article_text'
params = dict(defType='edismax', fl=fields, rows=100, mm='70%')
results = solr.solr.search(query, **params)

separator = '--------------------------------------------------\n'
# NOTE(review): len(results) presumably counts only the documents returned
# (at most `rows`), not the total number of matches — confirm.
header = [
    f'Query: {query}\n',
    f'Fields: {fields}\n',
    f'Parameters: {params}\n\n',
    separator,
    f'Number of results: {len(results)}\n',
]

with open('results.txt', mode='w', encoding='utf-8') as out:
    out.writelines(header)
    for doc in results:
        # One separator + blank line, the title, a blank line, then the article body.
        out.write(separator + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

results.txt updated


In [18]:
# Search for articles about stock rises in artificial intelligence
# (terms "AI" and "rises" in article_text), then dump the query, its settings
# and every hit to results.txt.
query = "doc_type:article AND article_text:(AI rises)"
fields = 'article_title, article_text'
params = dict(fl=fields, rows=10)
results = solr.solr.search(query, **params)

separator = '--------------------------------------------------\n'
# NOTE(review): len(results) presumably counts only the documents returned
# (at most `rows`), not the total number of matches — confirm.
header = [
    f'Query: {query}\n',
    f'Fields: {fields}\n',
    f'Parameters: {params}\n\n',
    separator,
    f'Number of results: {len(results)}\n',
]

with open('results.txt', mode='w', encoding='utf-8') as out:
    out.writelines(header)
    for doc in results:
        # One separator + blank line, the title, a blank line, then the article body.
        out.write(separator + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

results.txt updated


In [None]:
# TODO: query evaluation (not implemented yet)

In [19]:
# Close the manager when finished; the recorded output of this cell reports
# that the Solr container for the core was stopped.
solr.close()

Solr container with core articles stopped successfully.
