In [None]:
from solr_manager import SolrManager

# Connection settings handed to SolrManager, in this order:
#   url  - base URL of the local Solr server
#   core - name of the Solr core this notebook works against
#   db   - SQLite database URL for the articles data (path is relative)
# NOTE(review): what SolrManager does with each value is defined in solr_manager.py.
url = 'http://localhost:8983/solr/'
core = 'articles'
db = 'sqlite:///../data/articles.db'
# Single manager object reused by every cell below.
solr = SolrManager(url, core, db)

In [None]:
# Optional: run if you want to change an existing schema
# (in case of error, wait for the core to load and run the cell again).
solr.reload_core()

In [None]:
# Optional: run if you want to delete all documents in the core
# (in case of error, wait for the core to load and run the cell again).
solr.clear_documents()

In [None]:
# Submit the schema file, then apply the stopword and synonym files
# (in case of error, wait for the core to load and run the cell again).
# NOTE(review): the file names are passed as-is; presumably they are resolved
# relative to the notebook's working directory - confirm in SolrManager.
schema = 'schema.json'
solr.submit_schema(schema)
solr.apply_stopwords('stopwords.txt')
solr.apply_synonyms('synonyms.txt')

In [None]:
# Index the articles into the core.
# NOTE(review): the meaning of 100 (batch size vs. number of articles) is
# defined by SolrManager.index_articles - confirm there before changing it.
solr.index_articles(100)

In [None]:
# TODO: add field/term/independent boosts, data range filter and wildcards/fuzziness to the queries below
# TODO: evaluate queries and also compare results with/without certain filters or query techniques

In [None]:
# Query: articles referencing AMD stocks.
# The query text is searched with eDisMax across the title and body fields.
params = dict(
    defType='edismax',
    qf='article_title article_text',
    q='advanced micro devices stocks',
    rows=100,
)
results = solr.solr.search(**params)

# Visual separator reused for the header and for every result entry.
divider = '--------------------------------------------------\n'

# The report file is overwritten on every run of this cell.
with open('results.txt', mode='w', encoding='utf-8') as out:
    # Header: the exact parameters used, then the number of documents returned.
    out.write(f'Parameters: {params}\n\n')
    out.write(divider)
    out.write(f'Number of results: {len(results)}\n')
    # One entry per document: separator, title, then the article text.
    for doc in results:
        out.write(divider + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

In [None]:
# Search for articles related to Jira Align solutions.
# The '{!parent ...}' query selects parent documents (doc_type:article) whose
# child company documents match the keywords; '[child]' in 'fl' asks Solr to
# return those child documents alongside each parent.
params = {
    'q': "{!parent which='doc_type:article'} company_keywords:(jira align)",
    'fl': 'article_title article_companies [child] company_description',
    'rows': 100
}
results = solr.solr.search(**params)

# The report file is overwritten on every run of this cell.
with open('results.txt', 'w', encoding='utf-8') as file:
    # Header: the exact parameters used, then the number of documents returned.
    file.write('Parameters: ' + str(params) + '\n\n')
    file.write('--------------------------------------------------\n')
    file.write(f'Number of results: {len(results)}\n')
    for result in results:
        file.write('--------------------------------------------------\n\n')
        file.write(str(result.get('article_title')) + '\n\n')
        # A returned article may lack the 'article_companies' key, in which case
        # .get() yields None; fall back to an empty list so the loop is skipped
        # instead of raising TypeError and leaving results.txt half-written.
        for company in result.get('article_companies') or []:
            solr.write_text(file, company.get('company_description'))
            file.write('\n')

print('results.txt updated')

In [None]:
# Query: stock declines for Tesla.
# The quoted phrase followed by ~3 is a proximity search: both terms must
# appear within 3 positions of each other in the title or body fields.
params = dict(
    defType='edismax',
    qf='article_title article_text',
    q='\"Tesla decline\"~3',
    rows=100,
)
results = solr.solr.search(**params)

# Visual separator reused for the header and for every result entry.
divider = '--------------------------------------------------\n'

# The report file is overwritten on every run of this cell.
with open('results.txt', mode='w', encoding='utf-8') as out:
    # Header: the exact parameters used, then the number of documents returned.
    out.write(f'Parameters: {params}\n\n')
    out.write(divider)
    out.write(f'Number of results: {len(results)}\n')
    # One entry per document: separator, title, then the article text.
    for doc in results:
        out.write(divider + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

In [None]:
# Query: stock market predictions.
# 'mm' is the eDisMax minimum-should-match setting: here 70% of the query
# terms have to match for a document to be returned.
params = dict(
    defType='edismax',
    qf='article_title article_text',
    q='stock market predictions',
    rows=100,
    mm='70%',
)
results = solr.solr.search(**params)

# Visual separator reused for the header and for every result entry.
divider = '--------------------------------------------------\n'

# The report file is overwritten on every run of this cell.
with open('results.txt', mode='w', encoding='utf-8') as out:
    # Header: the exact parameters used, then the number of documents returned.
    out.write(f'Parameters: {params}\n\n')
    out.write(divider)
    out.write(f'Number of results: {len(results)}\n')
    # One entry per document: separator, title, then the article text.
    for doc in results:
        out.write(divider + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

In [None]:
# Query: stock rises in artificial intelligence.
# The query text is searched with eDisMax across the title and body fields.
params = dict(
    defType='edismax',
    qf='article_title article_text',
    q='AI rises',
    rows=100,
)
results = solr.solr.search(**params)

# Visual separator reused for the header and for every result entry.
divider = '--------------------------------------------------\n'

# The report file is overwritten on every run of this cell.
with open('results.txt', mode='w', encoding='utf-8') as out:
    # Header: the exact parameters used, then the number of documents returned.
    out.write(f'Parameters: {params}\n\n')
    out.write(divider)
    out.write(f'Number of results: {len(results)}\n')
    # One entry per document: separator, title, then the article text.
    for doc in results:
        out.write(divider + '\n')
        out.write(f"{doc.get('article_title')}\n\n")
        solr.write_text(out, doc.get('article_text'))

print('results.txt updated')

In [None]:
# Final cell: close the manager once all queries are done.
# NOTE(review): which resources close() releases is defined by SolrManager.
solr.close()