<h1>Emotion Detection and Information Retrieval</h1>

<h2>Installing Libraries</h2>

In [55]:
#!pip install duckduckgo-search
#!pip install search-engines

<h2>Defining Relevant Tokens</h2>

In [56]:
cryptocurrency_name = "bitcoin"
cryptocurrency_symbol = "$BTC"
keywords = f"{cryptocurrency_name} {cryptocurrency_symbol} sentiment"
keywords_for_news = f"{cryptocurrency_name} crypto"

<h2>Retrieve Links from Search Engines</h2>

In [57]:
# References:
############# DuckDuckGo
# (GoogleSearch) 1. https://medium.com/@nutanbhogendrasharma/how-to-scrape-google-search-engines-in-python-44770b8eab5
# (DuckDuckGo)   2. https://pypi.org/project/duckduckgo-search/
# (DuckDuckGo vs GoogleSearch) 3. https://medium.com/hackernoon/duckduckgo-vs-google-what-you-need-to-know-869368b08c4f
# (DuckDuckGo vs GoogleSearch) 4. https://www.cnet.com/tech/mobile/in-ios-17-apple-adds-ability-to-change-search-engine-in-safari-private-browsing/

############# Serch engines like Bing or Yahoo
#https://pypi.org/project/search-engines/

<h3> Importing Libraries </h3>

In [58]:
from duckduckgo_search import DDGS
from search_engines import bing_search, yahoo_search
import requests

In [59]:
MAX_SITES_RESULTS = 100
TIMEOUT_SECONDS = 5

<h3>Functions</h3>

In [60]:
def get_results(search_engine, page_url):
    try:
        response = requests.get(page_url, timeout=TIMEOUT_SECONDS)
        response.raise_for_status()  # Raise an exception for HTTP errors
        html = response.text
        results, next_page_url = search_engine.extract_search_results(html, page_url)
        return results, response.url
    except requests.exceptions.RequestException as e:
        print(f"An error occurred during 'get_results' function execution: {e}")
        return [], None

In [61]:
def fetch_search_results(search_engine, query, max_results):
    search_results = []
    search_url = search_engine.get_search_url(query)

    while len(search_results) < max_results and search_url:
        try:
            next_search_results, search_url = get_results(search_engine, search_url)
            for result in next_search_results:
                if "url" in result:
                    search_results.append(result['url'])
        except Exception as e:
            print(f"An error occurred during 'fetch_search_results' function execution: {e}")
            break

    return search_results[:max_results]


<h3>DuckDuckGo for SearchEngine</h3>

In [62]:
ddgs_results = list(map(lambda r: r["href"], DDGS().text(keywords, max_results=MAX_SITES_RESULTS)))
ddgs_results

['https://alternative.me/crypto/fear-and-greed-index/',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://www.newsbtc.com/bitcoin-news/bitcoin-sentiment-close-extreme-fear-why-matters/',
 'https://cryptonews.com/exclusives/top-3-btc-sentiment-analysis-tools-how-they-work-how-to-use-6889.htm',
 'https://cryptonews.com/news/bitcoin-price-prediction-as-btc-smashes-past-27000-resistance-best-time-buy.htm',
 'https://twitter.com/btcsentimentCOM',
 'https://cryptonews.com/news/bitcoin-price-prediction-marathons-record-kraken-deposits-uzbekistan-regulations.htm',
 'https://www.forbes.com/sites/digital-assets/2023/10/03/elon-musk-declares-the-us-dollar-a-scam-amid-fears-of-33-trillion-us-debt-death-spiral-as-the-bitcoin-price-soars/',
 'https://www.wsj.com/market-data/quotes/fx/BTCUSD',
 'https://www.fxstreet.com/cryptocurrencies/news/top-3-price-prediction-bitcoin-ethereum-ripple-btc-could-shatter-28-200-during-next-run-202310041928',
 'https://www.bloomberg.com/news/articles/2023-10-0

<h3>Bing for SearchEngine</h3>

In [63]:
bing_search_results = fetch_search_results(bing_search, keywords, MAX_SITES_RESULTS)
bing_search_results

Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .
Extracted 10 results from page .


['https://alternative.me/crypto/fear-and-greed-index/',
 'https://es.investing.com/crypto/bitcoin/btc-usd-scoreboard',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://coincodex.com/sentiment/',
 'https://www.bittsanalytics.com/sentiment-index/BTC',
 'https://www.newsbtc.com/bitcoin-news/bitcoin-sentiment-close-extreme-fear-why-matters/',
 'https://www.fxstreet.com/cryptocurrencies/news/bitcoin-price-could-revisit-10-000-amid-growing-correlation-with-us-dollar-index-202310042140',
 'https://finbold.com/bitcoin-btc-immutable-imx-and-vc-spectra-spct-sustain-bullish-sentiment-in-crypto-community/',
 'https://www.bloomberg.com/news/articles/2023-10-03/bitcoin-btc-price-dips-from-six-week-high-amid-bond-rout',
 'https://www.dailyfx.com/news/bitcoin-ethereum-jump-btc-usd-eth-usd-price-action-20231002.html',
 'https://alternative.me/crypto/fear-and-greed-index/',
 'https://es.investing.com/crypto/bitcoin/btc-usd-scoreboard',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://coi

<h3>Yahoo for SearchEngine</h3>

In [64]:
yahoo_search_results = fetch_search_results(yahoo_search, keywords, MAX_SITES_RESULTS)
yahoo_search_results

['https://www.msn.com/en-us/money/careers/a-look-back-at-bitcoins-performance-in-2022/vi-AA15P9Zx',
 'https://alternative.me/crypto/fear-and-greed-index/',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://coincodex.com/sentiment/',
 'https://www.cryptoeq.io/sentiment-report/sentiment/bitcoin',
 'https://www.msn.com/en-us/money/careers/a-look-back-at-bitcoins-performance-in-2022/vi-AA15P9Zx',
 'https://alternative.me/crypto/fear-and-greed-index/',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://coincodex.com/sentiment/',
 'https://www.cryptoeq.io/sentiment-report/sentiment/bitcoin',
 'https://www.msn.com/en-us/money/careers/a-look-back-at-bitcoins-performance-in-2022/vi-AA15P9Zx',
 'https://alternative.me/crypto/fear-and-greed-index/',
 'https://www.augmento.ai/bitcoin-sentiment/',
 'https://coincodex.com/sentiment/',
 'https://www.lookintobitcoin.com/charts/bitcoin-fear-and-greed-index/',
 'https://www.msn.com/en-us/money/careers/a-look-back-at-bitcoins-performance-in-

<h3> Mergin all Results </h3>

In [67]:
search_engines_results = set().union(yahoo_search_results, bing_search_results, ddgs_results)
search_engines_results

{'https://alternative.me/crypto/fear-and-greed-index/',
 'https://beincrypto.com/learn/how-to-short-bitcoin/',
 'https://beincrypto.com/price/bitcoin/price-prediction/',
 'https://beincrypto.com/unbelievable-bitcoin-price-predictions-2023-top-analysts/',
 'https://bitcoinist.com/bitcoin-sentiment-returns-neutral-prices-down/',
 'https://cfgi.io/bitcoin-fear-greed-index/',
 'https://coincodex.com/crypto/bitcoin/price-prediction/',
 'https://coincodex.com/sentiment/',
 'https://coinpedia.org/price-prediction/bitcoin-price-prediction/',
 'https://cointelegraph.com/news/bitcoin-investor-sentiment-slumps-to-a-new-low-even-as-macro-and-equities-show-improvement',
 'https://cointelegraph.com/news/bitcoin-will-surge-in-2023-but-be-careful-what-you-wish-for',
 'https://cointelegraph.com/news/btc-price-fear-svb-5-things-bitcoin-this-week',
 'https://cointelegraph.com/news/crypto-fear-and-greed-index-hits-highest-level-since-bitcoin-s-all-time-high',
 'https://cointelegraph.com/news/the-economics

In [68]:
len(search_engines_results)

104

<h2>HTTP Requests for Page Retrieval</h2>

In [65]:
# References:
# (BeautifulSoup) 1. https://ai.plainenglish.io/mastering-web-scraping-and-sentiment-analysis-with-python-and-machine-learning-255d1d6234c5

<h2>Filter Out Irrelevant Pages</h2>

<h2>Parse HTML Content and Extract Text</h2>

<h2>Sentiment Analysis</h2>

<h2>Storage of Sentiment and Pages</h2>