In [29]:
!pip install textblob requests beautifulsoup4 python-dotenv pandas tqdm termcolor

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


### Importing the required libraries

In [30]:
import os
import bs4
import requests
import textblob
import pandas as pd
from tqdm import tqdm
from dotenv import main
from termcolor import colored

# Load variables from a local .env file (python-dotenv) into the process
# environment so that os.getenv("NEWS_API") can find the API key.
main.load_dotenv()

True

### CONSTANTS

In [31]:
# API key read from the NEWS_API environment variable
# (os.getenv returns None when the variable is not set).
NEWS_API = os.getenv("NEWS_API")

# Endpoint that fetch_urls() sends its GET request to.
API_URL = "https://newsapi.org/v2/everything"
# Search phrase sent with the request by fetch_urls().
KEYWORD = "el salvador, politics"
# Sent as the `searchIn` request parameter.
SEARCHIN = "title,description,content"
# Sent as the `language` request parameter; left empty here
# (presumably meaning "no language filter" -- TODO confirm against the News API docs).
LANGUAGE = ""

### Important functions

In [32]:
def fetch_urls() -> list:
    """
    Fetches news article URLs from the News API.

    Sends a GET request to ``API_URL`` searching for ``KEYWORD`` in the
    fields listed in ``SEARCHIN``, authenticated with ``NEWS_API``.

    Returns:
        list: The ``url`` of every returned article that has one, in the
        order the API returned them.

    Raises:
        RuntimeError: If the response carries no ``articles`` field (e.g. an
            invalid key or a rate limit). The message includes the HTTP
            status and the API's own error text when it is available.
        requests.RequestException: On network failures or timeouts.
    """

    params = {
        'q': KEYWORD,
        'apiKey': NEWS_API,
        'searchIn': SEARCHIN,
        'language': LANGUAGE,
    }
    # Only send filters that are actually set: an empty value such as
    # LANGUAGE = "" would otherwise go out as a bare `language=` parameter.
    params = {name: value for name, value in params.items() if value}

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(API_URL, params=params, timeout=30)

    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page from a proxy).
        payload = {}

    if "articles" not in payload:
        detail = payload.get("message") or response.reason
        raise RuntimeError(
            f"News API request failed (HTTP {response.status_code}): {detail}"
        )

    # Skip entries that come back without a URL instead of crashing on them.
    return [article["url"] for article in payload["articles"] if article.get("url")]
    
def scrape_data(url: str, timeout: float = 15.0) -> "str | None":
    """
    Scrapes the text data from the given URL.

    Downloads the page, extracts its text with BeautifulSoup's
    ``html.parser`` and replaces every newline, tab and carriage return
    with a single space.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The flattened page text, or ``None`` if the page could not be
        downloaded or parsed (including HTTP error responses).
    """

    try:
        response = requests.get(url, timeout=timeout)
        # An error page (403/404/5xx) is not article text; count it as a
        # failed fetch rather than feeding it to the sentiment analysis.
        response.raise_for_status()

        soup = bs4.BeautifulSoup(response.text, "html.parser")
        text_data = soup.get_text()
    except Exception:
        # Deliberately best-effort: a single bad URL must not abort the run,
        # so the caller gets None and decides how to report it.
        return None

    # Map each of "\n", "\t" and "\r" to one space in a single pass.
    return text_data.translate(str.maketrans("\n\t\r", "   "))

def analyze_text(text: str) -> tuple:
    """
    Analyzes the sentiment of the given text.

    Args:
        text: The text to score with TextBlob.

    Returns:
        tuple: ``(text, length, sentiment, subjectivity, polarity)`` where
        ``length`` is ``len(text)``, ``sentiment`` is ``"Positive"``,
        ``"Negative"`` or ``"Neutral"`` depending on the sign of the
        polarity, and ``subjectivity`` / ``polarity`` are the TextBlob
        scores rounded to 4 decimal places.
    """

    # Only the sentiment scores are returned, so no noun-phrase extraction
    # is performed on the (potentially very long) page text.
    sentiment = textblob.TextBlob(text).sentiment
    polarity = sentiment.polarity

    if polarity > 0:
        label = "Positive"
    elif polarity < 0:
        label = "Negative"
    else:
        label = "Neutral"

    return text, len(text), label, round(sentiment.subjectivity, 4), round(polarity, 4)

In [33]:
# Fetch the candidate article URLs, then scrape and score each one.
urls = fetch_urls()

# One row per kept article: [url, *analyze_text(...)].
extracted_data = []
for url in tqdm(urls):
    try:
        text_data = scrape_data(url)

        # Nothing could be scraped from this URL: report it and move on.
        if text_data is None:
            print(colored(f"failed: {url}", "red"))
            continue

        text_analysis = analyze_text(text_data)

        # Keep the article only if subjectivity (index 3) or polarity
        # (index 4) is non-zero.
        if any(text_analysis[3:5]):
            extracted_data.append([url, *text_analysis])
            print(colored(f"success: {url}", "green"))

    except Exception as err:
        print(colored(f"failed: {err}", "red"))

print(colored("List generated successfully", "green"))

  3%|▎         | 1/34 [00:12<06:51, 12.46s/it]

[32msuccess: https://mattlakeman.org/2024/03/30/notes-on-el-salvador/[0m


  9%|▉         | 3/34 [00:14<01:46,  3.45s/it]

[32msuccess: https://reason.com/2024/04/09/milei-sets-aside-dollarization-plan-due-to-politics/[0m


 12%|█▏        | 4/34 [00:14<01:10,  2.35s/it]

[32msuccess: https://www.thenation.com/article/world/jonathan-blitzer-immigration-policy/[0m


 15%|█▍        | 5/34 [00:15<00:52,  1.80s/it]

[32msuccess: https://www.nybooks.com/online/2024/04/07/where-next-for-mexico/[0m


 18%|█▊        | 6/34 [00:16<00:46,  1.66s/it]

[32msuccess: https://www.cbsnews.com/baltimore/news/baltimore-key-bridge-collapse-salvaging-port-of-baltimore-reopen/[0m


 21%|██        | 7/34 [00:18<00:40,  1.52s/it]

[32msuccess: https://ambcrypto.com/?p=379888[0m


 24%|██▎       | 8/34 [00:18<00:31,  1.21s/it]

[32msuccess: https://www.rawstory.com/two-bodies-recovered-from-submerged-truck-in-baltimore-bridge-collapse/[0m


 26%|██▋       | 9/34 [00:20<00:33,  1.33s/it]

[32msuccess: https://www.naturalnews.com/2024-03-28-michigan-state-pays-residents-monthly-house-illegals.html[0m


 29%|██▉       | 10/34 [00:21<00:34,  1.42s/it]

[32msuccess: https://www.cbsnews.com/baltimore/news/francis-scott-key-bridge-collapse-baltimore-victim-search/[0m


 32%|███▏      | 11/34 [00:23<00:35,  1.53s/it]

[32msuccess: https://www.cbsnews.com/baltimore/news/baltimore-key-bridge-port-collapse-search-victims-debris-removal/[0m


 38%|███▊      | 13/34 [00:27<00:35,  1.70s/it]

[32msuccess: https://www.coindesk.com/consensus-magazine/2024/04/01/12-future-bitcoin-scenarios-from-bullish-to-bearish/[0m


 41%|████      | 14/34 [00:28<00:33,  1.69s/it]

[32msuccess: https://www.globalresearch.ca/imperial-fruit-bananas-costs-climate-change/5853577[0m


 50%|█████     | 17/34 [00:33<00:25,  1.48s/it]

[32msuccess: https://reason.com/2024/03/27/is-javier-milei-making-argentina-great-again/[0m


 53%|█████▎    | 18/34 [00:34<00:24,  1.55s/it]

[32msuccess: https://freerepublic.com/focus/f-news/4230278/posts[0m


 59%|█████▉    | 20/34 [00:36<00:17,  1.22s/it]

[32msuccess: https://www.americanthinker.com/blog/2024/04/giving_context_and_reality_to_the_phrase_african_american.html[0m


 62%|██████▏   | 21/34 [00:37<00:14,  1.15s/it]

[31mfailed: https://removed.com[0m


 65%|██████▍   | 22/34 [00:42<00:25,  2.12s/it]

[32msuccess: https://www.globalresearch.ca/the-war-on-yugoslavia-25-years-later-natos-blueprint-for-the-conflicts-of-the-twenty-first-century/5852815[0m


 68%|██████▊   | 23/34 [00:43<00:21,  1.99s/it]

[32msuccess: https://aquariumdrunkard.com/2024/03/19/late-cold-war-style-in-songwriting-1978-1984/[0m


 74%|███████▎  | 25/34 [00:46<00:16,  1.79s/it]

[32msuccess: https://freerepublic.com/focus/f-news/4230080/posts[0m


 76%|███████▋  | 26/34 [00:57<00:35,  4.44s/it]

[32msuccess: https://www.washingtonpost.com/politics/2024/04/08/trump-immigration-rhetoric/[0m


 82%|████████▏ | 28/34 [00:58<00:14,  2.43s/it]

[32msuccess: https://www.americanthinker.com/blog/2024/04/biden_s_open_border_policy_is_compromising_our_national_security.html[0m


 85%|████████▌ | 29/34 [00:59<00:10,  2.12s/it]

[32msuccess: https://mondoweiss.net/2024/04/dr-ghassan-abu-sittah-tomorrow-is-a-palestinian-day/[0m


 88%|████████▊ | 30/34 [01:09<00:17,  4.32s/it]

[32msuccess: https://www.washingtonpost.com/opinions/2024/04/05/trump-religion-secularism-authoritarian-populism/[0m


 91%|█████████ | 31/34 [01:11<00:10,  3.55s/it]

[32msuccess: https://www.idolator.com/7920170/the-oldest-civilizations-in-history[0m


 94%|█████████▍| 32/34 [01:12<00:05,  2.96s/it]

[32msuccess: https://www.foxnews.com/politics/maryland-dem-forget-border-claims-gop-talking-point-call-legal-immigration[0m


 97%|█████████▋| 33/34 [01:14<00:02,  2.51s/it]

[32msuccess: https://www.breitbart.com/politics/2024/03/27/migration-advocates-use-bridge-deaths-to-push-for-more-migration/[0m


100%|██████████| 34/34 [01:17<00:00,  2.27s/it]

[32msuccess: https://reason.com/podcast/2024/04/03/why-palantir-cofounder-joe-lonsdale-left-california-for-texas/[0m
[32mList generated successfully[0m





In [34]:
# Column labels in the same order as each row of extracted_data:
# the URL followed by the five values returned by analyze_text().
columns = [
    "url",
    "content",
    "length",
    "sentiment",
    "subjectivity",
    "polarity",
]
df = pd.DataFrame(data=extracted_data, columns=columns)
df

Unnamed: 0,url,content,length,sentiment,subjectivity,polarity
0,https://mattlakeman.org/2024/03/30/notes-on-el...,Notes on El Salvador – Matt Lakeman ...,191071,Positive,0.4459,0.0773
1,https://reason.com/2024/04/09/milei-sets-aside...,Milei Sets Aside Dollariz...,6619,Positive,0.3766,0.0907
2,https://www.thenation.com/article/world/jonath...,The Brutal Cycle of US...,21777,Positive,0.417,0.038
3,https://www.nybooks.com/online/2024/04/07/wher...,Where Next for Mexico? | ...,37448,Positive,0.3865,0.0621
4,https://www.cbsnews.com/baltimore/news/baltimo...,Timeline to reopen Port of Baltimore unc...,9599,Positive,0.5049,0.0675
5,https://ambcrypto.com/?p=379888,'Just because they embraced Bi...,7148,Positive,0.4167,0.1158
6,https://www.rawstory.com/two-bodies-recovered-...,Two bodies recovered from submerged truck in B...,7576,Positive,0.3964,0.0187
7,https://www.naturalnews.com/2024-03-28-michiga...,Great Replacement Theory is already a real...,12263,Positive,0.4382,0.1347
8,https://www.cbsnews.com/baltimore/news/francis...,Two bodies recovered from vehicle underw...,10380,Positive,0.4835,0.0484
9,https://www.cbsnews.com/baltimore/news/baltimo...,Search efforts paused after 2 bodies fou...,10953,Positive,0.4577,0.0789
