In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import pandas as pd

# Initialize Selenium WebDriver (adjust the path to your chromedriver)
service = Service('path_to_chromedriver')
driver = webdriver.Chrome(service=service)

# Function to scrape news using Selenium
def scrape_news_with_selenium():
    driver.get('https://www.bbc.com/news')
    
    # Let the page load and get the page source
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # List to store scraped news data
    news_data = []

    # Scrape the first 5 articles
    articles = soup.find_all('a', class_='gs-c-promo-heading', limit=5)
    
    if not articles:
        print("No articles found. The CSS selectors may have changed.")
        return []

    for article in articles:
        title = article.get_text()
        link = 'https://www.bbc.com' + article['href']
        
        # Visit each article link
        driver.get(link)
        article_soup = BeautifulSoup(driver.page_source, 'html.parser')
        
        # Scrape article text
        paragraphs = article_soup.find_all('p')
        text = ' '.join([p.get_text() for p in paragraphs])
        
        news_data.append({
            'title': title,
            'text': text,
            'label': 1 # Assign a label later or classify
        })

    return news_data

# Scrape the news using Selenium
scraped_news = scrape_news_with_selenium()

# Close the browser after scraping
driver.quit()

# Process scraped data (same as before)
if scraped_news:
    scraped_news_df = pd.DataFrame(scraped_news)
    data = pd.read_csv('WELFake_Dataset.csv')
    data.drop('Unnamed: 0', axis=1, inplace=True)
    data = pd.concat([data, scraped_news_df], ignore_index=True)
    data.to_csv('WELFake_Dataset_updated_with_scraped_news.csv', index=False)
    print(data.tail(10))
else:
    print("No news articles were scraped.")


NoSuchDriverException: Message: Unable to obtain driver for chrome; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the existing dataset
data = pd.read_csv('WELFake_Dataset.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)

# Split the dataset into training and testing data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Save the split datasets to CSV files (optional)
train_data.to_csv('welfake_training_data.csv', index=False)
test_data.to_csv('welfake_testing_data.csv', index=False)

# Display the shapes of the training and testing datasets
print(f'Training data shape: {train_data.shape}')
print(f'Testing data shape: {test_data.shape}')


Training data shape: (57707, 3)
Testing data shape: (14427, 3)


In [None]:
import pandas as pd

# New data entry
new_entry = {
    'title': 'Biden Announces: ‘2.4 billion more to Ukraine’ but ‘no more aid for Hurricane Helene.',
    'text': "Hurricane Helene killed more than 100 people in six states as it left a trail of destruction across the southeastern U.S. President Joe Biden briefly talked about the storm Sept. 29 in remarks to reporters at Dover Air Force base, and his response to one question has since been mischaracterized across social media. 'Biden announces ‘$2.4 billion more to Ukraine' but 'No more aid for Hurricane Helene,' a Sept. 30 Facebook post said. We found other posts claiming that Biden said no more aid would be coming for states affected by the storm, with some tying it to aid for Ukraine. These Facebook and Instagram posts were flagged as part of Meta’s efforts to combat false news and misinformation on its News Feed. A closer look at his Biden’s full remarks show that he did not rule out future aid for states ravaged by the storm: Reporter: 'Do you have any words for the victims of the hurricane?' Biden: 'Yes, it’s tragic. As a matter of fact, we’re trying to get the exact number. My FEMA advisor’s on the ground in Florida right now. There’s a distinction between the number that FEMA’s used and the ones that are used by the locals. So, it really is amazing. You saw the photographs. It’s stunning. It really is.' Reporter: 'Such a wide area.' Biden: 'We’ve given them all … everything that we have on the ground ahead of time. So, we’re working hard.' Reporter question: 'Are there any more resources the federal government could be giving them?' Biden: 'No, we've given them … We have preplanned a significant amount of it, even though they didn't ask for it yet … hadn't asked for it yet.' What aid has Biden’s administration provided so far? The same day Biden made those comments to reporters, the White House released a fact sheet detailing the administration’s relief efforts. It said Biden directed FEMA Administrator Deanne Criswell to 'determine what more can be done' to speed aid to those having trouble accessing help. The fact sheet describes actions the Biden administration is already taking, including: Approving major disaster declarations for Florida and North Carolina, which lets people immediately access money and resources. FEMA aid could include upfront funds to pay for essential items such as food and water. Money may also be available to repair homes or find temporary shelter. On Sept. 30, Biden said in a news conference that South Carolina had also been granted a major disaster declaration. Declaring emergency declarations for Florida, North Carolina, Tennessee, South Carolina, Georgia, Virginia and Alabama. That declaration allows FEMA to provide direct support to states for emergency activities.",
    'label': 0
}

# Convert to DataFrame and append
new_entry_df = pd.DataFrame([new_entry])
train_data = pd.concat([train_data, new_entry_df], ignore_index=True)

# Save the updated dataset back to the CSV
train_data.to_csv('welfake_training_data.csv', index=False)
train_data.tail(5)

Unnamed: 0,title,text,label
57730,Up to 100 rockets fired at northern Israel thi...,Sirens warning of incoming rocket fire have so...,1
57731,IDF issues new evacuation warning for southern...,The Israel Defense Forces has issued a new war...,1
57732,JD Vance actually sent a letter last year to t...,"U.S. Sen. Elizabeth Warren, D-Mass., said she ...",1
57733,Tim Walz “changed the Minnesota flag so it cou...,A day before Vice President Kamala Harris chos...,0
57734,Biden Announces: ‘2.4 billion more to Ukraine’...,Hurricane Helene killed more than 100 people i...,0


In [5]:
test_data.tail(5)

Unnamed: 0,title,text,label
59237,U.S. Senate Republican tax proposal diverges f...,WASHINGTON (Reuters) - U.S. Senate Republicans...,0
69083,U.S. presidential election drives record ratin...,(Reuters) - The 2016 election cycle had been a...,0
55138,MUST SEE RESULTS OF NEW POLL Asking Americans ...,Somehow we don t think this is what the Queen ...,1
32672,US Coalition Airstrike on Syrian Army in Al-Ta...,"Bouthaina Shaaban, Political & Media Advisor t...",1
64614,Trump Just Praised Republicans For Stripping ...,Senate Republicans voted to repeal the Afforda...,1
