In [1]:
# Relevant libraries
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [14]:
# Set up headless Chrome
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager(driver_version="135.0.7049.116").install()), options=options)


# Base search URL
search_url = "https://www.snopes.com/search/?q=GMO#gsc.tab=0&gsc.q=GMO&gsc.page=1"
driver.get(search_url)
time.sleep(3)

def scroll_to_bottom():
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

# Extract articles from a page
def extract_search_results():
    articles = driver.find_elements(By.CSS_SELECTOR, '.gsc-webResult.gsc-result')
    result = []
    for article in articles:
        try:
            title_el = article.find_element(By.CSS_SELECTOR, ".gs-title a")
            title = title_el.text
            url = title_el.get_attribute("href")
            result.append({"Title": title, "URL": url})
        except:
            continue
    return result

# Extract claim and rating from an article
def extract_article_details(article_url):
    driver.get(article_url)
    time.sleep(3)

    claim, rating = "", ""

    try:
        # Extract Rating
        rating_el = driver.find_element(By.CSS_SELECTOR, "div.rating_wrapper a.rating_link_wrapper")
        rating = rating_el.text.strip()
    except Exception as e:
        print(f"Rating not found: {e}")

    try:
        # Strategy 1: Search claim in article-content with keywords
        paragraphs = driver.find_elements(By.CSS_SELECTOR, "article#article-content p")
        for p in paragraphs:
            text = p.text.strip()
            if any(kw in text.lower() for kw in ["claimed", "alleged", "says", "states", "\"", "according to"]):
                claim = text
                break
        
        # Strategy 2: Fallback to article_main if claim is still empty
        if not claim:
            main_paragraphs = driver.find_elements(By.CSS_SELECTOR, "main#article_main p")
            for p in main_paragraphs:
                text = p.text.strip()
                if len(text) > 50 and not text.lower().startswith("advertisement"):
                    claim = text
                    break

    except Exception as e:
        print(f"Claim extraction failed: {e}")

    return claim, rating
# Main scraping loop through all pages
all_articles = []
page = 1
while True:
    print(f"Scraping search page {page}")
    scroll_to_bottom()
    articles = extract_search_results()
    all_articles.extend(articles)
    
    # Click next if available
    try:
        page += 1
        next_button = driver.find_element(By.XPATH, f"//div[@class='gsc-cursor-page'][text()='{page}']")
        next_button.click()
        time.sleep(3)
    except:
        break  # No more pages

# Now visit each article and get claim + rating
final_data = []
for idx, item in enumerate(all_articles):
    print(f"Processing article {idx + 1} of {len(all_articles)}: {item['Title']}")
    try:
        claim, rating = extract_article_details(item['URL'])
        final_data.append({
            "Title": item['Title'],
            "Claim": claim,
            "Rating": rating,
            "URL": item['URL']
        })
    except Exception as e:
        print(f"Failed to process: {item['URL']} - {e}")

driver.quit()
# Save to CSV
df = pd.DataFrame(final_data)
df.to_csv("snopes_gmo_articles.csv", index=False)
print("✅ Data saved to snopes_gmo_articles.csv")

Scraping search page 1
Scraping search page 2
Scraping search page 3
Scraping search page 4
Scraping search page 5
Scraping search page 6
Processing article 1 of 81: Putin Tells Russian Security Council That GMOs and Vaccines Are a ...
Processing article 2 of 81: Were 37 Million Bees Killed by a Large GMO Corn Field?
Processing article 3 of 81: Chipotle Sabotaged by GMO Activists?
Processing article 4 of 81: Is This Really How to Tell the Difference Between Organic and GMO ...
Processing article 5 of 81: Can Farmers Sue Monsanto Over Invasive GMO Crops?
Processing article 6 of 81: Zika Virus Caused by GMO Mosquitoes?
Processing article 7 of 81: Has Vaccine mRNA Entered the Food Supply via GMO Plants or ...
Processing article 8 of 81: Kraft Macaroni & Cheese Made with GMO Wheat?
Rating not found: Message: no such element: Unable to locate element: {"method":"css selector","selector":"div.rating_wrapper a.rating_link_wrapper"}
  (Session info: chrome=135.0.7049.116); For documentation on

In [3]:
file_path = "snopes_gmo_articles.csv"

# Read the CSV file
df = pd.read_csv(file_path)
# Display the first few rows of the DataFrame
print(df.head())

                                               Title  \
0  Putin Tells Russian Security Council That GMOs...   
1  Were 37 Million Bees Killed by a Large GMO Cor...   
2               Chipotle Sabotaged by GMO Activists?   
3  Is This Really How to Tell the Difference Betw...   
4  Can Farmers Sue Monsanto Over Invasive GMO Crops?   

                                               Claim  \
0  The statement attributed to Putin claimed West...   
1  "Once the corn started to get planted our bees...   
2  In late 2015, Chipotle outlets in multiple sta...   
3  The image shown above — a "Public Service Anno...   
4  Under the headline "New bill would allow farme...   

                        Rating  \
0  Unproven\nAbout this rating   
1  Unproven\nAbout this rating   
2  Unproven\nAbout this rating   
3     False\nAbout this rating   
4  Outdated\nAbout this rating   

                                                 URL  
0  https://www.snopes.com/fact-check/putin-gmo-ev...  
1  https://

In [7]:
# Drop rows with NaN values
df.dropna(inplace=True)
# checking for duplicate values in the 'Claim' column
duplicates = df[df.duplicated(['Claim'], keep=False)]
df['Cleaned_Rating'] = df['Rating'].str.split('\n').str[0]
# Display the duplicate rows
print("Duplicate Claims:")
print(duplicates[['Claim', 'Title']])
# drop the rating column
df.drop(columns=['Rating'], inplace=True)
# Rename the columns
df.rename(columns={'Cleaned_Rating': 'Rating'}, inplace=True)
# Remove leading and trailing whitespace from the 'Claim' column
df['Claim'] = df['Claim'].str.strip()
# Remove leading and trailing whitespace from the 'Title' column
df['Title'] = df['Title'].str.strip()
# Remove leading and trailing whitespace from the 'Rating' column
df['Rating'] = df['Rating'].str.strip()
# Remove leading and trailing whitespace from the 'URL' column
df['URL'] = df['URL'].str.strip()
# Remove rows with empty 'Claim' or 'Title' values
df = df[(df['Claim'] != '') & (df['Title'] != '')]
# Save the DataFrame to a new CSV file
df.to_csv("snopes_gmo_articles_cleaned.csv", index=False)
print("✅ Cleaned data saved to snopes_gmo_articles_cleaned.csv")
# Display the cleaned DataFrame
print("Cleaned Data:")
print(df.head())

Duplicate Claims:
Empty DataFrame
Columns: [Claim, Title]
Index: []
✅ Cleaned data saved to snopes_gmo_articles_cleaned.csv
Cleaned Data:
                                               Title  \
0  Putin Tells Russian Security Council That GMOs...   
1  Were 37 Million Bees Killed by a Large GMO Cor...   
2               Chipotle Sabotaged by GMO Activists?   
3  Is This Really How to Tell the Difference Betw...   
4  Can Farmers Sue Monsanto Over Invasive GMO Crops?   

                                               Claim  \
0  The statement attributed to Putin claimed West...   
1  "Once the corn started to get planted our bees...   
2  In late 2015, Chipotle outlets in multiple sta...   
3  The image shown above — a "Public Service Anno...   
4  Under the headline "New bill would allow farme...   

                                                 URL    Rating  
0  https://www.snopes.com/fact-check/putin-gmo-ev...  Unproven  
1  https://www.snopes.com/fact-check/bees-gmo-cor...  Unpr