In [2]:
import pandas as pd

In [3]:
# Read both article exports from the working directory, NewsAPI first and
# then the Guardian, binding each to its own frame.
news_df, guardian_df = (
    pd.read_csv(csv_name)
    for csv_name in ("newsapi_articles.csv", "guardian_articles.csv")
)

In [4]:
# Preview the first five NewsAPI rows (five is also the default for head()).
news_df.head(5)

Unnamed: 0,Source,Title,Description,Content,URL,PublishedAt
0,Hackaday,Plastic On The Mind: Assessing the Risks From ...,Perhaps one of the clearest indications of the...,Perhaps one of the clearest indications of the...,https://hackaday.com/2025/02/12/plastic-on-the...,2025-02-12T15:00:32Z
1,BBC News,Scenic loch becomes magnet for Scotland's plas...,"One of Scotland's biggest plastic problems, th...",Kevin KeaneBBC Scotland's environment correspo...,https://www.bbc.com/news/articles/cp8256l20l0o,2025-02-03T00:53:58Z
2,Yahoo Entertainment,Trump rolls back standards for water-using app...,WASHINGTON (Reuters) -President Donald Trump o...,WASHINGTON (Reuters) -President Donald Trump o...,https://ca.finance.yahoo.com/news/trump-roll-b...,2025-02-11T12:20:50Z
3,Scientific American,Why Aren’t We Losing Our Minds Over the Plasti...,New research on microplastics in brains remind...,Our brains are full of plastic.\r\nThis was th...,https://www.scientificamerican.com/article/why...,2025-02-14T12:00:00Z
4,Buzzfeed,36 Products That’ll Help Get You Through TSA F...,You can pick up your Security Pro merit badge ...,Cadence is an AAPI woman-owned small business ...,https://www.buzzfeed.com/elizabethlilly/produc...,2025-02-04T13:31:02Z


In [5]:
# Preview the first five Guardian rows (five is also the default for head()).
guardian_df.head(5)

Unnamed: 0,PublishedAt,Section,Title,Content,URL
0,2025-02-12T10:00:21Z,Environment,My no-plastic life: I tried to cut out single-...,The scale of plastic pollution is so terrifyin...,https://www.theguardian.com/p/xxg97n
1,2025-02-17T17:04:42Z,Environment,"Reducing plastic use is possible, but it’s not...","It’s difficult to avoid plastic, but Emma Bedd...",https://www.theguardian.com/p/xxhf3h
2,2024-11-27T13:14:27Z,Environment,Plastics lobbyists make up biggest group at vi...,Record numbers of plastic industry lobbyists a...,https://www.theguardian.com/p/xx2f3z
3,2025-02-12T17:08:57Z,Environment,A challenge for readers to go plastic-free,I can fully endorse Emma Beddington’s experien...,https://www.theguardian.com/p/xxh8mp
4,2024-12-03T12:34:17Z,Environment,Coca-Cola accused of quietly dropping its 25% ...,Coca-Cola has been accused of quietly abandoni...,https://www.theguardian.com/p/xx3jt3


In [6]:
def _with_text_column(articles):
    """Return a copy of ``articles`` with a ``Text`` column added (or replaced).

    ``Text`` is ``Title`` and ``Content`` joined by a single space; a missing
    value on either side is treated as an empty string.
    """
    combined = articles["Title"].fillna("") + " " + articles["Content"].fillna("")
    return articles.assign(Text=combined)


# Add "Text" while keeping the source columns. Because Title/Content are still
# present afterwards, this cell can be re-run without a KeyError.
news_df = _with_text_column(news_df)
guardian_df = _with_text_column(guardian_df)

# Stack only the "Text" column of both sources, then de-duplicate. The result
# is built as a chain rather than with inplace=True.
merged_df = (
    pd.concat([news_df[["Text"]], guardian_df[["Text"]]], ignore_index=True)
    .drop_duplicates(subset=["Text"])
    # Safeguard only: "Text" cannot be NaN here because both parts were filled
    # with "". NOTE(review): rows with no title and no content become " " and
    # are kept — confirm whether they should be removed.
    .dropna(subset=["Text"])
)

merged_df.to_csv("merged_news_data.csv", index=False)

merged_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3448 entries, 0 to 3447
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    3448 non-null   object
dtypes: object(1)
memory usage: 27.1+ KB


In [7]:
# Display the merged frame as this cell's output.
merged_df

Unnamed: 0,Text
0,Plastic On The Mind: Assessing the Risks From ...
1,Scenic loch becomes magnet for Scotland's plas...
2,Trump rolls back standards for water-using app...
3,Why Aren’t We Losing Our Minds Over the Plasti...
4,36 Products That’ll Help Get You Through TSA F...
...,...
3443,The new green revolution in crop genetics It i...
3444,You can't deny it - nuclear weapons are a stup...
3445,US study shows GM crops 'no better' A two-year...
3446,Monsanto hints at U-turn on GM food in Britain...


In [4]:
# Define keyword lists for labeling
# Phrases whose presence marks a text as supporting a plastics ban ("Yes").
yes_keywords = [
    "ban plastic", "plastic ban", "stop plastic", "reduce plastic", "single-use plastic ban",
    "environmental impact", "plastic pollution", "ocean pollution", "eco-friendly", "sustainable",
    "reduce waste", "microplastics", "save the planet", "plastic crisis", "plastic free",
    "plastic alternatives", "biodegradable plastic", "protect oceans", "reduce carbon footprint",
    "marine life danger", "plastic waste problem", "harmful to environment", "zero waste",
    "plastic reduction", "no more plastic", "plastic ban policy", "government plastic ban",
    "phase out plastic", "plastic harming wildlife", "replacing plastic", "plastic regulation"
]

# Phrases whose presence marks a text as opposing a plastics ban ("No").
no_keywords = [
    "plastic industry", "economic impact", "job loss", "affordable plastic", "necessary plastic",
    "plastic is convenient", "against plastic ban", "plastic benefits", "plastic alternatives expensive",
    "recycling better", "plastic essential", "plastic innovation", "cost-effective packaging",
    "protect plastic industry", "plastic economy", "ban hurts businesses", "recycle instead of ban",
    "plastic is not the problem", "plastic has advantages", "plastic demand", "impact on plastic jobs",
    "banning is not the solution", "plastic safer", "plastic packaging necessary",
    "no alternative to plastic", "plastic is reusable", "plastic sustainability", "plastic tax harmful"
]

# Opposing phrases that contain a supporting phrase as a substring, e.g.
# "against plastic ban" contains "plastic ban". Since "Yes" is checked first,
# these phrases would otherwise always be read as support for a ban.
_shadowed_no_keywords = [
    phrase for phrase in no_keywords
    if any(supporting in phrase for supporting in yes_keywords)
]


def label_text(text) -> str:
    """Assign a stance label to ``text`` by case-insensitive substring matching.

    Args:
        text: The article text. Any value is accepted; it is converted with
            ``str()`` and lower-cased before matching.

    Returns:
        ``"Yes"`` if any entry of ``yes_keywords`` occurs in the text outside
        of a longer opposing phrase, otherwise ``"No"`` if any entry of
        ``no_keywords`` occurs, otherwise ``"Unlabeled"``.
    """
    text = str(text).lower()

    # Mask the longer opposing phrases so the supporting phrase embedded in
    # them does not count as a "Yes" hit. "\x00" is used as the filler because
    # it cannot join the surrounding characters into a new keyword match.
    unshadowed = text
    for phrase in _shadowed_no_keywords:
        unshadowed = unshadowed.replace(phrase, "\x00")

    if any(word in unshadowed for word in yes_keywords):
        return "Yes"  # Supports banning plastics
    if any(word in text for word in no_keywords):
        return "No"  # Against banning plastics
    return "Unlabeled"  # Neutral or unclear stance

# Tag every article with its stance label, element by element.
stance_labels = merged_df["Text"].map(label_text)
merged_df["Label"] = stance_labels

# Save the labelled data without the index column.
merged_df.to_csv("labeled_news_data.csv", index=False)

# Show how many articles fell into each label.
merged_df["Label"].value_counts()

Label
Unlabeled    2183
Yes          1241
No             24
Name: count, dtype: int64

In [5]:
# Display the frame, now including the "Label" column, as this cell's output.
merged_df

Unnamed: 0,Text,Label
0,Plastic On The Mind: Assessing the Risks From ...,Unlabeled
1,Scenic loch becomes magnet for Scotland's plas...,Unlabeled
2,Trump rolls back standards for water-using app...,Unlabeled
3,Why Aren’t We Losing Our Minds Over the Plasti...,Unlabeled
4,36 Products That’ll Help Get You Through TSA F...,Unlabeled
...,...,...
3443,The new green revolution in crop genetics It i...,Yes
3444,You can't deny it - nuclear weapons are a stup...,Unlabeled
3445,US study shows GM crops 'no better' A two-year...,Unlabeled
3446,Monsanto hints at U-turn on GM food in Britain...,Unlabeled
