**Parsera Scraping Notebook**

This notebook demonstrates how to use the Parsera library to scrape AI news from Wired.
It uses a language model to interpret and extract specific elements from the webpage.

Parsera project repository: https://github.com/raznem/parsera


In [None]:
# --- Setup and Installation ---
# Install required libraries
!pip install parsera langchain-together -q
!playwright install

In [None]:
# Apply nest_asyncio to allow asynchronous operations in Jupyter. This is done
# before the remaining imports, as in the original cell order.
import nest_asyncio
nest_asyncio.apply()

# --- Environment Setup ---
import os
from getpass import getpass

# google.colab is only importable inside Colab; fall back gracefully so the
# notebook can also run in a regular Jupyter session.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

# Set up the API key.
# - On Colab: read it from the notebook secrets (make sure to add
#   TOGETHER_API_KEY to your Colab secrets).
# - Elsewhere: keep an already-exported TOGETHER_API_KEY, and prompt for it
#   (without echoing) only when it is missing or empty.
if userdata is not None:
    os.environ["TOGETHER_API_KEY"] = userdata.get('TOGETHER_API_KEY')
elif not os.environ.get("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = getpass("TOGETHER_API_KEY: ")

# --- Imports ---
from parsera import Parsera
from langchain_together import ChatTogether

In [None]:
# --- Language Model Configuration ---
# Create the ChatTogether chat model used for extraction. The API key comes
# from the TOGETHER_API_KEY environment variable.
llm = ChatTogether(
    together_api_key=os.environ["TOGETHER_API_KEY"],
    model="google/gemma-2-9b-it",
    temperature=0.2,
)

In [None]:
# --- Web Scraping Configuration ---
# Page to scrape: page 1 of Wired's "artificial-intelligence" tag listing.
url: str = (
    "https://www.wired.com/tag/artificial-intelligence/?page=1"
)

In [None]:
# Elements to extract: each key is an output field name and each value is the
# description of that element passed to the scraper.
elements = dict(
    Title="News title",
    URL="URL of the Article",
)

In [None]:
# --- Scraping Execution ---
# Build a Parsera scraper backed by the language model configured above.
scraper = Parsera(model=llm)

# Run the scraping operation against the target URL for the requested elements.
result = scraper.run(
    url=url,
    elements=elements,
)

In [None]:
# --- Display Results ---
# Each entry of `result` is expected to be a dict keyed by the field names
# requested in `elements` ("Title", "URL") -- TODO confirm against Parsera's
# return type. The fields are produced by a language model and may be absent,
# so a missing field is shown as "N/A" instead of raising KeyError, and an
# empty or None result is reported explicitly.
print("Scraped AI News from Wired:")
if not result:
    print("No articles were extracted.")
for item in result or []:
    print(f"Title: {item.get('Title', 'N/A')}")
    print(f"URL: {item.get('URL', 'N/A')}")
    print("---")