    This code will:

    -Fetch the weather.gov page for the specified location
    -Extract current weather conditions
    -Extract the forecast data
    -Save both to separate CSV files with timestamps in the filenames

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import time
import os
from datetime import datetime
import logging
import csv
from pathlib import Path

In [6]:
class WeatherGovScraper:
    def __init__(self, url):
        self.url = url

    def run_once(self):
        # Your logic for scraping the data
        print(f"Scraping data from {self.url}")
        # Return some dummy data or actual scraped data
        return {"temperature": "72F", "conditions": "Clear"}

# Usage:
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
scraper = WeatherGovScraper(url)
weather_data = scraper.run_once()
print(weather_data)



Scraping data from https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093
{'temperature': '72F', 'conditions': 'Clear'}


In [9]:
# Run a few scrapes with delay
import time

# Scraping code
weather_data = scraper.run_once()

# Check the structure of the returned data
print(weather_data)  # This will help you understand the data format

# Now safely handle 'conditions' if it's a string
conditions = weather_data.get('conditions', 'N/A')

# If the conditions are a string like "72°F, Clear", split it
if isinstance(conditions, str):
    # Example split: "72°F, Clear" -> ['72°F', 'Clear']
    temp, condition = conditions.split(',') if ',' in conditions else (conditions, 'N/A')
else:
    temp, condition = 'N/A', 'N/A'

# Clean up the temp string (if necessary) and print results
print(f"Current temp: {temp.strip() if temp != 'N/A' else temp}")
print(f"Condition: {condition.strip() if condition != 'N/A' else condition}")



Scraping data from https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093
{'temperature': '72F', 'conditions': 'Clear'}
Current temp: Clear
Condition: N/A


In [12]:
import requests
from bs4 import BeautifulSoup

class WeatherGovScraper:
    def __init__(self, url):
        self.url = url
    
    def run_once(self):
        # Send an HTTP request to the weather page
        response = requests.get(self.url)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve the data. Status code: {response.status_code}")
            return None

        # Parse the page with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Try to extract the current weather conditions
        weather_data = {}

        # Extract current temperature and condition (customize the selectors as needed)
        try:
            temp_element = soup.find('p', class_='myforecast-current-lrg')
            condition_element = soup.find('div', class_='tombstone-container')

            if temp_element:
                weather_data['conditions'] = temp_element.get_text(strip=True)
            if condition_element:
                condition_desc = condition_element.find('p', class_='short-desc')
                if condition_desc:
                    weather_data['condition'] = condition_desc.get_text(strip=True)
            else:
                weather_data['condition'] = 'N/A'
        except Exception as e:
            print(f"Error parsing weather data: {e}")
            weather_data['conditions'] = 'N/A'
            weather_data['condition'] = 'N/A'

        return weather_data


# Usage
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
scraper = WeatherGovScraper(url)

# Running the scraper once and getting the data
weather_data = scraper.run_once()

# Print the fetched data
if weather_data:
    print(f"Conditions: {weather_data.get('conditions', 'N/A')}")
    print(f"Condition: {weather_data.get('condition', 'N/A')}")


Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain


In [16]:
import requests
from bs4 import BeautifulSoup
import time  # For adding delay between scrapes

class WeatherGovScraper:
    def __init__(self, url):
        self.url = url
    
    def run_once(self):
        # Send an HTTP request to the weather page
        response = requests.get(self.url)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve the data. Status code: {response.status_code}")
            return None

        # Parse the page with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Try to extract the current weather conditions
        weather_data = {}

        # Extract current temperature and condition (customize the selectors as needed)
        try:
            temp_element = soup.find('p', class_='myforecast-current-lrg')
            condition_element = soup.find('div', class_='tombstone-container')

            if temp_element:
                weather_data['conditions'] = temp_element.get_text(strip=True)
            if condition_element:
                condition_desc = condition_element.find('p', class_='short-desc')
                if condition_desc:
                    weather_data['condition'] = condition_desc.get_text(strip=True)
            else:
                weather_data['condition'] = 'N/A'
        except Exception as e:
            print(f"Error parsing weather data: {e}")
            weather_data['conditions'] = 'N/A'
            weather_data['condition'] = 'N/A'

        return weather_data


# Usage
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
scraper = WeatherGovScraper(url)

# Run the scraper 5 times
for i in range(5):
    print(f"Scrape #{i+1}")
    weather_data = scraper.run_once()
    
    # Print the fetched data
    if weather_data:
        print(f"Conditions: {weather_data.get('conditions', 'N/A')}")
        print(f"Condition: {weather_data.get('condition', 'N/A')}")
    
    if i < 2:  # Don't sleep after the last scrape
        print("Waiting for 10 seconds before the next scrape...\n")
        time.sleep(10)  # Delay of 5 seconds between scrapes


Scrape #1
Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain
Waiting for 10 seconds before the next scrape...

Scrape #2
Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain
Waiting for 10 seconds before the next scrape...

Scrape #3
Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain
Scrape #4
Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain
Scrape #5
Conditions: 49°F
Condition: Mostly Cloudythen ChanceRain


In [21]:
import requests
import time
import logging
from requests.exceptions import RequestException, Timeout

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def fetch_page(url, retries=3, delay=5):
  
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    for attempt in range(retries):
        try:
            # Attempt to send the request
            response = requests.get(url, headers=headers, timeout=30)
            
            # Check for successful response
            response.raise_for_status()
            logger.info(f"Successfully fetched {url}")
            return response.text

        except Timeout:
            logger.warning(f"Timeout error fetching {url}. Attempt {attempt + 1} of {retries}.")
        except RequestException as e:
            logger.error(f"Request error fetching {url}: {e}. Attempt {attempt + 1} of {retries}.")
        
        # If retries are left, wait before trying again
        if attempt < retries - 1:
            logger.info(f"Retrying in {delay} seconds...")
            time.sleep(delay)
    
    # If all retries fail, log and return None
    logger.error(f"Failed to fetch {url} after {retries} attempts.")
    return None


In [23]:
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
page_content = fetch_page(url)

if page_content:
    print("Page fetched successfully.")
else:
    print("Failed to fetch the page after multiple attempts.")


INFO:__main__:Successfully fetched https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093


Page fetched successfully.


In [31]:
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import logging

# Set up logging for debugging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Define the function to fetch the page
def fetch_page(url):
    """Fetch the weather.gov web page"""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        logger.info(f"Successfully fetched {url}")
        return response.text
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching {url}: {e}")
        return None

# Define the function to parse the weather data
def parse_current_conditions(html_content):
    """Extract current weather conditions from the HTML content"""
    
    if not html_content:
        logger.error("No HTML content provided.")
        return None
    
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
        current_conditions = {}

        # Get the current conditions section
        current_conditions_div = soup.find('div', id='current_conditions-summary')
        if current_conditions_div:
            # Get the temperature
            temp_element = current_conditions_div.find('p', class_='myforecast-current-lrg')
            if temp_element:
                current_conditions['temperature'] = temp_element.text.strip()
            else:
                logger.warning("Temperature element not found.")
            
            # Get the weather condition text
            condition_element = current_conditions_div.find('p', class_='myforecast-current')
            if condition_element:
                current_conditions['condition'] = condition_element.text.strip()
            else:
                logger.warning("Condition element not found.")
            
            # Extract additional details
            details_table = soup.find('div', id='current_conditions_detail')
            if details_table:
                rows = details_table.find_all('tr')
                for row in rows:
                    cells = row.find_all('td')
                    if len(cells) == 2:
                        label = cells[0].text.strip().rstrip(':').lower().replace(' ', '_')
                        value = cells[1].text.strip()
                        current_conditions[label] = value
                    else:
                        logger.debug("Skipping row with invalid cell count: %s", row)

        # Add timestamp of data retrieval
        current_conditions['timestamp'] = datetime.now().isoformat()
        
        return current_conditions

    except Exception as e:
        logger.error(f"Error parsing the weather data: {e}")
        return None


# Fetch the page and parse the data
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
html_content = fetch_page(url)

if html_content:
    weather_data = parse_current_conditions(html_content)
    if weather_data:
        print(weather_data)
    else:
        print("Failed to parse weather data.")
else:
    print("Failed to fetch the page.")


INFO:__main__:Successfully fetched https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093


{'temperature': '48°F', 'condition': 'Clear', 'humidity': '61%', 'wind_speed': 'SSW 6 MPH', 'barometer': '30.17 in (1021.67 mb)', 'dewpoint': '35°F (2°C)', 'visibility': '10.00 mi', 'wind_chill': '45°F (7°C)', 'last_update': '07 Mar 05:00 PM PST', 'timestamp': '2025-03-08T04:40:53.130795'}


In [33]:
# Fetch the page content using the previous fetch_page function
url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
html_content = fetch_page(url)

if html_content:
    # Parse the forecast data
    forecast_data = parse_forecast(html_content)
    if forecast_data:
        for item in forecast_data:
            print(item)
    else:
        print("No forecast data found.")
else:
    print("Failed to fetch the page.")


INFO:__main__:Successfully fetched https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093


{'period': 'Tonight', 'description': 'Mostly Cloudythen ChanceRain', 'temperature': 'Low: 41 °F', 'condition': 'Tonight: A 30 percent chance of rain, mainly after 3am.  Mostly cloudy, with a low around 41. South southeast wind 7 to 9 mph. ', 'timestamp': '2025-03-08T04:45:55.934593'}
{'period': 'Saturday', 'description': 'Rain Likely', 'temperature': 'High: 58 °F', 'condition': 'Saturday: Rain likely, mainly before 11am.  Mostly cloudy, with a high near 58. South southwest wind 8 to 13 mph.  Chance of precipitation is 60%. New precipitation amounts of less than a tenth of an inch possible. ', 'timestamp': '2025-03-08T04:45:55.934593'}
{'period': 'Saturday Night', 'description': 'Chance Rain', 'temperature': 'Low: 45 °F', 'condition': 'Saturday Night: A 30 percent chance of rain, mainly after 4am.  Mostly cloudy, with a low around 45. South wind around 9 mph. ', 'timestamp': '2025-03-08T04:45:55.934593'}
{'period': 'Sunday', 'description': 'Rain', 'temperature': 'High: 54 °F', 'conditio

In [37]:
import csv
import logging

# Set up logging for debugging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def save_to_csv(data, filename):
    """Save data to a CSV file"""
    
    if not data:
        logger.warning("No data to save.")
        return
    
    # If data is a dictionary (current conditions), convert to a list for CSV writing
    if isinstance(data, dict):
        data = [data]
    
    try:
        # Ensure that the data is a list of dictionaries
        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            logger.error("Data should be a list of dictionaries.")
            return
        
        # Get field names from the first item in the data
        fieldnames = data[0].keys()
        
        # Write to CSV file
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        
        logger.info(f"Data successfully saved to {filename}")
    
    except Exception as e:
        logger.error(f"Error saving data to {filename}: {e}")


In [35]:
data = [
    {'period': 'Today', 'description': 'Sunny', 'temperature': 'High: 72°F', 'condition': 'Sunny', 'timestamp': '2025-03-08T12:34:56.789123'},
    {'period': 'Tonight', 'description': 'Clear', 'temperature': 'Low: 65°F', 'condition': 'Clear', 'timestamp': '2025-03-08T12:34:56.789123'}
]

filename = 'forecast_data.csv'
save_to_csv(data, filename)


INFO:__main__:Data successfully saved to forecast_data.csv


In [36]:
import logging
from datetime import datetime

# Set up logging for debugging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def main():
    """Main entry point"""
    # Weather.gov URL
    url = "https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093"
    
    try:
        # Fetch the page
        html_content = fetch_page(url)
        
        if html_content:
            # Get current conditions
            current_conditions = parse_current_conditions(html_content)
            if current_conditions:
                # Save current conditions to CSV
                current_filename = f"current_weather_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                save_to_csv(current_conditions, current_filename)
            else:
                logger.warning("No current conditions found.")
        
            # Get forecast data
            forecast_data = parse_forecast(html_content)
            if forecast_data:
                # Save forecast to CSV
                forecast_filename = f"forecast_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                save_to_csv(forecast_data, forecast_filename)
            else:
                logger.warning("No forecast data found.")
        else:
            logger.error("Failed to fetch page content.")
    
    except Exception as e:
        logger.error(f"An error occurred in the main process: {e}")
        
if __name__ == "__main__":
    main()



INFO:__main__:Successfully fetched https://forecast.weather.gov/MapClick.php?lat=47.449&lon=-122.3093
INFO:__main__:Data successfully saved to current_weather_20250308_045044.csv
INFO:__main__:Data successfully saved to forecast_20250308_045044.csv


In [38]:
import os
print("Current working directory:", os.getcwd())


Current working directory: C:\Users\USER
