## Henry Hub Natural Gas Spot Price

### Reusable Script

In [3]:
# Import the necessary libraries

import requests
import pandas as pd
from bs4 import BeautifulSoup 

def extract_excel_from_url(url, output_filename, timeout=30):
    """
    Download an Excel file from the given URL and save it locally.

    Args:
        url: Address of the file to download.
        output_filename: Local path the response body is written to.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        True if the file was downloaded and written, False if the request
        failed (the error is printed rather than raised).
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server. The context manager releases the streamed connection even
        # if an error occurs part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Check for HTTP errors
            with open(output_filename, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading file: {e}")
        return False
    print(f"Excel file downloaded successfully to {output_filename}")
    return True

def process_excel_to_csv(
    excel_filename,
    csv_filename,
    sheet_name="Data 1",
    skiprows=2,
    price_column="Henry Hub Natural Gas Spot Price (Dollars per Million Btu)",
):
    """
    Process the Excel file and convert it to CSV using the following flow:
      - Read the "Date" and price columns from the given sheet with pandas.
      - Rename the long price column name to "Price".
      - Remove rows with null values in either "Date" or "Price".
      - Write the cleaned DataFrame to a CSV file (without the index).

    Args:
        excel_filename: Path of the Excel workbook to read.
        csv_filename: Path of the CSV file to write.
        sheet_name: Worksheet to read.
        skiprows: Number of leading rows to skip before the header row.
        price_column: Header of the column holding the prices. Defaults to
            the Henry Hub spot price header, so existing callers are
            unaffected; pass another header to reuse this for other series.

    Any error while reading, cleaning, or writing is printed rather than
    raised, and the function returns None either way.
    """
    try:
        # Read the Excel file, selecting only the two columns we keep
        df = pd.read_excel(
            excel_filename,
            sheet_name=sheet_name,
            skiprows=skiprows,
            usecols=["Date", price_column],
        )
        # Rename the price column to "Price"
        df = df.rename(columns={price_column: "Price"})
        # Remove rows with null values in "Date" or "Price"
        df = df.dropna(subset=["Date", "Price"])
        # Save the DataFrame to CSV
        df.to_csv(csv_filename, index=False)
        print(f"CSV file saved as {csv_filename}")
    except Exception as e:
        # Best-effort boundary: report the problem and let the caller go on.
        print(f"Error processing Excel file {excel_filename}: {e}")

def main():
    """Download the daily and monthly workbooks and convert each to CSV."""
    # Each entry: (download message, source URL, local Excel path,
    #              processing message, output CSV path).
    # The monthly entry assumes the same sheet layout as the daily one;
    # adjust as needed if the structure differs.
    jobs = (
        (
            "Downloading daily Excel file...",
            "https://www.eia.gov/dnav/ng/hist_xls/RNGWHHDd.xls",
            "Natural Gas Prices Day.xls",
            "Processing daily Excel file to CSV...",
            "Henry_Hub_Gas_Price_Daily.csv",
        ),
        (
            "Downloading monthly Excel file...",
            "https://www.eia.gov/dnav/ng/hist_xls/RNGWHHDm.xls",
            "Natural Gas Prices Month.xls",
            "Processing monthly Excel file to CSV...",
            "Henry_Hub_Gas_Price_Monthly.csv",
        ),
    )

    for download_msg, source_url, xls_path, process_msg, csv_path in jobs:
        print(download_msg)
        extract_excel_from_url(source_url, xls_path)

        print(process_msg)
        process_excel_to_csv(xls_path, csv_path)

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Downloading daily Excel file...
Excel file downloaded successfully to Natural Gas Prices Day.xls
Processing daily Excel file to CSV...
CSV file saved as Henry_Hub_Gas_Price_Daily.csv
Downloading monthly Excel file...
Excel file downloaded successfully to Natural Gas Prices Month.xls
Processing monthly Excel file to CSV...
CSV file saved as Henry_Hub_Gas_Price_Monthly.csv
