In [1]:
import requests
import pandas as pd

url = "https://www.eia.gov/dnav/ng/hist/rngwhhdD.htm"

headers = {
    "User-Agent": "Mozilla/5.0"
}

# 1) download HTML
resp = requests.get(url, headers=headers)
resp.raise_for_status()
html = resp.text

# 2) read all tables from that HTML
tables = pd.read_html(html)

# 3) pick the table that has 'Week Of' in the first column
weekly = None
for t in tables:
    cols = [str(c) for c in t.columns]
    if any("Week Of" in c for c in cols):
        weekly = t.copy()
        break

if weekly is None:
    raise ValueError("Could not find weekly Henry Hub table.")

# 4) clean column names
weekly.columns = ["week_of", "mon", "tue", "wed", "thu", "fri"]

print(weekly.head(10))

                 week_of   mon   tue   wed   thu   fri
0  1997 Jan- 6 to Jan-10   NaN  3.82  3.80  3.61  3.92
1  1997 Jan-13 to Jan-17  4.00  4.01  4.34  4.71  3.91
2  1997 Jan-20 to Jan-24  3.26  2.99  3.05  2.96  2.62
3  1997 Jan-27 to Jan-31  2.98  3.05  2.91  2.86  2.77
4  1997 Feb- 3 to Feb- 7  2.49  2.59  2.65  2.51  2.39
5                    NaN   NaN   NaN   NaN   NaN   NaN
6  1997 Feb-10 to Feb-14  2.42  2.34  2.42  2.22  2.12
7  1997 Feb-17 to Feb-21   NaN  1.84  1.95  1.92  1.92
8  1997 Feb-24 to Feb-28  1.92  1.77  1.81  1.80  1.78
9  1997 Mar- 3 to Mar- 7  1.80  1.87  1.92  1.82  1.89


  tables = pd.read_html(html)


In [4]:
# Save the scraped weekly Henry Hub data to CSV
weekly.to_csv("henry_hub_weekly.csv", index=False)

print("Saved as henry_hub_weekly.csv")

Saved as henry_hub_weekly.csv


In [2]:
import pandas as pd
import requests
from io import StringIO

TABLE_URL = "https://www.eia.gov/totalenergy/data/browser/csv.php?tbl=T01.11"

headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://www.eia.gov/totalenergy/data/browser/index.php?tbl=T01.11",
    "Accept": "text/csv,*/*"
}

r = requests.get(TABLE_URL, headers=headers, timeout=60)
r.raise_for_status()

df = pd.read_csv(StringIO(r.text))

# Inspect columns (important because EIA tables differ slightly)
print(df.columns)
print(df.head())

# OPTIONAL: filter your range (197301 to 202509)
# Adjust these column names after you print(df.columns)
# Example patterns you might see: 'YYYYMM', 'Month', 'Date', etc.
# df = df[(df["YYYYMM"] >= 197301) & (df["YYYYMM"] <= 202509)]

df.to_csv("EIA_T01_11_HDD_by_CensusDivision.csv", index=False)
print("Saved: EIA_T01_11_HDD_by_CensusDivision.csv")

Index(['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'], dtype='object')
       MSN  YYYYMM  Value  Column_Order                       Description  \
0  ZWHDPC1  194913   6148             1  Heating Degree-Days, New England   
1  ZWHDPC1  195013   6793             1  Heating Degree-Days, New England   
2  ZWHDPC1  195113   6476             1  Heating Degree-Days, New England   
3  ZWHDPC1  195213   6500             1  Heating Degree-Days, New England   
4  ZWHDPC1  195313   5945             1  Heating Degree-Days, New England   

     Unit  
0  Number  
1  Number  
2  Number  
3  Number  
4  Number  
Saved: EIA_T01_11_HDD_by_CensusDivision.csv


In [3]:
import pandas as pd
import requests
from io import StringIO

TABLE_URL = "https://www.eia.gov/totalenergy/data/browser/csv.php?tbl=T01.12"

headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://www.eia.gov/totalenergy/data/browser/index.php?tbl=T01.12",
    "Accept": "text/csv,*/*"
}

r = requests.get(TABLE_URL, headers=headers, timeout=60)
r.raise_for_status()

df = pd.read_csv(StringIO(r.text))

print(df.columns)
print(df.head())

df.to_csv("EIA_T01_12_CDD_by_CensusDivision.csv", index=False)
print("Saved: EIA_T01_12_CDD_by_CensusDivision.csv")

Index(['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'], dtype='object')
       MSN  YYYYMM  Value  Column_Order                       Description  \
0  ZWCDPC1  194913    572             1  Cooling Degree-Days, New England   
1  ZWCDPC1  195013    296             1  Cooling Degree-Days, New England   
2  ZWCDPC1  195113    326             1  Cooling Degree-Days, New England   
3  ZWCDPC1  195213    503             1  Cooling Degree-Days, New England   
4  ZWCDPC1  195313    372             1  Cooling Degree-Days, New England   

     Unit  
0  Number  
1  Number  
2  Number  
3  Number  
4  Number  
Saved: EIA_T01_12_CDD_by_CensusDivision.csv


In [4]:
import pandas as pd
import requests
from io import StringIO

TABLE_URL = "https://www.eia.gov/totalenergy/data/browser/csv.php?tbl=T04.01"

headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://www.eia.gov/totalenergy/data/browser/index.php?tbl=T04.01",
    "Accept": "text/csv,*/*"
}

r = requests.get(TABLE_URL, headers=headers, timeout=60)
r.raise_for_status()

df = pd.read_csv(StringIO(r.text))

# Inspect structure
print(df.columns)
print(df.head())

# Save locally
df.to_csv("EIA_T04_01_Natural_Gas_Overview_Monthly.csv", index=False)
print("Saved: EIA_T04_01_Natural_Gas_Overview_Monthly.csv")

Index(['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'], dtype='object')
       MSN  YYYYMM      Value  Column_Order                    Description  \
0  NGGWPUS  194913   7546.825             1  Natural Gas Gross Withdrawals   
1  NGGWPUS  195013    8479.65             1  Natural Gas Gross Withdrawals   
2  NGGWPUS  195113   9689.372             1  Natural Gas Gross Withdrawals   
3  NGGWPUS  195213  10272.566             1  Natural Gas Gross Withdrawals   
4  NGGWPUS  195313  10645.798             1  Natural Gas Gross Withdrawals   

                 Unit  
0  Billion Cubic Feet  
1  Billion Cubic Feet  
2  Billion Cubic Feet  
3  Billion Cubic Feet  
4  Billion Cubic Feet  
Saved: EIA_T04_01_Natural_Gas_Overview_Monthly.csv


In [5]:
pwd

'/Users/richardlesko/Desktop/Data Processing in Python/Lectures/Codes; Lecture + Seminar'