In [11]:
import requests

# Source: the tab-delimited load/generation text file published by BPA.
url = "https://transmission.bpa.gov/business/operations/Wind/baltwg.txt"

# requests accepts a (connect, read) timeout pair. A single sub-second value
# covers both phases and is easily exceeded by a TLS handshake or a slow first
# byte, so give the connection a few seconds and the body transfer longer.
req = requests.get(url, timeout=(3.05, 30))
req.raise_for_status()  # fail on HTTP 4xx/5xx instead of parsing an error page
text = req.text         # decoded response body, parsed by the pandas cell

In [13]:
import pandas
from io import StringIO

# Parse the downloaded text into a clean frame in one chained expression:
#   1. read the tab-separated table; the first 11 lines come before the
#      column-header row (presumably a descriptive preamble) and are skipped
#   2. trim stray whitespace from the column labels
#   3. add a parsed 'Datetime' column built from the textual 'Date/Time' one
#   4. discard the original text column
#   5. discard every row that still contains a missing value
df = (
    pandas.read_csv(StringIO(text), sep='\t', skiprows=11)
    .rename(columns=str.strip)
    .assign(Datetime=lambda frame: pandas.to_datetime(frame['Date/Time']))
    .drop(columns='Date/Time')
    .dropna()
)
df.head()

Unnamed: 0,Date/Time,Load,Wind,Hydro,Fossil/Biomass,Nuclear,Datetime
0,12/08/2019 00:00,5626.0,77.0,5923.0,1134.0,1164.0,2019-12-08 00:00:00
1,12/08/2019 00:05,5596.0,88.0,5743.0,1162.0,1166.0,2019-12-08 00:05:00
2,12/08/2019 00:10,5583.0,94.0,5455.0,1183.0,1165.0,2019-12-08 00:10:00
3,12/08/2019 00:15,5596.0,86.0,5354.0,1190.0,1165.0,2019-12-08 00:15:00
4,12/08/2019 00:20,5571.0,96.0,5289.0,1199.0,1167.0,2019-12-08 00:20:00


In [10]:
# Sanity-check the cleaned frame: row count, per-column non-null counts,
# dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1918 entries, 0 to 1918
Data columns (total 6 columns):
Load              1918 non-null float64
Wind              1918 non-null float64
Hydro             1918 non-null float64
Fossil/Biomass    1918 non-null float64
Nuclear           1918 non-null float64
Datetime          1918 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(5)
memory usage: 104.9 KB


In [8]:
import pymongo

# Open a MongoDB client. No host/URI is passed, so pymongo falls back to its
# default connection target (localhost:27017 per the pymongo documentation).
client = pymongo.MongoClient()

In [9]:
# Target the "energy" collection inside the "energy" database.
db = client.get_database("energy")
collection = db.get_collection("energy")

# One upsert per row, keyed on the reading's timestamp: a stored document with
# the same 'Datetime' is replaced by the latest values, otherwise the row is
# inserted as a new document.
operations = [
    pymongo.ReplaceOne(
        filter={'Datetime': record['Datetime']},    # locate the document if it exists
        replacement=record,                         # latest version of the document
        upsert=True)                                # replace if found, insert if not
    for record in df.to_dict('records')
]

# Submit everything in a single bulk_write instead of one round trip per row,
# and take the counts straight from the server's reply. bulk_write raises on an
# empty operation list, so an empty frame is handled without contacting the server.
# NOTE(review): without an index on 'Datetime' each lookup may scan the
# collection -- consider a unique index on that field; confirm with the data owner.
if operations:
    result = collection.bulk_write(operations)
    update_count = result.matched_count
    insert_count = result.upserted_count
else:
    update_count = 0
    insert_count = 0

print(f"rows={df.shape[0]}, update={update_count}, "
      f"insert={insert_count}")

rows=1918, update=1911, insert=7
