In [1]:
import boto3
import s3fs
import pandas as pd
import re
import os
from io import StringIO

In [21]:
# Load the dotenv IPython extension and read variables from a .env file.
# NOTE(review): presumably this provides the AWS credentials/profile used by the
# S3 clients created below — confirm.
%reload_ext dotenv
%dotenv

## Transformation of daily and hourly station-based Fire Weather Index (FWI) calculations over the Western US

In [3]:
# S3 locations: the bucket, the input and output key prefixes inside it, and
# the full URL of the station metadata CSV.
BUCKET = "veda-data-store-staging"
PREFIX = "EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/"
DST_PREFIX = "EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI-transformed/"
StationIds = "s3://veda-data-store-staging/EIS/other/station-FWI/20000101.NRT.nStns.1/isd-history.csv"

# Two S3 handles: boto3 for listing/uploading objects, s3fs for file-like reads.
client = boto3.client("s3")
fs = s3fs.S3FileSystem()

In [4]:
# List every object under PREFIX.
# A single list_objects_v2 call returns at most 1000 keys, so a bare call can
# silently truncate the listing; walk all pages with a paginator instead.
# `r` keeps the response-like shape {"Contents": [...]} so code that reads
# r["Contents"] keeps working, and an empty prefix yields an empty list
# rather than a missing key.
paginator = client.get_paginator("list_objects_v2")
r = {
    "Contents": [
        obj
        for page in paginator.paginate(Bucket=BUCKET, Prefix=PREFIX)
        for obj in page.get("Contents", [])
    ]
}

In [5]:
# Keep only the object keys whose name matches "InterpContinuous".
files = [
    key
    for key in (obj["Key"] for obj in r["Contents"])
    if re.search("InterpContinuous", key) is not None
]

In [20]:
# Load the station metadata once.
# USAF is read as str because the station id taken from each file name below is
# a string: letting pandas infer an integer dtype would drop leading zeros and
# make the equality lookup miss every station.
with fs.open(StationIds) as fileobj:
    stations = pd.read_csv(fileobj, dtype={"USAF": str})

# The station file handle is closed here; only the DataFrame is needed below.
for file in files:
    with fs.open(f"{BUCKET}/{file}") as fwi_obj:
        print(f"Reading file: {file}")

        basename = file.split("/")[-1]
        # The text before the first "-" in the file name is the station id that
        # is matched against the USAF column (e.g. "722400-03937.spline....csv").
        station_id = basename.split("-")[0]

        try:
            # NOTE(review): the rename shifts every date label one column to the
            # left ("index" -> year, "YYYY" -> month, ...). Presumably the CSV rows
            # carry one more field than the header, so pandas uses the first field
            # as the index — confirm against a raw input file.
            df = pd.read_csv(fwi_obj).reset_index()
            df = df.rename(
                columns={"index": "year", "YYYY": "month", "MM": "day", "DD": "hour", "HH": "minute"}
            )

            # Assemble a single timestamp column from the date parts and put it first.
            dt = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute']])
            df.insert(loc=0, column='t', value=dt)

            # Look up this station's metadata; fail with a readable message
            # instead of an opaque positional-indexer error when it is absent.
            matches = stations[stations["USAF"] == station_id]
            if matches.empty:
                raise LookupError(f"station {station_id} not found in {StationIds}")
            station = matches.iloc[0]

            df["lat"] = station["LAT"]
            df["lon"] = station["LON"]
            df["stn_id"] = station_id
            df["stn_name"] = station["STATION NAME"]

            filename = basename.replace('.csv', '_transformed.csv')
            print(f"Uploading to S3: {filename}")

            # Serialise in memory and upload under the destination prefix.
            csv_buffer = StringIO()
            df.to_csv(csv_buffer, index=False)
            client.put_object(Body=csv_buffer.getvalue(), Bucket=BUCKET, Key=DST_PREFIX + filename)

        except Exception as e:
            # Best-effort batch: report the failing file and move on to the next one.
            print(f"Exception: {basename} {e}")
            

Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/722400-03937.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 722400-03937.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/722410-12917.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 722410-12917.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/722430-12960.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 722430-12960.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/722480-13957.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 722480-13957.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/722500-12919.spline.HourlyFWIFromHourlyInterpContinuous.csv

  df = pd.read_csv(fwi_obj)


Uploading to S3: 723600-23051.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/723630-23047.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 723630-23047.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/723650-23050.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 723650-23050.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/723740-23194.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 723740-23194.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/723840-23155.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 723840-23155.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/200

  df = pd.read_csv(fwi_obj)


Uploading to S3: 724650-23065.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/724660-93037.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 724660-93037.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/724760-23066.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 724760-23066.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/724800-23157.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 724800-23157.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/724830-23232.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 724830-23232.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/200

  df = pd.read_csv(fwi_obj)


Uploading to S3: 727830-24149.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/727850-24157.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 727850-24157.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/727910-94224.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 727910-94224.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/727920-24227.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 727920-24227.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/20000101.20221021.hrlyInterp/FWI/727930-24233.spline.HourlyFWIFromHourlyInterpContinuous.csv
Uploading to S3: 727930-24233.spline.HourlyFWIFromHourlyInterpContinuous_transformed.csv
Reading file: EIS/other/station-FWI/200