pip install cdsapi  
pip install pygrib

In [18]:
import os, sys, cdsapi, pygrib, calendar
from QueryHandler import QueryHandler
from shapely.geometry import Point
from dotenv import load_dotenv
import sqlalchemy as sq 
import geopandas as gpd
import numpy as np

sys.path.append('../')
from DataService import DataService


# Load variables from a .env file into the process environment, then read the
# Postgres connection settings from it. os.getenv returns None for any that are unset.
load_dotenv()
PG_DB = os.getenv('POSTGRES_DB')
PG_ADDR = os.getenv('POSTGRES_ADDR')
PG_PORT = os.getenv('POSTGRES_PORT')
PG_USER = os.getenv('POSTGRES_USER')
PG_PW = os.getenv('POSTGRES_PW')

# Months requested for every year; both bounds are inclusive (the request
# list is built with range(MIN_MONTH, MAX_MONTH + 1)).
MIN_MONTH = 3
MAX_MONTH = 12

# Years requested; both bounds are inclusive as well.
MIN_YEAR = 1995
MAX_YEAR = 2023

30


In [None]:
# Module-level helper shared by createTable and storeData: it produces the SQL
# request strings and interprets the results returned by db.execute.
queryHandler = QueryHandler()

In [None]:
def createTable(db):
    """Create the ``copernicus_satelite_data`` table unless it already exists.

    Args:
        db: object whose ``execute`` method runs the SQL statements built by
            the module-level ``queryHandler``.
    """
    tableName = 'copernicus_satelite_data'

    existsQuery = sq.text(queryHandler.tableExistsReq(tableName))
    if queryHandler.readTableExists(db.execute(existsQuery)):
        # The table is already present, so there is nothing to create.
        return

    createQuery = sq.text(queryHandler.createCopernicusTableReq(tableName))
    db.execute(createQuery)

In [None]:
def calcAgRegion(agRegions, point):
    """Return the name of the first agricultural region that contains ``point``.

    Args:
        agRegions: table with a ``car_name`` column and a ``geometry`` column
            whose values expose ``contains(point)``.
        point: the location to look up.

    Returns:
        The ``car_name`` of the first matching row, or ``None`` when no region
        contains the point (including when ``agRegions`` has no rows).
    """
    # Iterate the two columns in lockstep; the result is only set on a real
    # match, so a miss can never leak a row out as the "region".
    for name, geometry in zip(agRegions['car_name'], agRegions['geometry']):
        if geometry.contains(point):
            return name

    return None

In [None]:
def storeData(db, lon, lat, year, month, day, hour, region, attr, value):
    """Write one attribute value for a grid point and hour, updating or inserting.

    Args:
        db: object whose ``execute`` method runs the generated SQL.
        lon, lat: coordinates of the grid point.
        year, month, day: calendar date parts, as ints or numeric strings.
        hour: time of day as an ``HH:MM`` string (e.g. ``'07:00'``).
        region: name of the agricultural region the point belongs to.
        attr: name of the attribute being stored.
        value: the attribute's value at this point and time.

    Raises:
        ValueError: if the date parts are not numeric or do not form a valid
            timestamp.
    """
    # numpy only parses zero-padded ISO 8601 dates, while callers pass parts
    # such as '3' or '1'; normalise them before building the timestamp.
    timestamp = np.datetime64(
        f'{int(year):04d}-{int(month):02d}-{int(day):02d}T{hour}'
    )

    existsQuery = sq.text(queryHandler.createRowExistsInDBReq(lon, lat, timestamp))
    rowExists = queryHandler.readRowExistsInDB(db.execute(existsQuery))

    if rowExists:
        # NOTE(review): the update request receives only attr/value, not the
        # lon/lat/timestamp key -- confirm QueryHandler targets the intended row.
        writeQuery = sq.text(queryHandler.createUpdateRowReq(attr, value))
    else:
        writeQuery = sq.text(
            queryHandler.createInsertRowReq(
                lon, lat, timestamp, year, month, day, hour, region, attr, value
            )
        )

    db.execute(writeQuery)

In [None]:
def processFile(grbs, db, agRegions, year, month, day, hour, attr):
    """Store every grid value of the first GRIB message that lies in a known region.

    Args:
        grbs: opened GRIB file; its first message (index 1) is read.
        db: object passed through to ``storeData`` for the database writes.
        agRegions: GeoDataFrame of regions used for the point-in-region test.
        year, month, day, hour: timestamp parts forwarded to ``storeData``.
        attr: name of the attribute contained in the file.
    """
    data, lats, lons = grbs[1].data()

    # data()/lats/lons come back as 2-D grids, so flatten them to walk the
    # cells one by one. getdata/getmaskarray also accept plain ndarrays (the
    # mask is then all False).
    values = np.ma.getdata(data).ravel()
    masked = np.ma.getmaskarray(data).ravel()
    flatLats = np.ravel(lats)
    flatLons = np.ravel(lons)

    for value, isMasked, lat, lon in zip(values, masked, flatLats, flatLons):
        if isMasked:
            # No measurement for this cell; there is nothing meaningful to store.
            continue

        point = Point(lon, lat)

        # GeoDataFrame.contains is element-wise and returns one boolean per
        # region, so it has to be reduced with any() before it can be tested.
        if not agRegions.contains(point).any():
            continue

        region = calcAgRegion(agRegions, point)
        storeData(db, lon, lat, year, month, day, hour, region, attr, value)

In [None]:
def loadGeometry(conn):
    """Load the agricultural region names and shapes from the database.

    Args:
        conn: database connection handed to ``geopandas.read_postgis``.

    Returns:
        A single GeoDataFrame with ``car_name`` and ``geometry`` columns,
        tagged as EPSG:4326.
    """
    query = sq.text('select car_name, geometry FROM public.census_ag_regions')
    agRegions = gpd.read_postgis(query, conn, geom_col='geometry')

    # set_crs returns a new frame by default, so the result must be kept;
    # otherwise the CRS assignment is silently lost.
    agRegions = agRegions.set_crs("EPSG:4326", allow_override=True)

    return agRegions
    

In [None]:
# Client used for the c.retrieve downloads in the loop below.
c = cdsapi.Client()

# The request dictionary takes these values as strings, so build them as strings.
years = list(map(str, range(MIN_YEAR, MAX_YEAR + 1)))
months = list(map(str, range(MIN_MONTH, MAX_MONTH + 1)))

# ERA5-Land variables to download, one request per variable.
attrs = [
    '2m_dewpoint_temperature',
    '2m_temperature',
    'evaporation_from_bare_soil',
    'skin_reservoir_content',
    'skin_temperature',
    'snowmelt',
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3',
    'soil_temperature_level_4',
    'surface_net_solar_radiation',
    'surface_pressure',
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    'volumetric_soil_water_layer_3',
    'volumetric_soil_water_layer_4',
]

# Every full hour of the day, '00:00' through '23:00'.
hours = [f'{hourOfDay:02d}:00' for hourOfDay in range(24)]

# Connect to Postgres; the connection is used to load the region geometry and
# the service object is used for all table writes.
db = DataService(PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
conn = db.connect()

createTable(db)
# loadGeometry returns one GeoDataFrame. Unpacking it into two names would
# iterate the frame and bind its column labels instead of the data.
agRegions = loadGeometry(conn)

# Download one GRIB file per (year, month, day, hour, attribute), load it into
# the database, then delete it before requesting the next one.
for year in years:
    for month in months:
        # The request lists hold strings, but calendar.monthrange needs ints.
        numDays = calendar.monthrange(int(year), int(month))[1]
        days = [str(day) for day in range(1, numDays + 1)]

        for day in days:
            for hour in hours:
                for attr in attrs:
                    c.retrieve(
                        'reanalysis-era5-land',
                        {
                            'format': 'grib',
                            'variable': [attr],
                            'year': year,
                            'month': month,
                            'day': [day],
                            'time': [hour],
                            'area': [61, -140, 48, -88],
                        },
                        'download.grib'
                    )

                    # read the file, process it, delete it then go onto the next set of data
                    grbs = pygrib.open('download.grib')
                    try:
                        processFile(grbs, db, agRegions, year, month, day, hour, attr)
                    finally:
                        # Release the file handle even when processing fails,
                        # so the file can be removed or overwritten afterwards.
                        grbs.close()
                    os.remove('download.grib')

In [None]:
# print(grbs[1].latlons())
# lats, lons = grbs[1].latlons()
# print(lats[0][0])
# print(lons[0][0])
# for lat in lats:
#     for lon in lons:
#         print()

In [27]:
# data, lat, lon = grbs[1].data()
# print(len(data[2]))
# print(len(lat[2]))
# print(len(lon[2]))
# data, lat, lon = grbs[2].data(lat1=70,lon1=-142)
# print(lat)


#print(grbs[5].values)
#print(grbs[5].data(lat1=70,lon1=-142))

891
891
891


In [None]:
# local_data,lats,lons = grb.data(lat1=y1,lat2=y2,lon1=x1,lon2=x2)

In [None]:
# grbs= pygrib.open("download.grib")

# num = 0
# for n in grbs:
#     print(n.latlons())
#     print(n)
#     num += 1

# print(num)