In [1]:
# %pip install geopandas pandas sqlalchemy psycopg2-binary openpyxl geoalchemy2 python-dotenv

# Import Block

In [2]:
import pandas as pd
import geopandas as gpd
from sqlalchemy import text

import os
from dotenv import load_dotenv
import sys
sys.path.append("../src")

from utils.Database import Database

# Load Environment

In [3]:
# Pull settings from the local .env file into the process environment.
load_dotenv(".env")

# Path of the CWEEDS metadata workbook and name of the destination table.
CWEEDS_METADATA_PATH = os.environ.get("CWEEDS_METADATA_PATH")
TABLE_WEATHER_METADATA = os.environ.get("TABLE_WEATHER_METADATA")

# Fail fast: an unset variable would otherwise only surface in later cells,
# e.g. as SQL that targets a table named "None".
_missing_settings = [
    setting_name
    for setting_name, setting_value in {
        "CWEEDS_METADATA_PATH": CWEEDS_METADATA_PATH,
        "TABLE_WEATHER_METADATA": TABLE_WEATHER_METADATA,
    }.items()
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Establish Database Connection

In [4]:
# Obtain the database handle from the project's Database helper; the cell
# output shows it is a SQLAlchemy Engine.
connection = Database().get_connection()
# Bare expression so the notebook displays the engine repr.
connection

Engine(postgresql://wireaiadmin:***@localhost:5434/weather_db)

# Read Data

In [5]:
# Load the CWEEDS station metadata workbook. The column at position 4 is used
# as the row index (it appears as "climate_ID" in the preview).
cweeds_meta_df = pd.read_excel(CWEEDS_METADATA_PATH, index_col=4)
cweeds_meta_df.head()

Unnamed: 0_level_0,version,name,prov,country,lat,lon,UTC_offset,elev(m),first_yr,last_yr
climate_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3010010,CWEEDS2011,ABEE AGDM,AB,CAN,54.28,-112.97,-7,664.0,2003,2017
3010237,CWEEDS2011,ANDREW AGDM,AB,CAN,53.92,-112.28,-7,625.0,2003,2017
3060406,CWEEDS2011,ATMORE AGDM,AB,CAN,54.78,-112.82,-7,574.0,2003,2017
3050519,CWEEDS2011,BANFF CS,AB,CAN,51.19,-115.55,-7,1396.9,1998,2017
3030525,CWEEDS2011,BARNWELL AGDM,AB,CAN,49.8,-112.3,-7,824.2,2003,2017


# Data Pre-Processing

In [6]:
# Convert to a GeoDataFrame: build one point per row from the lon (x) and
# lat (y) columns and tag the result with the EPSG:4326 CRS.
cweeds_meta_gdf = gpd.GeoDataFrame(
    cweeds_meta_df,
    geometry=gpd.points_from_xy(x=cweeds_meta_df["lon"], y=cweeds_meta_df["lat"]),
    crs="EPSG:4326",
)

In [7]:
# Drop columns that are not needed. Reassigning the result (rather than
# mutating with inplace=True) keeps the step explicit and chainable.
cweeds_meta_gdf = cweeds_meta_gdf.drop(
    columns=["version", "country", "lat", "lon"]
)

In [8]:
# Reset the index so that the former index (the climate ID) becomes a regular
# column; drop=False keeps its values instead of discarding them.
cweeds_meta_gdf = cweeds_meta_gdf.reset_index(drop=False)

In [9]:
# Upper-case every column name except "geometry", then give the elevation
# column a name without parentheses. Both steps return new frames instead of
# mutating in place.
cweeds_meta_gdf = cweeds_meta_gdf.rename(
    columns=lambda col: col if col == "geometry" else col.upper()
).rename(columns={"ELEV(M)": "ELEV_IN_M"})
cweeds_meta_gdf.head(2)

Unnamed: 0,CLIMATE_ID,NAME,PROV,UTC_OFFSET,ELEV_IN_M,FIRST_YR,LAST_YR,geometry
0,3010010,ABEE AGDM,AB,-7,664.0,2003,2017,POINT (-112.97 54.28)
1,3010237,ANDREW AGDM,AB,-7,625.0,2003,2017,POINT (-112.28 53.92)


In [10]:
# Write the stations to the database table named by TABLE_WEATHER_METADATA.
# if_exists="replace" overwrites an existing table; the DataFrame index is
# not written.
cweeds_meta_gdf.to_postgis(
    TABLE_WEATHER_METADATA,
    connection,
    if_exists="replace",
    index=False,
)

In [11]:
# Add a primary key on CLIMATE_ID for faster retrieval.
alter_statement = f"""ALTER TABLE "{TABLE_WEATHER_METADATA}" ADD PRIMARY KEY ( "CLIMATE_ID" );"""

# Engine.begin() opens a transaction that is committed when the block exits
# without error. A plain connect() block does not commit on SQLAlchemy 2.x,
# so the ALTER TABLE would be rolled back when the connection closes.
with connection.begin() as con:
    con.execute(text(alter_statement))

# Test Read Data

In [12]:
# Read the whole table back as a GeoDataFrame to check the stored result,
# using the "geometry" column as the geometry.
gpd.read_postgis(
    f"""SELECT * from "{TABLE_WEATHER_METADATA}"; """,
    connection,
    geom_col="geometry",
)

Unnamed: 0,CLIMATE_ID,NAME,PROV,UTC_OFFSET,ELEV_IN_M,FIRST_YR,LAST_YR,geometry
0,3010010,ABEE AGDM,AB,-7,664.0,2003,2017,POINT (-112.97 54.28)
1,3010237,ANDREW AGDM,AB,-7,625.0,2003,2017,POINT (-112.28 53.92)
2,3060406,ATMORE AGDM,AB,-7,574.0,2003,2017,POINT (-112.82 54.78)
3,3050519,BANFF CS,AB,-7,1396.9,1998,2017,POINT (-115.55 51.19)
4,3030525,BARNWELL AGDM,AB,-7,824.2,2003,2017,POINT (-112.3 49.8)
...,...,...,...,...,...,...,...,...
559,2100805,OLD CROW RCS,YT,-8,251.2,2005,2017,POINT (-139.84 67.57)
560,2100935,ROCK RIVER,YT,-8,731.0,2007,2017,POINT (-136.22 66.98)
561,2101102,TESLIN (AUT),YT,-8,705.0,2005,2017,POINT (-132.73 60.17)
562,2101201,WATSON LAKE A,YT,-8,687.3,2005,2017,POINT (-128.82 60.12)
