## Import data from the database

In [None]:
import os
import xarray as xr
import geopandas as gpd  # type: ignore

from dotenv import load_dotenv
import sqlalchemy as sq
import sys
import pandas as pd
import matplotlib.pyplot as plt  # type: ignore

sys.path.append("../Shared/")
from DataService import DataService  # type: ignore

In [None]:
# Pull the PostgreSQL connection settings out of the environment, after
# python-dotenv has had a chance to load variables from a .env file.
load_dotenv()
PG_USER, PG_PW, PG_DB, PG_ADDR, PG_PORT = (
    os.getenv(env_name)
    for env_name in (
        "POSTGRES_USER",
        "POSTGRES_PW",
        "POSTGRES_DB",
        "POSTGRES_ADDR",
        "POSTGRES_PORT",
    )
)

In [None]:
# Open a database connection through the shared DataService helper,
# using the settings read from the environment.
db = DataService(
    PG_DB,
    PG_ADDR,
    PG_PORT,
    PG_USER,
    PG_PW,
)
conn = db.connect()

In [None]:
# Load every row of public.ab_station_data into a DataFrame.
query = sq.text("select * FROM public.ab_station_data")
ab_df = pd.read_sql(sql=query, con=conn)

In [None]:
# Load every row of public.mb_station_data into a DataFrame.
query = sq.text("select * FROM public.mb_station_data")
mb_df = pd.read_sql(sql=query, con=conn)

In [None]:
# Load every row of public.sk_station_data into a DataFrame.
query = sq.text("select * FROM public.sk_station_data")
sk_df = pd.read_sql(sql=query, con=conn)

In [None]:
# Preview the first rows of ab_df (rendered as the cell output).
ab_df.head()

In [None]:
# Preview the first rows of mb_df (rendered as the cell output).
mb_df.head()

In [None]:
# Preview the first rows of sk_df (rendered as the cell output).
sk_df.head()

In [None]:
# Print the column names, dtypes and non-null counts of ab_df.
ab_df.info()

In [None]:
# Print the column names, dtypes and non-null counts of mb_df.
mb_df.info()

In [None]:
# Print the column names, dtypes and non-null counts of sk_df.
sk_df.info()

In [None]:
# Show summary statistics for the columns of ab_df.
ab_df.describe()

In [None]:
# Show summary statistics for the columns of mb_df.
mb_df.describe()

In [None]:
# Show summary statistics for the columns of sk_df.
sk_df.describe()

In [None]:
# Overlay the mean_temp distributions of the three station tables.
#
# All three histograms are drawn over one shared value range, which gives
# them identical bin edges. Without it every plt.hist call derives its own
# bins from its own data, so the overlaid bars cannot be compared directly.
mean_temps = {
    "ab": ab_df['mean_temp'],
    "mb": mb_df['mean_temp'],
    "sk": sk_df['mean_temp'],
}
all_temps = pd.concat(list(mean_temps.values()))
shared_range = (all_temps.min(), all_temps.max())  # min/max skip NaN values
for label, temps in mean_temps.items():
    plt.hist(temps, range=shared_range, alpha=0.5, label=label)
plt.xlabel("mean_temp")
plt.ylabel("number of records")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Scatter total_rain against mean_temp, one series per station table.
# The axes are labelled with the plotted column names so the figure can be
# read without referring back to the code.
for label, frame in (("ab", ab_df), ("mb", mb_df), ("sk", sk_df)):
    plt.scatter(frame['mean_temp'], frame['total_rain'], alpha=0.5, label=label)
plt.xlabel("mean_temp")
plt.ylabel("total_rain")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Scatter total_snow against mean_temp, one series per station table.
# The axes are labelled with the plotted column names so the figure can be
# read without referring back to the code.
for label, frame in (("ab", ab_df), ("mb", mb_df), ("sk", sk_df)):
    plt.scatter(frame['mean_temp'], frame['total_snow'], alpha=0.5, label=label)
plt.xlabel("mean_temp")
plt.ylabel("total_snow")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Scatter total_precip against mean_temp, one series per station table.
# The axes are labelled with the plotted column names so the figure can be
# read without referring back to the code.
for label, frame in (("ab", ab_df), ("mb", mb_df), ("sk", sk_df)):
    plt.scatter(frame['mean_temp'], frame['total_precip'], alpha=0.5, label=label)
plt.xlabel("mean_temp")
plt.ylabel("total_precip")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Plot mean_temp against date for ab_df, with both axes labelled by the
# column they show.
plt.scatter(ab_df['date'], ab_df['mean_temp'])
plt.xlabel("date")
plt.ylabel("mean_temp")
plt.show()

In [None]:
# Pairwise Pearson correlation between the weather measurements in ab_df.
temp = ab_df.loc[
    :,
    ["mean_temp", "total_rain", "total_snow", "total_precip", "snow_on_grnd"],
]
temp.corr(method="pearson")

In [None]:
# Pairwise Pearson correlation between the weather measurements in mb_df.
temp = mb_df.loc[
    :,
    ["mean_temp", "total_rain", "total_snow", "total_precip", "snow_on_grnd"],
]
temp.corr(method="pearson")

In [None]:
# Pairwise Pearson correlation between the weather measurements in sk_df.
temp = sk_df.loc[
    :,
    ["mean_temp", "total_rain", "total_snow", "total_precip", "snow_on_grnd"],
]
temp.corr(method="pearson")

# Visualization of the weather stations

In [30]:
# Open a database connection through the shared DataService helper,
# using the settings read from the environment.
db = DataService(
    PG_DB,
    PG_ADDR,
    PG_PORT,
    PG_USER,
    PG_PW,
)
conn = db.connect()

In [31]:
# Load the weather station table (public.stations_dly) into a DataFrame.
query = sq.text("select * FROM public.stations_dly")
station_df = pd.read_sql(sql=query, con=conn)

In [32]:
# Display the station table loaded from public.stations_dly.
station_df

Unnamed: 0,station_name,province,latitude,longitude,elevation,station_id,wmo_identifier,tc_identifier,first_year,last_year,hly_first_year,hly_last_year,dly_first_year,dly_last_year,mly_first_year,mly_last_year,geometry,district,cr_num,scraped
0,ABEE AGDM,AB,54.28,-112.97,664.0,3010010,71285.0,XAF,1990,2022,1990.0,2022.0,2002.0,2022.0,2002.0,2007.0,0101000020130D0000FBC2F8EA629752416FABBFCAA509...,4860.0,10.0,False
1,ACADIA VALLEY,AB,51.07,-110.32,735.0,3020035,71048.0,PAC,2009,2022,2009.0,2022.0,2009.0,2022.0,,,0101000020130D00006E82DFCB90D2524193B304E93E20...,4810.0,8.0,False
2,ALBERT HALL AGCM,AB,53.11,-111.18,658.0,3010060,71005.0,PAH,2007,2022,2007.0,2022.0,2007.0,2022.0,2007.0,2007.0,0101000020130D000024634AAA42DC5241033C6EFD98A9...,4840.0,9.0,False
3,ALLIANCE AGCM,AB,52.32,-111.78,737.0,3010162,71006.0,PAA,2007,2022,2007.0,2022.0,2007.0,2022.0,2007.0,2007.0,0101000020130D00003F246CE4499D52416C5C0D1A0496...,4840.0,9.0,False
4,ANDREW AGDM,AB,53.92,-112.28,625.0,3010237,71286.0,XAG,1998,2022,1998.0,2022.0,2002.0,2022.0,2002.0,2007.0,0101000020130D0000063649EF1BB45241B3DEDAF6F4A4...,4840.0,9.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467,YORKTON,SK,51.26,-102.46,498.3,4019075,71292.0,YQV,2011,2022,2011.0,2022.0,2011.0,2022.0,,,0101000020130D0000209C5B32BEDD5441AA72D09DE08D...,4750.0,6.0,False
468,YORKTON,SK,51.26,-102.46,498.4,4019073,71886.0,PRJ,2011,2022,2011.0,2022.0,2011.0,2022.0,,,0101000020130D0000209C5B32BEDD5441AA72D09DE08D...,4750.0,6.0,False
469,YORKTON,SK,51.26,-102.46,498.3,4019085,71138.0,YQV,2005,2011,2005.0,2011.0,2005.0,2011.0,2005.0,2007.0,0101000020130D0000209C5B32BEDD5441AA72D09DE08D...,4750.0,6.0,False
470,YORKTON A,SK,51.27,-102.47,498.3,4019080,71138.0,YQV,1941,2005,1953.0,2005.0,1941.0,2005.0,1941.0,2005.0,0101000020130D0000CFC33C4741DD5441E868CA9A9392...,4750.0,6.0,False


In [36]:
from shapely import wkb

In [37]:
# The geometry column holds hex-encoded WKB strings; decode each one into
# a shapely geometry object, replacing the column in place.
station_df['geometry'] = station_df['geometry'].map(
    lambda hex_wkb: wkb.loads(hex_wkb, hex=True)
)

In [39]:
# Wrap the station table in a GeoDataFrame and declare its coordinate
# reference system. The stored EWKB values start with 0101000020130D0000,
# i.e. a point tagged with SRID 0x0D13 = 3347; GeoDataFrame does not read
# that tag from the geometries, so it is passed explicitly. Without a CRS,
# later reprojection (to_crs) or overlaying with other layers would fail
# or silently misalign.
station_gdf = gpd.GeoDataFrame(station_df, geometry='geometry', crs="EPSG:3347")