In [None]:
import os
import xarray as xr
import geopandas as gpd
# from QueryHandler import QueryHandler
from dotenv import load_dotenv
import sqlalchemy as sq
import sys
import pandas as pd

sys.path.append("../")
from DataService import DataService

In [2]:
load_dotenv()
PG_USER = os.getenv('POSTGRES_USER')
PG_PW = os.getenv('POSTGRES_PW')
PG_DB = os.getenv('POSTGRES_DB')
PG_ADDR = os.getenv('POSTGRES_ADDR')
PG_PORT = os.getenv('POSTGRES_PORT')

In [3]:
# connicting to database
db = DataService(PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
conn = db.connect()

In [40]:

query = sq.text('select geometry, car_uid, cr_num FROM public.census_ag_regions')
agRegions = gpd.GeoDataFrame.from_postgis(query, conn, crs='EPSG:3347', geom_col='geometry')

In [41]:
agRegions

Unnamed: 0,geometry,car_uid,cr_num
0,"POLYGON ((5614040.030 1525793.204, 5614049.916...",4601,3
1,"POLYGON ((5626194.299 1583517.983, 5626196.761...",4602,3
2,"POLYGON ((5604868.041 1657581.280, 5604597.366...",4603,3
3,"POLYGON ((5572893.654 1735557.501, 5572702.245...",4604,2
4,"POLYGON ((5642129.777 1764975.307, 5640570.825...",4605,2
5,"POLYGON ((5650771.936 1802331.590, 5650891.341...",4606,2
6,"POLYGON ((5699696.737 1638839.856, 5699169.985...",4607,1
7,"POLYGON ((5811211.812 1504579.905, 5813077.828...",4608,3
8,"POLYGON ((5881996.131 1582644.382, 5881995.457...",4609,1
9,"MULTIPOLYGON (((5963272.940 1505971.912, 59632...",4610,1


In [6]:

query = sq.text('select * FROM public.soil_moisture')
sm_df = pd.read_sql(query, conn)

In [27]:
sm_df.head(10)

Unnamed: 0,id,lon,lat,date,cr_num,soil_moisture
0,874895,-117.125,59.375,1978-12-13,10,0.24927
1,874896,-116.875,59.375,1978-12-13,10,0.24926
2,874897,-116.625,59.375,1978-12-13,10,0.25155
3,874898,-117.375,59.125,1978-12-13,10,0.24682
4,874899,-116.875,59.125,1978-12-13,10,0.24773
5,874900,-117.625,58.125,1978-12-13,10,0.19784
6,874901,-117.125,58.125,1978-12-13,10,0.190395
7,874902,-117.875,57.875,1978-12-13,10,0.193394
8,874903,-115.625,57.875,1978-12-13,10,0.21926
9,874904,-118.375,57.625,1978-12-13,10,0.184413


In [33]:
#converting to geopands dataframe
from shapely.geometry import Point

geometry = [Point(xy) for xy in zip(sm_df.lon, sm_df.lat)]
# sm_gdf = sm_df.drop(['lon', 'lat', 'id'], axis=1)
sm_gdf = gpd.GeoDataFrame(sm_df, crs="EPSG:3347", geometry=geometry)

In [34]:
sm_gdf

Unnamed: 0,id,lon,lat,date,cr_num,soil_moisture,geometry
0,874895,-117.125,59.375,1978-12-13,10,0.249270,POINT (-117.125 59.375)
1,874896,-116.875,59.375,1978-12-13,10,0.249260,POINT (-116.875 59.375)
2,874897,-116.625,59.375,1978-12-13,10,0.251550,POINT (-116.625 59.375)
3,874898,-117.375,59.125,1978-12-13,10,0.246820,POINT (-117.375 59.125)
4,874899,-116.875,59.125,1978-12-13,10,0.247730,POINT (-116.875 59.125)
...,...,...,...,...,...,...,...
16696610,17571505,-96.625,49.125,1991-08-26,1,0.139403,POINT (-96.625 49.125)
16696611,17571506,-96.375,49.125,1991-08-26,1,0.181812,POINT (-96.375 49.125)
16696612,17571507,-96.125,49.125,1991-08-26,1,0.145877,POINT (-96.125 49.125)
16696613,17571508,-95.875,49.125,1991-08-26,1,0.184481,POINT (-95.875 49.125)


In [50]:
# final_df = gpd.sjoin(sm_gdf, agRegions, how='left', predicate='within') 
final_df = sm_gdf.merge(agRegions, on='cr_num', how='left')

In [51]:
final_df

Unnamed: 0,id,lon,lat,date,cr_num,soil_moisture,geometry_x,geometry_y,car_uid
0,874895,-117.125,59.375,1978-12-13,10,0.249270,POINT (-117.125 59.375),"POLYGON ((5017939.237 2842117.199, 5018512.665...",4860
1,874895,-117.125,59.375,1978-12-13,10,0.249270,POINT (-117.125 59.375),"POLYGON ((4944069.265 2870017.791, 4944068.613...",4870
2,874896,-116.875,59.375,1978-12-13,10,0.249260,POINT (-116.875 59.375),"POLYGON ((5017939.237 2842117.199, 5018512.665...",4860
3,874896,-116.875,59.375,1978-12-13,10,0.249260,POINT (-116.875 59.375),"POLYGON ((4944069.265 2870017.791, 4944068.613...",4870
4,874897,-116.625,59.375,1978-12-13,10,0.251550,POINT (-116.625 59.375),"POLYGON ((5017939.237 2842117.199, 5018512.665...",4860
...,...,...,...,...,...,...,...,...,...
46796153,17571508,-95.875,49.125,1991-08-26,1,0.184481,POINT (-95.875 49.125),"POLYGON ((5881996.819 1582654.300, 5881991.545...",4611
46796154,17571509,-95.625,49.125,1991-08-26,1,0.258034,POINT (-95.625 49.125),"POLYGON ((5699696.737 1638839.856, 5699169.985...",4607
46796155,17571509,-95.625,49.125,1991-08-26,1,0.258034,POINT (-95.625 49.125),"POLYGON ((5881996.131 1582644.382, 5881995.457...",4609
46796156,17571509,-95.625,49.125,1991-08-26,1,0.258034,POINT (-95.625 49.125),"MULTIPOLYGON (((5963272.940 1505971.912, 59632...",4610


In [52]:
final_df = final_df.drop(['lon', 'lat', 'id', 'geometry_x', 'geometry_y'], axis=1)

In [54]:
final_df.groupby(['date', 'cr_num', 'car_uid']).mean().reset_index()

Unnamed: 0,date,cr_num,car_uid,soil_moisture
0,1978-11-01,0,4612,0.237053
1,1978-11-01,5,4730,0.166317
2,1978-11-01,5,4732,0.166317
3,1978-11-01,5,4733,0.166317
4,1978-11-01,5,4740,0.166317
...,...,...,...,...
396960,2021-12-27,10,4870,0.266554
396961,2021-12-28,0,4612,0.243087
396962,2021-12-28,10,4860,0.172003
396963,2021-12-28,10,4870,0.172003


In [49]:
final_df

Unnamed: 0,date,cr_num,soil_moisture,car_uid
0,1978-11-01,0,0.237053,inf
1,1978-11-01,5,0.166317,inf
2,1978-11-01,7,0.156877,1360422013632797252459936142309893839052622442...
3,1978-11-01,8,0.186500,inf
4,1978-11-01,9,0.188634,inf
...,...,...,...,...
110761,2021-12-25,0,0.178774,4612.0
110762,2021-12-27,10,0.266554,3037804405378043881882041188378493336028977153...
110763,2021-12-28,0,0.243087,4612.0
110764,2021-12-28,10,0.172003,40504058738373920116790778416864079015978205184.0


In [8]:
avg_df = sm_df.groupby(['date', 'cr_num']).mean().reset_index()

In [11]:
avg_df

Unnamed: 0,date,cr_num,soil_moisture
0,1978-11-01,0,0.237053
1,1978-11-01,5,0.166317
2,1978-11-01,7,0.156877
3,1978-11-01,8,0.186500
4,1978-11-01,9,0.188634
...,...,...,...
110761,2021-12-25,0,0.178774
110762,2021-12-27,10,0.266554
110763,2021-12-28,0,0.243087
110764,2021-12-28,10,0.172003


NameError: name 'df' is not defined

In [None]:
final_df = gpd.sjoin(avg_df, agRegions, how='left', predicate='within') 