In [None]:
from matplotlib import pyplot as plt
from colorhash import ColorHash
import sqlalchemy as sq
import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
import os, sys

sys.path.append('../')
from DataService import DataService

In [None]:
# destination table for the labeled soil rows written at the end of the notebook
TABLENAME = 'labeled_soil'

# read the Postgres connection settings from the environment (after loading .env);
# any variable that is not set comes back as None
load_dotenv()
PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW = (
    os.environ.get(envKey)
    for envKey in (
        'POSTGRES_DB',
        'POSTGRES_ADDR',
        'POSTGRES_PORT',
        'POSTGRES_USER',
        'POSTGRES_PW',
    )
)

In [None]:
# Build the project's DataService from the Postgres settings and open a connection;
# `conn` is the handle handed to every read and write in the cells below.
# NOTE(review): DataService comes from the parent directory (see sys.path above);
# the exact type returned by connect() is not visible from this notebook.
db = DataService(PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
conn = db.connect()

In [None]:
# fetch the agriculture region boundaries (cr_num, car_uid and geometry columns)
# into a GeoDataFrame tagged with the EPSG:3347 CRS
query = sq.text('select cr_num, car_uid, geometry FROM public.census_ag_regions')
agRegions = gpd.GeoDataFrame.from_postgis(
    sql=query,
    con=conn,
    geom_col='geometry',
    crs='EPSG:3347',
)

In [None]:
# load the polygon components: one row per (SOIL_ID, POLY_ID) pair
# The statement is wrapped in sq.text like the other queries in this notebook;
# raw SQL strings are not accepted on a SQLAlchemy connection by every
# pandas/SQLAlchemy version pairing.
query = sq.text('select "SOIL_ID", "POLY_ID" FROM public.soil_cmp')
componentRows = pd.read_sql_query(query, conn)

# collapse the rows into one entry per polygon: a Series named 'SOIL_ID',
# indexed by POLY_ID, whose values are the set of soil IDs found in that polygon
components = componentRows.groupby('POLY_ID')['SOIL_ID'].apply(set)

In [None]:
# fetch the soil polygon boundaries (POLY_ID and geometry columns) into a
# GeoDataFrame tagged with the EPSG:3347 CRS
query = sq.text('select "POLY_ID", geometry FROM public.soil_map')
soilRegions = gpd.GeoDataFrame.from_postgis(
    sql=query,
    con=conn,
    geom_col='geometry',
    crs='EPSG:3347',
)

In [None]:
# attach each polygon's set of soil IDs to its boundary, matching on POLY_ID;
# the inner join keeps only polygons present on both sides
regionsAndComponents = soilRegions.merge(components, how='inner', on='POLY_ID')

In [None]:
# spatially join the merged soil polygons with the agriculture regions they intersect
# (left join: every soil polygon is kept, then rows without a cr_num — e.g. polygons
# that intersect no region — are filtered out)
soil = (
    gpd.sjoin(regionsAndComponents, agRegions, how='left', predicate='intersects')
    .loc[lambda joined: joined['cr_num'].notna()]   # keep rows that have a region number
    .drop(columns=['index_right'])                  # bookkeeping column added by sjoin
    .reset_index()                                  # previous index is kept as an 'index' column
)

In [None]:
# swap the existing 'index' column for a new one holding the current row index
soil = soil.drop(columns=['index']).reset_index()

In [None]:
# quick visual check of the joined soil polygons with default styling
soil.plot()

In [None]:
# hash each set of soil IDs to create a consistent color scheme for similar soil typings
#
# Sets have no defined iteration order, so str(some_set) is not a canonical key:
# two equal sets may print their members in a different order (and, for string
# members, the order can change between kernel runs because of hash
# randomization). Sorting the stringified IDs first guarantees that identical
# soil compositions always hash to the same color.
# Iterating the column directly also avoids building a row Series per polygon.
colors = [
    ColorHash(str(sorted(map(str, soilIds)))).hex
    for soilIds in soil['SOIL_ID']
]

soil['color'] = colors

In [None]:
# inspect the joined frame, which now carries the per-polygon color column
soil

In [None]:
# draw the soil polygons in their hashed colors with the agriculture region
# outlines on top; the y-axis is pinned to the vertical extent of the soil data
fig, ax = plt.subplots(figsize=(13, 13))

minx, miny, maxx, maxy = soil.total_bounds
ax.set_ylim(miny, maxy)

soil.plot(color=soil['color'], ax=ax)
agRegions.geometry.boundary.plot(
    ax=ax,
    color=None,
    edgecolor='k',
    linewidth=2,
)

plt.show()

In [None]:
# prepare the data for database storage: drop the plotting-only columns and
# convert to a plain (non-geo) DataFrame
labeledSoil = pd.DataFrame(soil.drop(columns=['geometry', 'color']))

# give the first three columns their database names (renamed by position)
# NOTE(review): given the cells above these are presumably 'index', 'POLY_ID'
# and 'SOIL_ID' — confirm the column order before relying on it
labeledSoil = labeledSoil.rename(
    columns={
        labeledSoil.columns[0]: 'id',
        labeledSoil.columns[1]: 'poly_id',
        labeledSoil.columns[2]: 'soil_ids',
    }
)

In [None]:
# turn every entry of soil_ids into a plain list before the frame is written out
# (element order within each list follows the iteration order of the source value)
listOfSoilIDs = [list(soilIds) for soilIds in labeledSoil['soil_ids']]

labeledSoil['soil_ids'] = listOfSoilIDs

In [None]:
# append the labeled rows to the TABLENAME table in the public schema; the
# DataFrame index is not written as a column
# NOTE(review): with if_exists="append", re-running this cell inserts the same rows again
labeledSoil.to_sql(
    name=TABLENAME,
    con=conn,
    schema='public',
    if_exists="append",
    index=False,
)

In [None]:
# finished with the database — let the DataService tear itself down
# NOTE(review): presumably this closes `conn`; confirm against DataService.cleanup
db.cleanup()