In [None]:
import sys
import os
from dotenv import load_dotenv

import geopandas as gpd
import sqlalchemy as sq
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import cm, colors

sys.path.append("../")
from Shared.DataService import DataService

In [None]:
# Load variables from a local .env file (if any), then read the five Postgres
# connection settings from the environment. os.getenv yields None for any
# variable that is not set.
load_dotenv()
PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW = (
    os.getenv(env_name)
    for env_name in (
        "POSTGRES_DB",
        "POSTGRES_ADDR",
        "POSTGRES_PORT",
        "POSTGRES_USER",
        "POSTGRES_PW",
    )
)

In [None]:
# connecting to database
# DataService is the project helper imported from Shared.DataService; it is
# constructed from the five connection settings read from the environment.
# NOTE(review): connect() presumably returns a SQLAlchemy-compatible connection,
# since `conn` is later handed to pd.read_sql and GeoDataFrame.from_postgis —
# confirm against the DataService implementation.
db = DataService(PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW)
conn = db.connect()

In [None]:
# Read every column of public.ergot_sample into a DataFrame.
query = sq.text("select * FROM public.ergot_sample")
ergot_df = pd.read_sql(sql=query, con=conn)

In [None]:
# Bare last expression: shows the frame read from public.ergot_sample as the cell output.
ergot_df

In [None]:
# car : canadian agriculture region
# Derive car_uid from crop_district with a province-specific rule:
#   MB -> district + 4600
#   SK -> (district - 1) + 4700
#   AB -> (district * 10) + 4800
# Rows from any other province are left without a car_uid value.
for province_code, district_to_car_uid in (
    ("MB", lambda district: district + 4600),
    ("SK", lambda district: (district - 1) + 4700),
    ("AB", lambda district: (district * 10) + 4800),
):
    in_province = ergot_df["province"] == province_code
    ergot_df.loc[in_province, "car_uid"] = district_to_car_uid(
        ergot_df.loc[in_province, "crop_district"]
    )

In [None]:
# Remove the columns that are no longer needed once car_uid has been derived.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps this
# cell re-runnable: executing it a second time is a no-op rather than a KeyError.
ergot_df = ergot_df.drop(columns=['crop_district', 'sample_id'], errors='ignore')

In [None]:
# Ask pandas to downcast car_uid to an integer dtype (applied only where the
# values allow it), then display the resulting frame.
ergot_df['car_uid'] = ergot_df['car_uid'].pipe(pd.to_numeric, downcast='integer')
ergot_df

In [None]:
# sample with incidence = True: per year, per province
# samples_df holds one row per (province, year) that has at least one positive
# sample; its 'incidence' column is the number of positive samples in that group.
samples_df = (
    ergot_df[ergot_df['incidence'] == True]
    .groupby(['province', 'year'])['incidence']
    .count()
    .reset_index()
)
mb_df = samples_df[samples_df['province'] == 'MB']
ab_df = samples_df[samples_df['province'] == 'AB']
sk_df = samples_df[samples_df['province'] == 'SK']

# `year` (Manitoba's years) is kept because later cells read it.
year = mb_df['year'].tolist()
mb_incidence = mb_df['incidence'].tolist()
ab_incidence = ab_df['incidence'].tolist()
sk_incidence = sk_df['incidence'].tolist()

# Each province is plotted against its OWN years. A (province, year) group with
# no positive sample is absent from samples_df, so the provinces do not
# necessarily share the same year list; using Manitoba's years for all three
# would mis-align the lines or fail on a length mismatch.
plt.figure(figsize=(10,5))
plt.xlabel('Year')
plt.ylabel('Incidence')
plt.plot(mb_df['year'].tolist(), mb_incidence, color="blue")
plt.plot(ab_df['year'].tolist(), ab_incidence, color="green")
plt.plot(sk_df['year'].tolist(), sk_incidence, color="red")
plt.legend(['Manitoba', 'Alberta', 'Saskatchewan'])
plt.show()


In [None]:
# Fraction of samples with incidence == True for every (province, year).
# The positive count and the total count are produced by the SAME groupby so
# they are matched by key. Dividing two separately grouped frames would align
# them by positional index, which goes wrong as soon as one (province, year)
# has no positive sample and is therefore missing from the positives frame.
incidence_counts = (
    ergot_df.assign(positive=ergot_df['incidence'] == True)
    .groupby(['province', 'year'])
    .agg(total=('incidence', 'count'), positive=('positive', 'sum'))
    .reset_index()
)
ratio_df = incidence_counts[['province', 'year']].copy()
# Groups without any positive sample now get a ratio of 0.
ratio_df['ratio'] = incidence_counts['positive'] / incidence_counts['total']
ratio_df

In [None]:
# Min, max for each province
# Per-province slices of ratio_df (columns: province, year, ratio).
mb_ratio_df = ratio_df[ratio_df['province'] == 'MB']
ab_ratio_df = ratio_df[ratio_df['province'] == 'AB']
sk_ratio_df = ratio_df[ratio_df['province'] == 'SK']

mb_ratio = mb_ratio_df['ratio'].tolist()
ab_ratio = ab_ratio_df['ratio'].tolist()
sk_ratio = sk_ratio_df['ratio'].tolist()

# Plot every province against its own years rather than a year list taken from
# a single province, so the x and y values always belong together.
plt.figure(figsize=(10,5))
plt.xlabel('Year')
plt.ylabel('Incidence')
plt.plot(mb_ratio_df['year'].tolist(), mb_ratio, color="blue")
plt.plot(ab_ratio_df['year'].tolist(), ab_ratio, color="green")
plt.plot(sk_ratio_df['year'].tolist(), sk_ratio, color="red")
plt.legend(['Manitoba', 'Alberta', 'Saskatchewan'])
plt.show()

# Report the year stored on the extreme row itself. Computing it as
# "list position + 1995" is only right when a province has one row for every
# consecutive year starting in 1995.
province_ratio_frames = (('MB', mb_ratio_df), ('AB', ab_ratio_df), ('SK', sk_ratio_df))

for province_code, province_ratio_df in province_ratio_frames:
    highest_row = province_ratio_df['ratio'].idxmax()
    print('Highest ratio in {}: {}, in year: {}'.format(
        province_code,
        float(province_ratio_df.at[highest_row, 'ratio']),
        int(province_ratio_df.at[highest_row, 'year']),
    ))

for province_code, province_ratio_df in province_ratio_frames:
    lowest_row = province_ratio_df['ratio'].idxmin()
    print('Lowest ratio in {}: {}, in year: {}'.format(
        province_code,
        float(province_ratio_df.at[lowest_row, 'ratio']),
        int(province_ratio_df.at[lowest_row, 'year']),
    ))

In [None]:
# Percentage of positive samples per (province, year, car_uid).
# total_df: number of samples per (year, car_uid).
total_df = ergot_df.groupby(['year', 'car_uid'])['incidence'].count().reset_index()
# region_df: number of positive samples per (province, year, car_uid).
region_df = (
    ergot_df[ergot_df['incidence'] == True]
    .groupby(['province', 'year', 'car_uid'])['incidence']
    .count()
    .reset_index()
)
# Match each positive count with its total BY KEY. The two frames are sorted
# differently (province/year/car_uid vs. year/car_uid) and have different
# lengths, so dividing them directly would pair rows by position and mix up
# unrelated regions and years.
region_df = region_df.merge(
    total_df.rename(columns={'incidence': 'total'}),
    on=['year', 'car_uid'],
    how='left',
)
region_df['ratio'] = (region_df['incidence'] / region_df['total']) * 100
region_df = region_df.drop(columns='total')
region_df


In [None]:
# Load district id, colour and geometry of the census agricultural regions as a
# GeoDataFrame, declaring the geometry column's CRS as EPSG:3347.
regionQuery = sq.text("select district, color, geometry FROM public.census_ag_regions")
agRegions = gpd.GeoDataFrame.from_postgis(
    sql=regionQuery,
    con=conn,
    geom_col="geometry",
    crs="EPSG:3347",
)

In [None]:
def color_map_color(value, cmap_name='Wistia', vmin=0, vmax=100):
    """Return the hex colour that a Matplotlib colormap assigns to ``abs(value)``.

    Args:
        value: Number to colour; its absolute value is used.
        cmap_name: Name of a registered Matplotlib colormap.
        vmin: Value mapped to the low end of the colormap.
        vmax: Value mapped to the high end of the colormap.

    Returns:
        The colour as a hex string produced by ``colors.rgb2hex``.
    """
    norm = colors.Normalize(vmin=vmin, vmax=vmax)
    # plt.get_cmap replaces cm.get_cmap, which was deprecated in Matplotlib 3.7
    # and removed in 3.9; plt.get_cmap is available in old and new releases.
    cmap = plt.get_cmap(cmap_name)
    # The colormap returns RGBA; keep only the RGB components.
    rgb = cmap(norm(abs(value)))[:3]
    return colors.rgb2hex(rgb)

In [None]:
def get_color(ratio_year):
    """Build one colour per row of ``agRegions`` from a year's ratios.

    Args:
        ratio_year: DataFrame with ``car_uid`` and ``ratio`` columns.

    Returns:
        A ``pd.Series`` of hex colour strings in the row order of
        ``agRegions``. A district whose id does not appear in
        ``ratio_year['car_uid']`` gets the colour for a ratio of 0.
    """
    # Build the car_uid -> ratio lookup once instead of re-scanning ratio_year
    # for every district. setdefault keeps the FIRST ratio seen for a repeated
    # car_uid, which is what a first-match scan would return.
    ratio_by_district = {}
    for car_uid, ratio in zip(ratio_year['car_uid'].tolist(), ratio_year['ratio'].tolist()):
        ratio_by_district.setdefault(car_uid, ratio)

    color_map = [
        color_map_color(ratio_by_district.get(district, 0))
        for district in agRegions['district'].tolist()
    ]
    return pd.Series(color_map)

In [None]:
def plot_map(color_map, year):
    """Draw the regions in ``agRegions`` filled with the given colours.

    Args:
        color_map: Colours handed unchanged to ``agRegions.plot(color=...)``.
        year: Value appended (via ``str``) to the figure title.

    The axes are limited to the total bounds of ``agRegions`` and every region
    is labelled with its ``district`` value at the geometry's centroid.
    """
    x_min, y_min, x_max, y_max = agRegions.total_bounds

    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_ylim(y_min, y_max)
    ax.set_xlim(x_min, x_max)
    ax.set_title("Incident level for district in " + str(year))
    agRegions.plot(ax=ax, color=color_map, edgecolor="black")

    def label_district(region):
        # Write the district id at the centroid of the region's geometry.
        return ax.annotate(
            text=region["district"],
            xy=region.geometry.centroid.coords[0],
            ha="center",
            color="black",
            size=10,
        )

    agRegions.apply(label_district, axis=1)
    plt.show()

In [None]:
# Draw one map per year from 1995 through 2022.
# The loop variable is deliberately NOT called `year`: an earlier cell stores a
# list of years under that name and the line-plot cells read it, so reusing the
# name here would silently replace that list with the integer 2022.
for map_year in range(1995, 2023):
    ratio_year = region_df.loc[region_df['year'] == map_year]
    color = get_color(ratio_year)
    plot_map(color, map_year)