In [None]:
import matplotlib.pyplot as plt  # type: ignore
from dotenv import load_dotenv
import sqlalchemy as sq
import seaborn as sns  # type: ignore
import pandas as pd
import numpy as np
import os, sys

# Add the parent directory to the import search path so the project-local
# Shared package (presumably located there -- confirm) can be imported below.
sys.path.append("../")
from Shared.DataService import DataService

Pseudocode:  
- Load the environment database variables
- Connect to the database

In [None]:
# Let python-dotenv populate the process environment, then read the five
# PostgreSQL connection settings in one pass. os.getenv yields None for any
# variable that is not set.
load_dotenv()
PG_DB, PG_ADDR, PG_PORT, PG_USER, PG_PW = map(
    os.getenv,
    (
        "POSTGRES_DB",
        "POSTGRES_ADDR",
        "POSTGRES_PORT",
        "POSTGRES_USER",
        "POSTGRES_PW",
    ),
)

In [None]:
# Fail fast if any connection setting is absent. The explicit `is None` chain
# is kept so static type checkers narrow each name to `str` below.
if (
    PG_DB is None
    or PG_ADDR is None
    or PG_PORT is None
    or PG_USER is None
    or PG_PW is None
):
    # Name the unset variables so the failure is actionable.
    raise ValueError(
        "Environment variables not set: "
        + ", ".join(
            name
            for name, value in (
                ("POSTGRES_DB", PG_DB),
                ("POSTGRES_ADDR", PG_ADDR),
                ("POSTGRES_PORT", PG_PORT),
                ("POSTGRES_USER", PG_USER),
                ("POSTGRES_PW", PG_PW),
            )
            if value is None
        )
    )

# The port arrives as text from the environment; DataService is given an int.
db = DataService(PG_DB, PG_ADDR, int(PG_PORT), PG_USER, PG_PW)
conn = db.connect()

Purpose: Self-contained data retrieval for the ergot visualization of the data before aggregation

Pseudocode: 
- Create the ergot data SQL query
- [Load the data from the database directly into a DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html) 
- Drop irrelevant attributes
- [Compute the pairwise correlation of columns, excluding NA/null values](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corr.html)

In [None]:
# Read every row and column of the agg_ergot_samples table into a DataFrame
# through the connection opened earlier in the notebook.
ergotQuery = sq.text("SELECT * FROM agg_ergot_samples;")
ergotDF = pd.read_sql_query(sql=ergotQuery, con=conn)

In [None]:
# Rebind rather than mutate in place so this cell can be re-run safely:
# errors="ignore" skips columns that a previous run already removed instead
# of raising KeyError.
ergotDF = ergotDF.drop(columns=["district", "year"], errors="ignore")

In [None]:
# Pairwise correlation between ergotDF's columns (Pearson, the pandas default)
corr = ergotDF.corr(method="pearson")

In [None]:
# Bare expression: the notebook renders the correlation matrix as this cell's output.
corr

Purpose:
- The purpose of this code is to create a visually appealing heatmap to represent the correlation matrix. Heatmaps are an effective way to quickly identify patterns and relationships between variables in a dataset. 

Pseudocode:
- [Set the default theme](https://seaborn.pydata.org/generated/seaborn.set_theme.html)
- [Create a Mask for the Upper Triangle](https://numpy.org/doc/stable/reference/generated/numpy.triu.html)
- [Generate a Custom Colormap](https://seaborn.pydata.org/generated/seaborn.diverging_palette.html)
- [Create the heatmap](https://seaborn.pydata.org/generated/seaborn.heatmap.html)

In [None]:
sns.set_theme(style="white")

# Boolean mask covering the upper triangle, diagonal included. A correlation
# matrix is symmetric, so those cells only repeat the lower triangle.
mask = np.triu(np.ones_like(corr, dtype=bool))

# Create the figure and axes explicitly so the heatmap is drawn on a known target
f, ax = plt.subplots(figsize=(11, 9))

# Diverging palette so negative and positive correlations get distinct hues
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the masked heatmap on `ax` with square cells and the colour scale centred on 0
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=1.0,
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.5},
    ax=ax,
)
ax.set_title("Pairwise correlation of ergot sample attributes")

# Render the figure and keep the Axes repr out of the cell output,
# matching the other plotting cells in this notebook.
plt.show()

Purpose:
- The purpose of these pair plots is to provide a quick visual exploration of the relationships and patterns between various variables in the DataFrame. 
- [sns.pairplot()](https://seaborn.pydata.org/generated/seaborn.pairplot.html)

In [None]:
# Grid of pairwise plots across the columns of ergotDF, rendered immediately
sns.pairplot(data=ergotDF)
plt.show()

In [None]:
# Pairwise plots restricted to the two selected columns
sns.pairplot(data=ergotDF.loc[:, ["percnt_true", "severity_in_neighbor"]])
plt.show()

In [None]:
# Pairwise plots restricted to the three present_prev* columns
sns.pairplot(
    data=ergotDF.loc[:, ["present_prev1", "present_prev2", "present_prev3"]]
)
plt.show()

In [None]:
# Final step of the notebook: hand control back to DataService to tidy up.
# NOTE(review): presumably this closes the connection opened by db.connect() --
# confirm against DataService.cleanup.
db.cleanup()