In [3]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [4]:
from os import environ

from sqlalchemy import Connection, create_engine, sql
import pandas as pd
import altair as alt
from pandas import DataFrame
from dotenv import load_dotenv

# Load settings from a local .env file (if present) so the `environ[...]`
# lookups in later cells can find the database credentials.
load_dotenv()

ModuleNotFoundError: No module named 'pandas'

In [None]:
def get_database_connection() -> Connection:
    """
    Establishes a database connection to the database specified.

    The user, password and host are read from the DB_USER, DB_PASSWORD and
    DB_HOST environment variables and used to open a connection through the
    mssql+pymssql driver.

    Returns:
        An open SQLAlchemy Connection. The caller owns it and should close it.

    Raises:
        KeyError: if one of the DB_* environment variables is not set.
        ConnectionError: if the connection could not be established. The
            underlying driver/SQLAlchemy error is attached as the cause.
    """
    # Local imports keep this cell self-contained.
    from urllib.parse import quote
    from sqlalchemy.exc import SQLAlchemyError

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # in a user name or password cannot corrupt the connection URL.
    user = quote(environ['DB_USER'], safe="")
    password = quote(environ['DB_PASSWORD'], safe="")
    host = environ['DB_HOST']

    engine = create_engine(
        f"mssql+pymssql://{user}:{password}@{host}/?charset=utf8")

    try:
        # create_engine() is lazy; the network round trip happens here.
        return engine.connect()

    except (SQLAlchemyError, ConnectionError) as error:
        engine.dispose()
        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated AttributeError. The message deliberately
        # omits the credentials.
        raise ConnectionError(
            f"Could not connect to database host {host!r}") from error


# Open the connection that the data-loading cell below passes to its query.
conn = get_database_connection()

: 

In [None]:
def load_all_plant_data(conn: Connection) -> DataFrame:
    """
    Loads every plant condition reading joined to its plant, botanist and origin.

    Args:
        conn: An open connection. The `plants` database is selected on it
            before the query runs.

    Returns:
        A DataFrame with one row per plant_condition record that has a
        matching plant, botanist and origin, and one column per item in the
        SELECT list. An empty result still carries those column names.
    """

    conn.execute(sql.text("USE plants;"))

    query = sql.text(
        """SELECT plant_condition.plant_condition_id,
        plant_condition.at, plant_condition.soil_moisture,
        plant_condition.temperature, plant_condition.last_watered,
        plant.plant_id, plant.plant_name,
        plant.scientific_name, botanist.botanist_id, botanist.first_name,
        botanist.surname, botanist.email, botanist.phone_number,
        origin.origin_id, origin.latitude, origin.longitude, origin.region
        FROM s_epsilon.plant_condition
        JOIN s_epsilon.plant ON s_epsilon.plant.plant_id = s_epsilon.plant_condition.plant_id
        JOIN s_epsilon.botanist ON s_epsilon.botanist.botanist_id = s_epsilon.plant.botanist_id
        JOIN s_epsilon.origin ON s_epsilon.origin.origin_id = s_epsilon.plant.origin_id;""")

    # NOTE(review): the raw COMMIT is kept exactly as it was; presumably it
    # ends the transaction opened by the USE statement - confirm whether
    # conn.commit() could replace it.
    conn.execute(sql.text("COMMIT;"))
    result = conn.execute(query)

    # Take the column names from the result metadata rather than letting
    # pandas infer them from the first row, so a query that returns no rows
    # still produces a frame with the expected columns.
    return pd.DataFrame(result.fetchall(), columns=list(result.keys()))

# Pull the joined plant readings over the open connection.
df = load_all_plant_data(conn)

# Preview the first 45 rows of the loaded data.
df.head(45)

: 

## Average soil moisture

In [None]:
# Cast soil_moisture to float before aggregating; this overwrites the column in df.
df["soil_moisture"] = df["soil_moisture"].astype(float)

# Mean soil moisture across every reading currently in df.
average_soil_moisture = df["soil_moisture"].mean()

# Display the result as the cell output.
average_soil_moisture

: 

In [None]:

def get_latest_data(df: DataFrame):
    """
    Returns the latest data of each plant id.

    Args:
        df: Readings with at least a 'plant_id' column and an 'at' timestamp
            column. The index may contain duplicate labels.

    Returns:
        A DataFrame holding, for every plant_id, the single row with the
        greatest 'at' value, ordered by plant_id and keeping the original
        index labels. Rows with a missing 'at' are never selected, so a plant
        whose readings all lack a timestamp is absent from the result.
    """
    # Work with row positions rather than index labels: idxmax() reports
    # labels, and a label lookup returns every row sharing that label when
    # the index is not unique (e.g. after pd.concat without ignore_index).
    keys = df[['plant_id', 'at']].reset_index(drop=True)

    # A reading without a timestamp can never be the latest one.
    timed = keys[keys['at'].notna()]

    latest_positions = timed.groupby('plant_id')['at'].idxmax()

    return df.iloc[latest_positions.to_numpy(dtype=int)]



def get_latest_temperature_readings(plants: DataFrame):
    """
    Builds an altair bar chart of the most recent temperature reading per plant.

    The newest row for each plant is taken from `plants`, reduced to the
    plant name and temperature columns, and drawn as horizontal bars
    (plant name on the y axis, temperature on the x axis).
    """
    newest_readings = get_latest_data(plants)
    chart_source = newest_readings[["plant_name", "temperature"]]

    # One bar per plant, shaded from the 'blues' scheme with no legend.
    name_axis = alt.Y("plant_name:N")
    bar_colour = alt.Color("plant_name:N", legend=None).scale(scheme="blues")

    bars = alt.Chart(chart_source).mark_bar()
    encoded_bars = bars.encode(
        x="temperature:Q",
        y=name_axis,
        color=bar_colour,
        tooltip=["plant_name:N", "temperature:Q"],
    )

    return encoded_bars.properties(
        width=600,
        title="Latest Temperature of Plants",
    )

# Render the latest-temperature chart for the loaded plant data as the cell output.
get_latest_temperature_readings(df)

: 

In [None]:
# Distinct plant names present in the loaded data.
unique = df["plant_name"].unique()

# Show the first two names as a quick sanity check.
unique[0:2]

: 

In [None]:
from os import environ
from boto3 import client

from sqlalchemy import Connection, create_engine, sql
import pandas as pd
from pandas import DataFrame
from dotenv import load_dotenv

from parquet_extract import convert_to_df, download_parquet_files, remove_old_files, get_parquet


# S3 client authenticated with the AWS key pair read from the environment.
s3 = client("s3",
                aws_access_key_id=environ["AWS_ACCESS_KEY_ID"],
                aws_secret_access_key=environ["AWS_SECRET_ACCESS_KEY"])

# NOTE(review): presumably downloads the parquet files that get_parquet(s3)
# lists - confirm against parquet_extract.
download_parquet_files(s3, get_parquet(s3))

# NOTE(review): presumably reads the downloaded parquet files into a single
# DataFrame - confirm against parquet_extract.
long_plants = convert_to_df()

# NOTE(review): presumably deletes the downloaded files once they are loaded;
# it must stay after convert_to_df() if so - confirm against parquet_extract.
remove_old_files()



def merge_long_and_short_dataframes(long_plants: DataFrame, plants: DataFrame) -> DataFrame:
    """
    Returns a merged dataframe.

    Args:
        long_plants: Frame whose temperature readings are stored in a
            'temp' column. It is not modified.
        plants: Frame whose readings already use a 'temperature' column.

    Returns:
        The rows of `plants` followed by the rows of `long_plants`, with the
        'temp' values of `long_plants` also exposed as 'temperature'. The
        original index labels of both inputs are kept, so labels may repeat.
    """
    # assign() returns a new frame, so the caller's long_plants is left
    # untouched instead of silently gaining a 'temperature' column.
    long_with_temperature = long_plants.assign(
        temperature=long_plants["temp"])

    return pd.concat([plants, long_with_temperature])


# Combine the parquet-sourced readings with the readings loaded from the database.
merged = merge_long_and_short_dataframes(long_plants, df)


# Display the combined frame as the cell output.
merged

: 