## imports

In [11]:
from os import environ as ENV
from datetime import datetime, timezone
from dotenv import load_dotenv
from pymssql import connect
import pandas as pd
from boto3 import client

## load data in

In [2]:
load_dotenv()

True

In [3]:
def get_db_connection(config: dict) -> connect:
    """Returns database connection."""

    return connect(
        server=config["DB_HOST"],
        port=config["DB_PORT"],
        user=config["DB_USER"],
        database=config["DB_NAME"],
        password=config["DB_PASSWORD"],
        as_dict=True
    )

In [4]:
connection = get_db_connection(ENV)

In [5]:
def get_df(conn: connect) -> pd.DataFrame:
    """Returns a Dataframe of method data from database."""

    query = """ 
            SELECT *
            FROM s_beta.recording AS r
            FULL JOIN s_beta.plant AS p
                ON r.plant_id = p.plant_id
            """
    
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()

    df = pd.DataFrame(rows)[
        ["plant_id", "plant_name", "scientific_name", "soil_moisture", "temperature"]]
    
    return df

In [6]:
data = get_df(connection)

In [7]:
data

Unnamed: 0,plant_id,plant_name,scientific_name,soil_moisture,temperature
0,0,Epipremnum Aureum,Epipremnum aureum,94.2520,13.2095
1,1,Venus flytrap,,93.6619,12.0518
2,2,Corpse flower,,95.0359,9.1838
3,3,Rafflesia arnoldii,,97.0180,10.0402
4,4,Black bat flower,,91.3188,11.3533
...,...,...,...,...,...
1753,45,Begonia,Begonia 'Art Hodes',36.3051,8.9875
1754,46,Medinilla Magnifica,Medinilla magnifica,39.7980,13.3047
1755,47,Calliandra Haematocephala,Calliandra haematocephala,2.9719,13.3351
1756,48,Zamioculcas Zamiifolia,Zamioculcas zamiifolia,37.2374,14.8864


## transform data

### summary

In [8]:
def get_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Gets 1 mean per parameter per plant.
    Returns pd.DF."""
    
    df = df.groupby(["plant_id", "plant_name", "scientific_name"],as_index=False
                    ).agg(["mean", "std", "min", "max"]
                          ).droplevel(1, axis=1)
    
    df.columns = ['plant_id', 'plant_name', 'scientific_name'] + \
        [param+"_"+metric
         for param in ['soil_moisture', 'temperature']
         for metric in ["mean", "std", "min", "max"]]
    
    return df

In [9]:
summary = get_summary(data)

In [10]:
summary

Unnamed: 0,plant_id,plant_name,scientific_name,soil_moisture_mean,soil_moisture_std,soil_moisture_min,soil_moisture_max,temperature_mean,temperature_std,temperature_min,temperature_max
0,0,Epipremnum Aureum,Epipremnum aureum,78.122289,25.256592,36.1562,94.252,13.555887,2.047374,13.1356,25.6808
1,5,Pitcher plant,Sarracenia catesbaei,81.25853,24.909889,40.6315,97.4099,12.13133,3.982354,11.1584,29.1808
2,6,Wollemi pine,Wollemia nobilis,76.748311,24.621489,37.3608,92.974,11.554414,2.472303,10.946,23.1398
3,8,Bird of paradise,Heliconia schiedeana 'Fire and Ice',76.309708,25.266831,32.1192,91.7218,11.601586,0.430215,11.4868,14.1283
4,9,Cactus,Pereskia grandifolia,78.334816,25.439602,36.8522,94.832,10.726816,0.76208,10.5782,15.2341
5,11,Asclepias Curassavica,Asclepias curassavica,78.134094,24.046534,36.8174,92.997,12.756372,16.564403,9.4361,108.0111
6,14,Colocasia Esculenta,Colocasia esculenta,79.173289,24.755318,38.7978,95.256,13.288322,0.921718,13.0936,18.7336
7,16,Euphorbia Cotinifolia,Euphorbia cotinifolia,66.054386,37.604992,4.7355,90.4646,13.290114,0.033154,13.2622,13.3546
8,17,Ipomoea Batatas,Ipomoea batatas,77.217558,25.753691,36.0109,94.2281,10.150694,2.124394,9.7414,22.5272
9,19,Musa Basjoo,Musa basjoo,77.861232,23.707122,36.3864,92.3086,14.412657,2.745477,12.7998,29.6748


### anomalies

In [None]:
def is_healthy_moisture(df: pd.DataFrame) -> bool:
    
    last_hour = pd.Timestamp(datetime.now(timezone.utc)-timedelta(hours=1))
    df_in_last_hour = df[(df['recording_taken'] >= last_hour)]
    
    mean_moist = df.groupby('plant_id')['soil_moisture'].mean().reset_index()
    std_moist = df.groupby('plant_id')['soil_moisture'].std().reset_index()
    
    merged_df = pd.merge(mean_moist, std_moist, on='plant_id').rename(
        columns={'soil_moisture_x': 'mean', 'soil_moisture_y': 'std'})
    merged_df['anomalous +'] = merged_df['mean'] + \
        merged_df['std'].apply(lambda x: x*2)
    merged_df['anomalous -'] = merged_df['mean'] - \
        merged_df['std'].apply(lambda x: x*2)
    merge_2 = pd.merge(merged_df, df_in_last_hour, on='plant_id')
    merge_2 = merge_2.apply(lambda x: (x['anomolous -'] <= x['temperature']) & (
        x['temperature'] <= x['anomolous +']), axis=1).tolist()
    return False in merge_2


def is_healthy_temp(df: pd.DataFrame) -> bool:
    last_hour = pd.Timestamp(datetime.now(timezone.utc)-timedelta(hours=1))
    df_in_last_hour = df[(df['recording_taken'] >= last_hour)]
    mean_temperature = df.groupby(
        'plant_id')['temperature'].mean().reset_index()
    std_temperature = df.groupby('plant_id')['temperature'].std().reset_index()
    merged_df = pd.merge(mean_temperature, std_temperature, on='plant_id').rename(
        columns={'temperature_x': 'mean', 'temperature_y': 'std'})
    merged_df['anomolous +'] = merged_df['mean'] + \
        merged_df['std'].apply(lambda x: x*1.5)
    merged_df['anomolous -'] = merged_df['mean'] - \
        merged_df['std'].apply(lambda x: x*1.5)
    merge_2 = pd.merge(merged_df, df_in_last_hour, on='plant_id')
    merge_2 = merge_2.apply(lambda x: (x['anomolous -'] <= x['temperature']) & (
        x['temperature'] <= x['anomolous +']), axis=1).tolist()
    return False in merge_2

## upload data

In [None]:
S3 = client('s3',
            aws_access_key_id=ENV["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=ENV["AWS_SECRET_ACCESS_KEY"])

In [None]:
def upload_object(client: client, file: str, bucket: str, key: str) -> None:
    """Upload file to S3 bucket.
    Returns nothing."""
    
    client.upload_file(file, bucket, key)

In [None]:
# upload_object(mean.csv, "late-ordovician", )

## clear database