In [1]:
import os
import sys
import warnings
import numpy as np
import pandas as pd
import time
import datetime as dt
import re

from IPython.display import display, HTML, clear_output
from IPython.core.interactiveshell import InteractiveShell
import ipywidgets as widgets

from matplotlib import pyplot as plt
import seaborn as sns

from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
InteractiveShell.ast_node_interactivity = "all"
display(HTML("<style>.container { width:79% !important; }</style>"))

# easier-to-read notebook:
from IPython.display import display, HTML, clear_output
pd.options.display.max_columns = 70
pd.options.display.max_rows = 500
%load_ext autoreload
%autoreload 2

r = re.compile('.*maoz*.')
if len(list(filter(r.match, sys.path))) != 0:
    PHYTECH_DRIVE_PATH = os.environ['PHYTECH_DRIVE_PATH']
    GITHUB_PATH = PHYTECH_DRIVE_PATH + 'GitHub'
    DATA_WD = PHYTECH_DRIVE_PATH + '/Fruit_Growth_prediction/data'
    CERT_PATH = '/Users/maozdotan/Google Drive/My Drive/Data'
    sys.path.append(CERT_PATH)
    from sql_import_export import SqlImporter
    import cert_aws as c
    sql_importer = SqlImporter(database = c.database_research, user = c.user_research, password = c.password_research,
                                host = c.host_research, port = c.port_research, verbose=True)

local aws_cert.py loaded


In [2]:
import project_class_data_extract
from logic_parameters import default_latitude, default_height
from tqdm import tqdm_notebook
#import common_db as cdb

- You call the 'Project' class
- you bring in the data: project.load_sm_project_data
- you apply transformers on the data (to fill missing values, for example): project.apply_transformers()
 - and then you group the TS by depths:
    project.group_data_to_depths()

In [5]:
#project_results = find_not_responding_events(project_data)
query = f"""
    SELECT distinct(pm.project_id)
    FROM projects_metadata pm
    JOIN soil_sensors_metadata ssm
    ON pm.project_id = ssm.project_id
    JOIN project_irrigation_spans_v2 pis
    ON pis.project_id = pm.project_id
    WHERE time_zone like '%Los_Angeles'
    AND season = 2023
    AND type_id IN (90, 91, 92, 98, 117, 118, 124, 127, 135, 137)
    AND start_date >= CAST((CAST('{start_date}' AS timestamp)) AS date)
    """
sql_importer = SqlImporter(query=query, database=c.database_production, user=c.user_production, password=c.password_production,
                                host=c.host_production, port=c.port_production, verbose=True)

sql_importer.get_data()
project_list = sql_importer.data
project_list

Loaded table with 2316 lines from projects_metadata


Unnamed: 0,project_id
0,852000
1,857614
2,856835
3,852740
4,853861
...,...
2311,852319
2312,876357
2313,852562
2314,854644


In [7]:
SM_HOURLY_DIFF_FIRST_DEPTH = 0.5
SM_HOURLY_DIFF_SECOND_DEPTH = 0.25
MIN_IRR_AMOUNT = 1
MAX_RESPONDING_EVENTS = 1
LOW_RESPONSE_FACTOR = 0.5


#project_list = [852093, 871812, 851995, 852015]
summary_df = pd.DataFrame()
yesterday = (dt.date.today() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
start_date = (dt.date.today() - dt.timedelta(days=8)).strftime("%Y-%m-%d")
failed_list = []

In [7]:



for p_id in project_list['project_id'][:50]:
    #print(list(project_list['project_id']).index(p_id))
    try:
        project_data = load_project_data(project_id=p_id, min_date=start_date,
                      max_date=yesterday, min_depth=10, max_depth=91, debug=False)
        if len(project_data.df_irrigation) == 0:
            continue
        project_df = get_project_results(project_data)
        summary_df = pd.concat([summary_df,project_df],axis=0)
    except:
        failed_list.append(p_id)
        pass

display(summary_df[summary_df.not_responding_events_count > 0])

Unnamed: 0,project_id,sensor_id,probe_depths,irrigation_events,not_responding_events_count,event_timestamp,support_status,support_updated_at,remarks,link,timezone
0,852000,952505,"[16.0, 31.0]",13,2,"{2023-04-29 19:20:00, 2023-05-05 08:43:00, 202...",completed,2023-04-19 03:10:45.553121,"{, Not responding to irrigation}",https://app.phytech.com/12525/62175/852000,America/Los_Angeles
0,853861,947367,"[16.0, 31.0]",4,1,{2023-04-30 20:55:00},,,{Not responding to irrigation},https://app.phytech.com/3414/3386/853861,America/Los_Angeles
0,855327,930853,"[16.0, 31.0]",6,1,{2023-05-02 12:26:00},closed,2022-12-23 01:07:54.664161,{Not responding to irrigation},https://app.phytech.com/8535/39907/855327,America/Los_Angeles
0,855929,1148430,"[16.0, 31.0]",5,5,"{2023-05-02 08:45:00, 2023-05-03 07:20:00, 202...",closed,2023-04-11 01:27:42.094563,{Not responding to irrigation},https://app.phytech.com/9321/44177/855929,America/Los_Angeles
0,851614,1120854,"[16.0, 31.0]",5,1,{2023-05-03 10:15:00},completed,2023-02-17 04:10:13.348095,{},https://app.phytech.com/13369/58147/851614,America/Los_Angeles
0,853864,908848,"[16.0, 31.0]",10,3,"{2023-04-29 17:20:00, 2023-04-28 06:19:00, 202...",,,{},https://app.phytech.com/4274/3404/853864,America/Los_Angeles
0,852142,1120584,"[16.0, 31.0]",5,4,"{2023-05-02 11:15:00, 2023-05-04 11:15:00, 202...",completed,2023-03-15 03:10:10.539992,{},https://app.phytech.com/13370/62290/852142,America/Los_Angeles
0,853527,900987,"[16.0, 31.0]",3,2,"{2023-05-03 08:00:00, 2023-05-04 07:44:00}",completed,2022-07-20 02:11:23.589639,{Not responding to irrigation},https://app.phytech.com/4177/27006/853527,America/Los_Angeles
0,858091,1134612,"[16.0, 31.0]",7,5,"{2023-05-03 13:58:00, 2023-04-28 09:28:00, 202...",completed,2023-03-07 04:10:18.822967,{Not responding to irrigation},https://app.phytech.com/10428/50111/858091,America/Los_Angeles
0,855921,1148497,"[16.0, 31.0]",6,4,"{2023-05-01 09:35:00, 2023-05-03 06:29:00, 202...",new,2023-04-21 00:15:36.897139,"{Not responding to irrigation, }",https://app.phytech.com/9321/44173/855921,America/Los_Angeles


In [50]:
project_df[project_df.project_id==855929]['event_timestamp'][0]
total = 0
total+=1 if True else total
total

{Timestamp('2023-05-01 09:35:00'),
 Timestamp('2023-05-01 14:50:00'),
 Timestamp('2023-05-03 07:20:00'),
 Timestamp('2023-05-03 15:40:00')}

1

In [8]:
project_data = load_project_data(project_id=855929, min_date=start_date,
                  max_date=yesterday, min_depth=10, max_depth=91, debug=False)
if project_data.valid_project:
#project_data.multi_depths_sm
    project_data.df_irrigation

project_df = get_project_results(project_data, False) #find_not_responding_events(project_data,debug=False)
project_df

Unnamed: 0,project_id,amount,start_ts,end_ts,psi,start_date,start,end
0,855929,1.140883,1682958900000,1682961900000,34.662799,2023-05-01,2023-05-01 09:35:00,2023-05-01 10:25:00
1,855929,1.140883,1682977800000,1682980800000,35.678028,2023-05-01,2023-05-01 14:50:00,2023-05-01 15:40:00
2,855929,1.140883,1683042300000,1683045300000,43.074692,2023-05-02,2023-05-02 08:45:00,2023-05-02 09:35:00
3,855929,1.140883,1683123600000,1683126600000,39.883974,2023-05-03,2023-05-03 07:20:00,2023-05-03 08:10:00
4,855929,1.140883,1683153600000,1683156600000,35.532995,2023-05-03,2023-05-03 15:40:00,2023-05-03 16:30:00
5,855929,1.140883,1683393300000,1683396300000,34.227701,2023-05-06,2023-05-06 10:15:00,2023-05-06 11:05:00


irrigation event: 2023-05-01 09:35:00
0 {Timestamp('2023-05-01 09:35:00')}
0 {Timestamp('2023-05-01 09:35:00')}
not_responding flag: True
total: 1 ,2023-05-01 09:35:00 finished

            ##########################
irrigation event: 2023-05-01 14:50:00
1 {Timestamp('2023-05-01 14:50:00'), Timestamp('2023-05-01 09:35:00')}
1 {Timestamp('2023-05-01 14:50:00'), Timestamp('2023-05-01 09:35:00')}
not_responding flag: True
total: 2 ,2023-05-01 14:50:00 finished

            ##########################
irrigation event: 2023-05-02 08:45:00
responding well, 2023-05-02 08:45:00
not_responding flag: False
total: 4 ,2023-05-02 08:45:00 finished

            ##########################
irrigation event: 2023-05-03 07:20:00
4 {Timestamp('2023-05-03 07:20:00'), Timestamp('2023-05-01 14:50:00'), Timestamp('2023-05-01 09:35:00')}
4 {Timestamp('2023-05-03 07:20:00'), Timestamp('2023-05-01 14:50:00'), Timestamp('2023-05-01 09:35:00')}
not_responding flag: True
total: 5 ,2023-05-03 07:20:00 finished

   

Unnamed: 0,project_id,sensor_id,probe_depths,irrigation_events,not_responding_events_count,event_timestamp,support_status,support_updated_at,remarks,link,timezone
0,855929,1148430,"[16.0, 31.0]",6,7,"{2023-05-03 07:20:00, 2023-05-06 10:15:00, 202...",closed,2023-04-11 01:27:42.094563,{Low sensor respnse},https://app.phytech.com/9321/44177/855929,America/Los_Angeles


In [4]:
def find_not_responding_events(project_data, debug=False):
    not_responding_SM_sensors_project_dict,probe_events_dict = {},{}
    event_timestamp = set()
    probe_depths = project_data.df_sm_data.depth_cm.unique()
    total = 0
    for irr_event_counter,row in project_data.df_irrigation.iterrows():
        print(f"irrigation event: {project_data.df_irrigation.start[irr_event_counter]}")
        not_responding = False
        if row['amount'] > MIN_IRR_AMOUNT:
            for probe_depth in probe_depths:
                probe_events_list = []

                if debug:
                    print("\nprobe_depth", probe_depth)

                probe_dict = {}
                probe_depth_index = list(probe_depths).index(probe_depth)

                probe_local_saturation = project_data.local_saturation_by_depth[probe_depth_index][1]
                df = project_data.multi_depths_sm[probe_depth].reset_index(drop=False)
                df['date'] = df.local_time.dt.date
                #################################
                # Not responding conditions:
                #
                # time frame irrigation span -1hr/ +3hr
                # soil moisture hourly diff > SM_HOURLY_DIFF according to depth
                # initial probe moisture is less than local saturation minus 0.5%
                #################################
                df_irr_span = df[(df.local_time > project_data.df_irrigation.start[irr_event_counter] - pd.Timedelta(hours=1)) 
                                 & (df.local_time < project_data.df_irrigation.end[irr_event_counter] + pd.Timedelta(hours=4))
                                ]
                if debug:
                    display(project_data.df_irrigation.start[irr_event_counter],project_data.df_irrigation.end[irr_event_counter],
                            df_irr_span)
                
                
                SM_HOURLY_DIFF = SM_HOURLY_DIFF_FIRST_DEPTH if probe_depth_index==0 else SM_HOURLY_DIFF_SECOND_DEPTH
                probe_dict['remarks'] = ''
                # check if probe responding = at least one hourly diff above SM_HOURLY_DIFF
                if max(df_irr_span.sm_diff) < SM_HOURLY_DIFF:
                    # check if the probe initial moisture is near local saturation 
                    if ("not_responding" in project_data.flag) | (max(df_irr_span.sm_val.iloc[:3]) < probe_local_saturation - 0.5):
                        #counter+=1 # Not responding to irrigation
                        not_responding = True
                        #not_responding_sensor_id = int(project_data.df_sm_data[project_data.df_sm_data['depth_cm']==probe_depth].loc[0,'sensor_id'])
                        probe_events_list.append((probe_depth,project_data.df_irrigation.start[irr_event_counter]))
                        event_timestamp.add(project_data.df_irrigation.start[irr_event_counter])
                        probe_dict['remarks'] = 'Not responding to irrigation'
                        print(total,event_timestamp)
                # Find sensors with low response (low peak in sensor graph)
                    if (max(df_irr_span.sm_diff) > SM_HOURLY_DIFF*LOW_RESPONSE_FACTOR) & (max(df_irr_span.sm_diff) < SM_HOURLY_DIFF):
                        probe_dict['remarks'] = 'Low sensor respnse'
                else:
                    print(f"responding well, {project_data.df_irrigation.start[irr_event_counter]}")
                    break

                if debug:
                    display(f"max hourly diff: {max(df_irr_span.sm_diff)}",
                            f"probe local saturation: {probe_local_saturation}",
                            f"initial probe moisture: {max(df_irr_span.sm_val.iloc[:3])}")
                    
                probe_dict['irrigation_events'] = len(project_data.df_irrigation)
                probe_dict['events dates'] = probe_events_list
                probe_events_dict[probe_depth] = probe_dict
            
            print(f"not_responding flag: {not_responding}")
            total+=1 if not_responding else total
            print(f"""total: {total} ,{project_data.df_irrigation.start[irr_event_counter]} finished\n
            ##########################""")
            not_responding_SM_sensors_project_dict.update({'probe_depths':list(probe_depths),
                                                           'events_count':total,
                                                         'event_timestamp':event_timestamp,
                                                         'events_details':probe_events_dict})
    return(not_responding_SM_sensors_project_dict)

In [3]:
def find_not_responding_events(project_data, debug=False):
    not_responding_SM_sensors_project_dict,probe_events_dict = {},{}
    event_timestamp = set()
    probe_depths = project_data.df_sm_data.depth_cm.unique()
    for probe_depth in probe_depths:
        probe_events_list = []
        
        if debug:
            print("\nprobe_depth", probe_depth)
            
        probe_dict = {}
        counter = 0
        probe_depth_index = list(probe_depths).index(probe_depth)
        
        for irr_event_counter,row in project_data.df_irrigation.iterrows():
            #probe_depth = project_data.df_sm_data.depth_cm.unique()[0]
            probe_local_saturation = project_data.local_saturation_by_depth[probe_depth_index][1]
            df = project_data.multi_depths_sm[probe_depth].reset_index(drop=False)
            df['date'] = df.local_time.dt.date

            if row['amount'] > MIN_IRR_AMOUNT:
                #################################
                # Not responding conditions:
                #
                # time frame irrigation span -1hr/ +3hr
                # soil moisture hourly diff > SM_HOURLY_DIFF according to depth
                # initial probe moisture is less than local saturation minus 0.5%
                #################################
                df_irr_span = df[(df.local_time > project_data.df_irrigation.start[irr_event_counter] - pd.Timedelta(hours=1)) 
                                 & (df.local_time < project_data.df_irrigation.end[irr_event_counter] + pd.Timedelta(hours=4))
                                ]
                if debug:
                    display(project_data.df_irrigation.start[irr_event_counter],project_data.df_irrigation.end[irr_event_counter],
                            df_irr_span)
                
                
                SM_HOURLY_DIFF = SM_HOURLY_DIFF_FIRST_DEPTH if probe_depth_index==0 else SM_HOURLY_DIFF_SECOND_DEPTH
                probe_dict['remarks'] = ''
                # check if probe responding = at least one hourly diff above SM_HOURLY_DIFF
                if max(df_irr_span.sm_diff) < SM_HOURLY_DIFF:
                    # check if the probe initial moisture is near local saturation 
                    if ("not_responding" in project_data.flag) | (max(df_irr_span.sm_val.iloc[:3]) < probe_local_saturation - 0.5):
                        counter+=1 # Not responding to irrigation
                        not_responding_sensor_id = int(project_data.df_sm_data[project_data.df_sm_data['depth_cm']==probe_depth].loc[0,'sensor_id'])
                        probe_events_list.append((probe_depth,project_data.df_irrigation.start[irr_event_counter]))
                        event_timestamp.add(project_data.df_irrigation.start[irr_event_counter])
                        probe_dict['remarks'] = 'Not responding to irrigation'
                # Find sensors with low response (short peak in sensor graph)
                    if (max(df_irr_span.sm_diff) > SM_HOURLY_DIFF*LOW_RESPONSE_FACTOR) & (max(df_irr_span.sm_diff) < SM_HOURLY_DIFF):
                        probe_dict['remarks'] = 'Low sensor respnse'

                if debug:
                    display(f"max hourly diff: {max(df_irr_span.sm_diff)}",
                            f"probe local saturation: {probe_local_saturation}",
                            f"initial probe moisture: {max(df_irr_span.sm_val.iloc[:3])}")
                    
            probe_dict['irrigation_events'] = len(project_data.df_irrigation)
            probe_dict['not_responding_events_count'] = counter
            probe_dict['events dates'] = probe_events_list
            probe_events_dict[probe_depth] = probe_dict

    not_responding_SM_sensors_project_dict.update({'probe_depths':list(probe_depths),
                                                 'event_timestamp':event_timestamp,
                                                 'events_details':probe_events_dict})
    return(not_responding_SM_sensors_project_dict)

def get_project_results(project_data, debug_=False):
    project_results = find_not_responding_events(project_data,debug_)
    #project_df = pd.DataFrame.from_dict({k: v for k,v in probe_results.items()
    #                         if k in project_depths}).T.reset_index(drop=False).rename(columns=
    #                                                                                  {'index':'depth'})
    project_df = pd.DataFrame(columns=['project_id','sensor_id','probe_depths','irrigation_events',
                                      'not_responding_events_count','event_timestamp',
                                       'support_status', 'support_updated_at', 'remarks',
                                       'link','timezone'])
    project_dict = {}
    max_val = 0
    remarks = set()
    
    if not project_results['events_details']: # NO events of not responding
        pass
    else:
        # find probe with max not responding events
        for d in project_results.get('probe_depths'):
            #max_val = max(project_results['events_details'][d]['not_responding_events_count'],max_val)
            remarks.add(project_results.get('events_details')[d]['remarks'])
    event_count = project_results['events_count']
    if not project_data.sensor_support_status: # No support status information
        sensor_status = {'status':'None', 'updated_at':'None'}
    else:
        sensor_status = project_data.sensor_support_status_dict[0]

    project_dict.update({'project_id' : project_data.project_id,
                         'sensor_id' : project_data.sensor_id,
                         'probe_depths' : project_results.get('probe_depths'),
                         'irrigation_events' : len(project_data.df_irrigation),
                         'not_responding_events_count' : event_count, #max_val,
                         'event_timestamp' : project_results.get('event_timestamp'),
                         'support_status' : sensor_status['status'],
                         'support_updated_at' : sensor_status['updated_at'],
                         'remarks' : remarks,
                         'link' : project_data.app_link,
                         'timezone' : project_data['timezone']})

    project_df = project_df.append(project_dict,ignore_index=True)

    return(project_df)

In [412]:
query = f"""
            SELECT serial_number as sensor_id, updated_at, status
            FROM work_order_line_items
            WHERE serial_number = '{project_data.sensor_id}'
            --AND status not in ('closed','completed')
            order by updated_at desc
            limit 1
        """
sql_importer = SqlImporter(query=query, database=c.database_ruby_production, user=c.user_production, password=c.password_production, host=c.host_production, port=c.port_production, verbose=True)

sql_importer.get_data()
sql_importer.data

Loaded table with 1 lines from work_order_line_items



Unnamed: 0,sensor_id,updated_at,status
0,934741,2023-04-27 08:52:33.235331,new


In [5]:
def load_project_data(project_id, min_date, max_date, min_depth=10, max_depth=91, debug=True):
    from logic_parameters import default_latitude, default_height,default_max_depth
    from project_class_data_extract import Project
    project = Project(
        project_id=project_id,
        min_depth=min_depth,
        max_depth=default_max_depth,  # set the depth range we're interested in
        min_date=min_date,
        max_date=max_date,
        debug=debug)

   
    project.load_project_metadata()
    project.get_sm_depths()

    if len(project.depths_found) == 0:
        project.valid_project = False
        return project

    project.load_sm_project_data(min_depth=project.depths_found[0], max_depth=project.depths_found[-1], )  # change the min/max_depth if you dont want to load all depths
    if not project.valid_project:
        return project
    #project.load_project_weather_data(future=14)  # load the weather date until max_date + 14 days.
    project.apply_transformers()
    project.group_data_to_depths()
    project.load_irrigation_spans()
    project.find_probe_local_saturation()

    project.meta_data = {'project_id': project.project_id, 'latitude': project.latitude if project.latitude else default_latitude,
                         'height': project.height if project.height else default_height, 'app_link': project.app_link}
    project.sensor_support_status_dict = project.get_sensor_support_status()
    
    return project

In [29]:
def transform_data(project_data, depth, max_datetime, max_history_days):
    from config.logic_parameters import max_hours_after_peak, rise_th, peak_th
    dt = DataTransformer(project_data['meta_data'], project_data['multi_depths_sm'], project_data['weather_data'], depth=depth, max_history_days=max_history_days,
                         max_datetime=max_datetime, rise_th=rise_th, peak_th=peak_th)

    dt.collect_historical_peak2rise_events()
    dt.calc_sm_diffs()
    dt.extract_data_from_p2r(max_hours_after_peak=max_hours_after_peak)
    dt.add_lower_depth_data()
    dt.add_weather_data()
    project_data.update({'model_data': dt.data})

    model_data = dict(meta_data=project_data['meta_data'], model_data=project_data['model_data'])
    run_data = {'depth': dt.depth, 'max_datetime': dt.max_datetime, 'max_history_days': dt.max_history_days}
    model_data.update(run_data)
    return model_data

In [None]:
def load_irrigation_spans(self):
        query = f"""
            SELECT project_id, amount, start_ts, end_ts, psi, start_date, 
            timezone('{self.timezone}', to_timestamp(start_ts / 1000)) start_lt,
            timezone('{self.timezone}', to_timestamp(end_ts / 1000)) end_lt
            FROM {DB_TABLES['project_irrigation_spans']}
            WHERE project_id = {self.project_id} 
            AND start_date >= CAST((CAST('{self.min_date}' AS timestamp)) AS date)
            AND start_date <= CAST((CAST('{self.max_date}' AS timestamp)) AS date)
        """

        sql_importer = SqlImporter(query, conn_str=os.environ['DATABASE_URL'], verbose=self.debug)
        sql_importer.get_data()
        
        self.df_irrigation = sql_importer.data.copy().dropna()
        self.df_irrigation.rename(columns={'start_lt': 'start', 'end_lt': 'end'}, inplace=True)