In [1]:
from matplotlib import pyplot as plt
import json
import pandas as pd
import numpy as np
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
import stumpy
import matplotlib.dates as dates
from matplotlib.patches import Rectangle
import datetime as dt
from collections import Counter
import urllib
import ssl
from tqdm.notebook import tqdm
import io
import os
from kando import kando_client
%run utils.ipynb

In [4]:
# Read the API credentials from the local key file.
with open('key.json') as key_file:
    api_login = json.load(key_file)

# Build the API client that the later cells call (staging server URL).
url = "https://kando-staging.herokuapp.com"
client = kando_client.client(url, api_login['key'], api_login['secret'])

In [5]:
# Notebook-level tuning constants.
# NOTE(review): none of these four names is referenced in the visible cells;
# presumably they are read by helpers loaded from utils.ipynb — confirm, or remove.
RESAMPLE = '15min'  # presumably a pandas resampling frequency string — TODO confirm
WINDOW = 12  # NOTE(review): the matrix-profile call in this notebook passes m=36, not this value
THRESHOLD = 0.05
POINT_ID = 1377  # NOTE(review): the get_graph call in this notebook uses 1012 instead

In [6]:
def _parser(node, graph):
    # recursively build graph from end node
    if len(node['children']) == 0:
        graph.add_node(node['point_id'], name=node['point']['name'])
        return
    for child in node['children']:
        graph.add_edges_from([(node['point_id'], child['point_id'])],
                             weight=child['parent_distance'])
        _parser(child, graph)


def get_graph(point_id):
    """Fetch the network tree for ``point_id`` and return it as a directed graph.

    The tree returned by ``client.network_graph`` is walked with ``_parser``
    into a ``networkx.DiGraph``, and the reversed graph is returned.

    NOTE(review): ``nx`` is imported in a cell that comes after this
    definition, so that cell must have run before the first call.
    """
    tree = client.network_graph(point_id)
    graph = nx.DiGraph()
    _parser(tree, graph)
    # "children" in the API are parents in NetworkX terms, hence the reversal
    reversed_graph = graph.reverse()
    return reversed_graph

In [7]:
import json
import networkx as nx

In [8]:
# Build the network graph for point 1012.
# NOTE(review): 1012 is hard-coded although POINT_ID = 1377 is defined in the
# constants cell — confirm which point is intended.
G = get_graph(1012)

Kando - GET /api/data/network_graph?point_id=1012


In [9]:
# Map node id -> 'name' attribute for the nodes of G that carry one.
gichon_nodes = nx.get_node_attributes(G, 'name')

In [10]:
# Display the id -> name mapping.
gichon_nodes

{2045: 'מאסף שורק ישן',
 941: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 963: 'תעשייתי',
 1159: 'אחוד',
 1056: 'אחוד',
 1087: 'אחוד',
 1133: 'אחוד',
 1088: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1134: 'אחוד',
 1130: 'אחוד',
 1085: 'אחוד',
 1061: 'אחוד',
 1238: 'שוחה אחרונה',
 1062: 'אחוד',
 1531: 'כוללת',
 1127: 'אחוד',
 1164: 'אחוד',
 1076: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 990: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 966: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 999: 'תעשייתי',
 1111: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1078: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1023: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1057: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1155: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1530: 'חיצונית כוללת',
 933: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 987: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 977: 'שוחה אחרונה לפני הזרמת השפכים לביוב העירוני',
 1917: 'מאסף',
 898

In [11]:
# Keep only the points whose sector name is exactly 'Main Collector*'.
# NOTE: this issues one client.get_data request per entry of gichon_nodes.
mifal = {}
for node_id, node_name in gichon_nodes.items():
    sector_name = client.get_data(node_id)['point']['group']['sector']['name']
    if sector_name == 'Main Collector*':
        mifal[node_id] = node_name

Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=941&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=963&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1159&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1056&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1087&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1133&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1088&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1134&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1130&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1085&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1061&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1238&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1062&unit_id=&st

Kando - GET /api/data/fetch?point_id=1044&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1052&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1034&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1149&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1146&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1138&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1082&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1091&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1074&unit_id=&start=&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1112&unit_id=&start=&end=&raw_data=


In [12]:
# Point ids of the selected collectors, in the insertion order of `mifal`.
collectors = list(mifal)
collectors

[2045, 1531, 1545, 1333, 1561, 1332, 2385, 2391, 2387]

In [49]:
def create_sectorial_dataframe(sites_list, start_date=2016):
    """Download every site's samplings and stack them into one DataFrame.

    For each point id in ``sites_list`` the history is requested with
    ``client.get_all``. Sites that return samplings are converted with
    ``get_data_for_sectorial_motif_detection``, and the per-site frames are
    concatenated in input order under a fresh 0..n-1 index. Sites without
    samplings are reported and skipped.

    Args:
        sites_list: iterable of point ids to fetch.
        start_date: forwarded unchanged as ``start`` to ``client.get_all``.
            NOTE(review): the recorded request log shows follow-up requests
            using epoch-second values for ``start``, so the default 2016 is
            presumably interpreted as a timestamp rather than a year — confirm.

    Returns:
        The combined DataFrame, or an empty DataFrame when no site returned
        any samplings.
    """
    site_frames = []
    # Wrap the list itself (not an enumerate generator) so tqdm can show a total.
    for site in tqdm(sites_list):
        print(f'getting info from {site}')
        site_dic = client.get_all(point_id=site, start=start_date)
        if len(site_dic['samplings']) > 0:
            print('creating a dataframe')
            site_frame = get_data_for_sectorial_motif_detection(site_dic)
            print('adding datafram to the sectorial dataframe')
            site_frames.append(site_frame)
        else:
            print(f'No data about site {site}')

    # Collecting the frames and concatenating once does not depend on which
    # site is the first to have data, and avoids re-copying the accumulated
    # frame on every iteration.
    if not site_frames:
        return pd.DataFrame()
    return pd.concat(site_frames, ignore_index=True)

In [53]:
def get_data_for_sectorial_motif_detection(site_dic):
    """Build the per-site feature DataFrame from one API payload.

    Steps, in order:
      1. Load ``site_dic['samplings']`` (one row per key) and keep the
         DateTime, PH, EC, ORP and TEMPERATURE fields.
      2. Copy the raw DateTime into a ``date`` column, then parse DateTime
         (seconds since epoch) and make it the index.
      3. Run ``impute_nulls_with_time_interpolation`` over all columns.
      4. Parse ``date`` and derive ``weekday``, ``month`` and ``hour`` from it.
      5. Stamp every row with the site's id, pipe info, water authority id
         and sector id.

    Args:
        site_dic: mapping providing the 'samplings', 'point_id' and 'point'
            entries read below.

    Returns:
        The resulting DataFrame.
    """

    df = pd.DataFrame.from_dict(site_dic['samplings'], orient='index')[[
        'DateTime', 'PH', 'EC', 'ORP', 'TEMPERATURE'
    ]]
    # Keep the raw timestamp as a regular column before DateTime becomes the index.
    df['date'] = df['DateTime']
    df['DateTime'] = pd.to_datetime(df['DateTime'], unit='s')
    df = df.set_index('DateTime')

    # NOTE(review): the helper is not defined in the visible cells (presumably
    # loaded by `%run utils.ipynb`). Its return value is discarded, so it is
    # presumably expected to modify `df` in place — confirm. At this point
    # `df.columns` also includes the still-numeric `date` column.
    impute_nulls_with_time_interpolation(df, df.columns , '5min')
    df['date'] = pd.to_datetime(df['date'], unit='s')
    df['weekday'] = df.date.apply(lambda x: x.weekday())
    df['month'] = df.date.apply(lambda x: x.month)
    df['hour'] = df.date.apply(lambda x: x.hour)
    
    # Site-level metadata; each value is assigned to its own new column below.
    values = [
        site_dic['point_id'], site_dic['point']['pipe_info']['channel_shape'],
        site_dic['point']['pipe_info']['diameter'],
        site_dic['point']['group']['water_authority']['id'],
        site_dic['point']['group']['sector']['id']
    ]
    df[['point_id', 'channel_shape', 'diameter', 'water_authority',
        'sector']] = values

    return (df)

In [54]:
# Fetch and combine the samplings of every collector point.
# NOTE: slow — the recorded run took about four minutes for nine sites.
df = create_sectorial_dataframe(collectors)









0it [00:00, ?it/s][A[A[A[A[A[A[A[A

getting info from 2045
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1543816800&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1551307500&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1573277400&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1581095400&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1588677900&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2045&unit_id=&start=1593918000&end=&raw_data=
creating a dataframe










1it [00:31, 31.08s/it][A[A[A[A[A[A[A[A

adding datafram to the sectorial dataframe
getting info from 1531
Kando - GET /api/data/fetch?point_id=1531&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1531&unit_id=&start=1512648240&end=&raw_data=
creating a dataframe










2it [00:35, 22.97s/it][A[A[A[A[A[A[A[A

adding datafram to the sectorial dataframe
getting info from 1545
Kando - GET /api/data/fetch?point_id=1545&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1545&unit_id=&start=1509030120&end=&raw_data=










3it [00:37, 16.64s/it][A[A[A[A[A[A[A[A

creating a dataframe
adding datafram to the sectorial dataframe
getting info from 1333
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1499289600&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1507028100&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1514664900&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1527043800&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1534543500&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1542537000&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1553383800&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1560881700&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1568372400&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1333&unit_id=&start=1575855600&end=&raw_data=
Kando 









4it [01:49, 33.37s/it][A[A[A[A[A[A[A[A

adding datafram to the sectorial dataframe
getting info from 1561
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1530606000&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1538062800&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1545572700&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1553721900&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1561182300&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1568617200&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1576769400&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1584392100&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1591962300&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1561&unit_id=&start=1593918000&end=&raw_data=
creating a dataframe










5it [02:46, 40.49s/it][A[A[A[A[A[A[A[A

adding datafram to the sectorial dataframe
getting info from 1332
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1500592800&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1508546700&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1516689600&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1525740600&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1533222900&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1540712400&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1548772500&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1556246100&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1563732000&end=&raw_data=
Kando - GET /api/data/fetch?point_id=1332&unit_id=&start=1571226000&end=&raw_data=
Kando - GET /api/data/fetch









6it [03:59, 50.31s/it][A[A[A[A[A[A[A[A

adding datafram to the sectorial dataframe
getting info from 2385
Kando - GET /api/data/fetch?point_id=2385&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2385&unit_id=&start=None&end=&raw_data=










7it [04:01, 35.73s/it][A[A[A[A[A[A[A[A

No data about site 2385
getting info from 2391
Kando - GET /api/data/fetch?point_id=2391&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2391&unit_id=&start=None&end=&raw_data=










8it [04:03, 25.52s/it][A[A[A[A[A[A[A[A

No data about site 2391
getting info from 2387
Kando - GET /api/data/fetch?point_id=2387&unit_id=&start=2016&end=&raw_data=
Kando - GET /api/data/fetch?point_id=2387&unit_id=&start=None&end=&raw_data=










9it [04:04, 27.22s/it][A[A[A[A[A[A[A[A

No data about site 2387





In [55]:
# Display the combined frame.
df

Unnamed: 0,PH,EC,ORP,TEMPERATURE,date,weekday,month,hour,point_id,channel_shape,diameter,water_authority,sector
0,6.900,1304.0,-467.0,28.4,2018-09-05 03:05:00,2,9,3,2045,circular_pipe,0.125,24,48
1,6.900,1312.0,-466.0,28.4,2018-09-05 03:10:00,2,9,3,2045,circular_pipe,0.125,24,48
2,6.800,1328.0,-466.0,28.3,2018-09-05 03:15:00,2,9,3,2045,circular_pipe,0.125,24,48
3,6.800,1336.0,-465.0,28.3,2018-09-05 03:20:00,2,9,3,2045,circular_pipe,0.125,24,48
4,6.800,1344.0,-465.0,28.3,2018-09-05 03:25:00,2,9,3,2045,circular_pipe,0.125,24,48
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1003582,1.544,96.0,359.0,23.9,2020-07-05 02:10:00,6,7,2,1332,circular_pipe,0.250,24,48
1003583,1.542,96.0,359.0,23.9,2020-07-05 02:20:00,6,7,2,1332,circular_pipe,0.250,24,48
1003584,1.541,96.0,359.0,24.0,2020-07-05 02:30:00,6,7,2,1332,circular_pipe,0.250,24,48
1003585,1.546,96.0,359.0,24.0,2020-07-05 02:40:00,6,7,2,1332,circular_pipe,0.250,24,48


In [57]:
# Cache the combined frame on disk so it can be reloaded without re-downloading.
df.to_pickle("./gichon_collector.pkl")

In [2]:
# Reload the cached frame.
# NOTE(review): the execution counts restart at this cell (In [2]), so the cells
# from here on were last run in a fresh kernel that skipped the download cells.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
df = pd.read_pickle("./gichon_collector.pkl")

In [3]:
# Display the reloaded frame.
df

Unnamed: 0,PH,EC,ORP,TEMPERATURE,date,weekday,month,hour,point_id,channel_shape,diameter,water_authority,sector
0,6.900,1304.0,-467.0,28.4,2018-09-05 03:05:00,2,9,3,2045,circular_pipe,0.125,24,48
1,6.900,1312.0,-466.0,28.4,2018-09-05 03:10:00,2,9,3,2045,circular_pipe,0.125,24,48
2,6.800,1328.0,-466.0,28.3,2018-09-05 03:15:00,2,9,3,2045,circular_pipe,0.125,24,48
3,6.800,1336.0,-465.0,28.3,2018-09-05 03:20:00,2,9,3,2045,circular_pipe,0.125,24,48
4,6.800,1344.0,-465.0,28.3,2018-09-05 03:25:00,2,9,3,2045,circular_pipe,0.125,24,48
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1003582,1.544,96.0,359.0,23.9,2020-07-05 02:10:00,6,7,2,1332,circular_pipe,0.250,24,48
1003583,1.542,96.0,359.0,23.9,2020-07-05 02:20:00,6,7,2,1332,circular_pipe,0.250,24,48
1003584,1.541,96.0,359.0,24.0,2020-07-05 02:30:00,6,7,2,1332,circular_pipe,0.250,24,48
1003585,1.546,96.0,359.0,24.0,2020-07-05 02:40:00,6,7,2,1332,circular_pipe,0.250,24,48


In [4]:
# Sensor columns passed to the matrix-profile helper.
cols_for_matrix_profile = ['EC', 'PH', 'ORP', 'TEMPERATURE']
# NOTE(review): create_matrix_profile_dictionary is not defined in the visible
# cells (presumably loaded by `%run utils.ipynb`); m=36 is presumably the
# subsequence window length — confirm and consider a named constant.
# The recorded run of this cell ended in KeyboardInterrupt, so `mps` may never
# have been assigned in that session.
mps = create_matrix_profile_dictionary(df, cols_for_matrix_profile, m=36)

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




  keepdims=keepdims)


KeyboardInterrupt: 

In [None]:
# Persist the matrix profiles as JSON. The context manager closes the file even
# if json.dump raises (for example if `mps` holds values json cannot serialise).
# The file name is kept exactly as it was so existing readers still find it.
with open("colliectors_matrix_profile.json", "w") as a_file:
    json.dump(mps, a_file)

In [None]:
# NOTE(review): stray list literal in a never-executed cell; evaluating it only
# displays [244] — candidate for deletion.
[244,]