In [None]:
import sys
import os

import json
import numpy as np
from skmultiflow.drift_detection import PageHinkley

#### Load the Sensor Data
Load the sensor data from the JSON file. Set `file_path` in the cell below to the actual path of your JSON file.

In [4]:
import pandas as pd

# Location of the JSON file with the sensor readings (replace with your own path)
file_path = 'sensor_readings.json'

# Read the file with an explicit UTF-8 encoding so the result does not depend
# on the platform's default text encoding.
with open(file_path, 'r', encoding='utf-8') as json_file:
    sensor_data = json.load(json_file)

# Build a DataFrame from the parsed JSON
df = pd.DataFrame(sensor_data)

In [5]:
# Preview the first few rows of the DataFrame. Only the last expression of a
# cell is rendered automatically, so the preview is printed explicitly;
# otherwise its result would be discarded.
print(df.head())

# Display DataFrame information (data types, non-null values, etc.);
# info() prints its report itself.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   date                61 non-null     object
 1   flow_rate_readings  61 non-null     object
 2   pressure_readings   61 non-null     object
dtypes: object(3)
memory usage: 1.6+ KB


Check for missing values in the data.

In [6]:
# Count the null entries in every column (isna() is an alias of isnull())
missing_values = df.isna().sum()

# Show the per-column totals
print(missing_values)


date                  0
flow_rate_readings    0
pressure_readings     0
dtype: int64


Ensure that every column has the expected data type.

In [7]:
# Show the dtype pandas assigned to each column
column_dtypes = df.dtypes
print(column_dtypes)


date                  object
flow_rate_readings    object
pressure_readings     object
dtype: object


In [8]:
# Descriptive statistics for each column; the trailing bare expression is
# what the notebook renders as the cell output.
summary_stats = df.describe()
summary_stats

Unnamed: 0,date,flow_rate_readings,pressure_readings
count,61,61,61
unique,61,61,61
top,2023-10-01,"[11, 25, 7, 32, 21, 19, 16, 18, 22, 30, 21, 18...","[34, 52, 39, 64, 79, 31, 30, 139, 62, 32, 60, ..."
freq,1,1,1


In [9]:
# Initialize the algorithm

In [10]:
# Create the Page-Hinkley drift detector (PageHinkley is imported in the
# notebook's first cell). See the scikit-multiflow documentation for the
# meaning of each tuning parameter.
ph = PageHinkley(
    min_instances=30,
    delta=0.005,
    threshold=50,
    alpha=0.9999,
)


Iterate Over Data and Detect Drift
Apply the Page-Hinkley method to your data. 

In [11]:

# Run Page-Hinkley change detection over the flow-rate readings of one day
target_date = '2023-10-29'

# Keep only the row(s) whose 'date' equals the target date
target_day_data = df[df['date'] == target_date]

# Detections are collected as (date, index, value) tuples
change_points = []

if target_day_data.empty:
    print(f"No data available for {target_date}")
else:
    # Take the 'flow_rate_readings' entry of the first matching row and
    # iterate over it reading by reading.
    day_readings = target_day_data['flow_rate_readings'].iloc[0]

    # Use a freshly constructed detector for this run
    ph = PageHinkley(min_instances=30, delta=0.005, threshold=50, alpha=0.9999)

    for i, value in enumerate(day_readings):
        ph.add_element(value)
        if not ph.detected_change():
            continue
        print(f"Change detected on {target_date} at index: {i}, value: {value}")
        change_points.append((target_date, i, value))


Change detected on 2023-10-29 at index: 28, value: 26
Change detected on 2023-10-29 at index: 138, value: 81
Change detected on 2023-10-29 at index: 175, value: 87
Change detected on 2023-10-29 at index: 204, value: 10
Change detected on 2023-10-29 at index: 267, value: 95
Change detected on 2023-10-29 at index: 316, value: 89
Change detected on 2023-10-29 at index: 345, value: 21
Change detected on 2023-10-29 at index: 409, value: 65


In [12]:
def _detect_changes(readings, detector, label, date):
    """Feed `readings` to `detector` one by one and report every detected change.

    Each detection is printed as "<label> change detected on <date> at
    index: <i>, value: <v>" and returned as a (date, index, value) tuple.
    """
    detected = []
    for index, reading in enumerate(readings):
        detector.add_element(reading)
        if detector.detected_change():
            print(f"{label} change detected on {date} at index: {index}, value: {reading}")
            detected.append((date, index, reading))
    return detected


# Day to analyze
target_date = '2023-10-29'

# Keep only the row(s) whose 'date' equals the target date
target_day_data = df[df['date'] == target_date]

# One independent detector per signal
ph_flow = PageHinkley(min_instances=30, delta=0.005, threshold=50, alpha=0.9999)
ph_pressure = PageHinkley(min_instances=30, delta=0.005, threshold=50, alpha=0.9999)

# Results stay empty when there is no data (or no such column) for the day
change_points_flow = []
change_points_pressure = []

if target_day_data.empty:
    print(f"No data available for {target_date}")
else:
    # Flow rate: readings come from the first matching row
    if 'flow_rate_readings' in target_day_data.columns:
        day_flow_readings = target_day_data['flow_rate_readings'].iloc[0]
        change_points_flow = _detect_changes(
            day_flow_readings, ph_flow, "Flow rate", target_date
        )

    # Pressure: same procedure with its own detector
    if 'pressure_readings' in target_day_data.columns:
        day_pressure_readings = target_day_data['pressure_readings'].iloc[0]
        change_points_pressure = _detect_changes(
            day_pressure_readings, ph_pressure, "Pressure", target_date
        )

# Output the change points
# change_points_flow, change_points_pressure


Flow rate change detected on 2023-10-29 at index: 28, value: 26
Flow rate change detected on 2023-10-29 at index: 138, value: 81
Flow rate change detected on 2023-10-29 at index: 175, value: 87
Flow rate change detected on 2023-10-29 at index: 204, value: 10
Flow rate change detected on 2023-10-29 at index: 267, value: 95
Flow rate change detected on 2023-10-29 at index: 316, value: 89
Flow rate change detected on 2023-10-29 at index: 345, value: 21
Flow rate change detected on 2023-10-29 at index: 409, value: 65
Pressure change detected on 2023-10-29 at index: 103, value: 72
Pressure change detected on 2023-10-29 at index: 132, value: 71
Pressure change detected on 2023-10-29 at index: 161, value: 75
Pressure change detected on 2023-10-29 at index: 190, value: 55
Pressure change detected on 2023-10-29 at index: 247, value: 85
Pressure change detected on 2023-10-29 at index: 276, value: 133
Pressure change detected on 2023-10-29 at index: 305, value: 45
Pressure change detected on 2023

In [13]:
# Show the collected flow-rate change points as (date, index, value) tuples
print(change_points_flow)

[('2023-10-29', 28, 26), ('2023-10-29', 138, 81), ('2023-10-29', 175, 87), ('2023-10-29', 204, 10), ('2023-10-29', 267, 95), ('2023-10-29', 316, 89), ('2023-10-29', 345, 21), ('2023-10-29', 409, 65)]


In [14]:
# Function that flags a "possible error" when changes were detected at three consecutive indices

In [15]:
def find_consecutive_changes(change_points):
    """Find runs of three change points detected at consecutive indices.

    Parameters
    ----------
    change_points : sequence of tuple
        Detected change points; element ``[1]`` of each entry is the index
        at which the change was detected.

    Returns
    -------
    list of tuple
        One ``(first_index, last_index)`` pair for every window of three
        adjacent entries whose indices are ``n, n + 1, n + 2``. Overlapping
        windows are each reported. Empty when there are fewer than three
        entries or no such window exists.
    """
    error_ranges = []
    # A window uses entries i, i + 1 and i + 2, so the last valid start is
    # len - 3; stopping any earlier would skip windows at the end of the list.
    for i in range(len(change_points) - 2):
        first = change_points[i][1]
        second = change_points[i + 1][1]
        third = change_points[i + 2][1]
        # Check if three consecutive indices are present
        if first == second - 1 == third - 2:
            error_ranges.append((first, third))

    return error_ranges

# Look for runs of three consecutive change indices in each signal
error_ranges_flow = find_consecutive_changes(change_points_flow)
error_ranges_pressure = find_consecutive_changes(change_points_pressure)

# Report the findings, one line per signal
for label, ranges in (
    ("flow rate", error_ranges_flow),
    ("pressure", error_ranges_pressure),
):
    print(f"Possible error in {label} readings:", ranges)


Possible error in flow rate readings: []
Possible error in pressure readings: []
