In [74]:
import pandas as pd
import time
import numpy as np
from datetime import datetime as dt, timedelta

## Offline IN

In [75]:
def offline_interval_action_detection(glucose_data, extreme_high_threshold=250, high_threshold=180, low_threshold=80, extreme_low_threshold=55):
    """
    Crea intervalli per ciascun livello di glucosio nel dataset fornito.

    Argomenti:
    glucose_data : DataFrame
        Il DataFrame contenente i dati sui livelli di glucosio.
    extreme_high_threshold : int, default=250
        La soglia per il livello di glucosio estremamente alto.
    high_threshold : int, default=180
        La soglia per il livello di glucosio alto.
    low_threshold : int, default=80
        La soglia per il livello di glucosio basso.
    extreme_low_threshold : int, default=55
        La soglia per il livello di glucosio estremamente basso.

    Ritorna:
    intervals : list
        Una lista di tuple che rappresentano gli intervalli di tempo associati a ciascun livello di glucosio.
    """
    intervals, events = [], []
    for index, row in glucose_data.iloc[:10000].iterrows():
        timestamp = dt.fromisoformat(row['Data e ora (AAAA-MM-GGThh:mm:ss)'])
        glucose_value = row['Valore del glucosio (mg/dL)']
        
        if glucose_value == "Basso":
            event, symbol = 'extreme_low', 'd'
        else:
            glucose_level = int(glucose_value)
            if glucose_level >= extreme_high_threshold:
                event, symbol = 'extremely_high', 'a'
            elif glucose_level >= high_threshold:
                event, symbol = 'high', 'b'
            elif glucose_level <= extreme_low_threshold:
                event, symbol = 'extremely_low', 'e'
            elif glucose_level <= low_threshold:
                event, symbol = 'low', 'd'
            else:
                event, symbol = 'normal', 'c'

        if not intervals or intervals[-1][3] != event:
            intervals.append((symbol, timestamp, timestamp, event))
        else:
            intervals[-1] = (intervals[-1][0], intervals[-1][1], timestamp, event)

        events.append((symbol, event, glucose_level if glucose_value != "Basso" else "extreme_low"))

    return intervals, events


## Classi per la gestione del glucosio 

In [76]:

class Interval:
    def __init__(self, start_time, end_time, symbol=None, event=None, duration=None):
        self.symbol = symbol
        self.start_time = start_time
        self.end_time = end_time
        self.event = event
        self.duration = duration if duration else self.end_time - self.start_time

In [77]:
class ISEQL:
    def __init__(self):
        self.intervals = []

    def add_interval(self, interval):
        self.intervals.append(interval)

    def get_intervals(self):
        return self.intervals

    def find_event_interval(self, event_name):
        for interval in self.intervals:
            if interval.event == event_name:
                return interval
        return None

    def check_anomalies_event(self, interval):
        return interval.event != 'normal'

    def create_daily_intervals(self):
        daily_intervals = {}
        for interval in self.intervals:
            start_date = interval.start_time.date()
            if start_date not in daily_intervals:
                daily_intervals[start_date] = []
            daily_intervals[start_date].append(interval)

        # Imposta la frequenza a 1 giorno
        frequency_threshold = timedelta(days=1)

        result = {}
        for start_date, intervals in daily_intervals.items():
            end_date = start_date + frequency_threshold
            start_time = dt.combine(start_date, dt.min.time())
            end_time = dt.combine(end_date, dt.min.time())
            result[start_date] = {
                'intervals': intervals,
                'start_time': start_time,
                'end_time': end_time
            }

        return result


    def DURING(self, interval1, interval2):
        start_time1 = interval1.start_time
        end_time1 = interval1.end_time
        start_time2 = interval2.start_time
        end_time2 = interval2.end_time

        return start_time2 <= start_time1 and end_time1 <= end_time2


    def BEFORE(self, interval1, interval2, delta):
        """
        Check if interval1 occurs completely before interval2 with a specified delta.

        Args:
            interval1 (Interval): The first interval.
            interval2 (Interval): The second interval.
            delta (timedelta): The time difference threshold.

        Returns:
            bool: True if interval1 occurs completely before interval2 considering the delta, False otherwise.
        """
        # Check if interval1 ends before interval2 starts and the difference in time is less than or equal to delta
        return interval1.end_time < interval2.start_time <= interval1.end_time + delta


    def CARDINALITY_CONSTRAINTS(self, intervals, event, min_count):
        """
        Check if the number of intervals with a specific level within a given time frame meets the minimum count.

        Args:
            level (str): The level to count (e.g., "high", "low", "extremely high", "extremely low").
            min_count (int): The minimum number of occurrences required.
            time_frame_start (datetime): The start of the time frame to consider.
            time_frame_end (datetime): The end of the time frame to consider.

        Returns:
            bool: True if the count of intervals meeting the criteria is at least min_count, False otherwise.
        """
        count = 0
        for interval in intervals:
            if (interval.event == event):
                count += 1

        return count >= min_count

    def OVERLAP_PERCENTAGE_delta(self, interval, extremely_high_delta=timedelta(minutes=45),
                                 high_delta=timedelta(hours=1, minutes=30), low_delta=timedelta(minutes=30),
                                 extremely_low_delta=timedelta(minutes=10)):
        # Determine the delta threshold based on the event
        if interval.event == "extremely_high":
            delta = extremely_high_delta
        elif interval.event == "high":
            delta = high_delta
        elif interval.event == "low":
            delta = low_delta
        elif interval.event == "extremely_low":
            delta = extremely_low_delta
        else:
            return False  # If the event is not recognized, return False

        # Check if the interval's duration meets or exceeds the delta threshold
        return interval.duration >= delta

    def find_time_swing(self, time_threshold=timedelta(hours=2)):
        time_swings = []
        for i in range(len(self.intervals) - 2):
            interval1 = self.intervals[i]
            interval2 = self.intervals[i + 2]

            if self.BEFORE(interval1, interval2,
                                 time_threshold) and interval1.event != interval2.event and self.check_anomalies_event(
                    interval1) and self.check_anomalies_event(interval2):
                time_swings.append((interval1, interval2))
        return time_swings

    def find_too_frequent_glucose_anomalies(self, min_high=3, min_low=3, min_extremely_high=2, min_extremely_low=2):
        anomalous_frequency = []

        daily_intervals = self.create_daily_intervals()

        for start_date, interval_info in daily_intervals.items():
            start_time = interval_info['start_time']
            end_time = interval_info['end_time']

            # Create a reference interval for the current time frame
            reference_interval = Interval(start_time, end_time)

            # Initialize event lists
            extremely_high_event = []
            high_event = []
            low_event = []
            extremely_low_event = []

            # Process each interval within the daily interval
            for current_interval in interval_info['intervals']:
                # Check if the current interval occurs during the reference interval
                if self.DURING(current_interval, reference_interval):
                    if self.check_anomalies_event(current_interval):
                        if current_interval.event == 'extremely_high':
                            extremely_high_event.append(current_interval)
                        elif current_interval.event == 'high':
                            high_event.append(current_interval)
                        elif current_interval.event == 'low':
                            low_event.append(current_interval)
                        elif current_interval.event == 'extremely_low':
                            extremely_low_event.append(current_interval)

            # Apply cardinality constraints
            high_anomalous_count = len(high_event)
            low_anomalous_count = len(low_event)
            extremely_high_anomalous_count = len(extremely_high_event)
            extremely_low_anomalous_count = len(extremely_low_event)

            total_count = high_anomalous_count + low_anomalous_count + extremely_high_anomalous_count + extremely_low_anomalous_count

            if (self.CARDINALITY_CONSTRAINTS(high_event, "high", min_high) or
                    self.CARDINALITY_CONSTRAINTS(low_event, "low", min_low) or
                    self.CARDINALITY_CONSTRAINTS(extremely_high_event, "extremely_high", min_extremely_high) or
                    self.CARDINALITY_CONSTRAINTS(extremely_low_event, "extremely_low", min_extremely_low)):
                anomalous_frequency.append((
                    start_date,
                    end_time,
                    high_anomalous_count,
                    low_anomalous_count,
                    extremely_high_anomalous_count,
                    extremely_low_anomalous_count,
                    total_count
                ))

        return anomalous_frequency

    def find_too_frequent_time_swings(self,time_swing_threshold=timedelta(hours=2), min_ts=2):
        time_swings_too_frequent = []
        daily_intervals = self.create_daily_intervals()  # Ensure this returns a dict with intervals for each day

        for start_date, interval_info in daily_intervals.items():
            start_time = interval_info['start_time']
            end_time = interval_info['end_time']
            reference_interval = Interval(start_time, end_time)

            # Initialize list to store detected time swings
            time_swings = []

            intervals = interval_info['intervals']

            for i in range(len(intervals) - 2):
                interval1 = intervals[i]
                interval2 = intervals[i + 2]

                if self.DURING(interval1, reference_interval) and self.DURING(interval2, reference_interval):
                    if self.BEFORE(interval1, interval2,
                                         time_swing_threshold) and interval1.event != interval2.event and self.check_anomalies_event(
                            interval1) and self.check_anomalies_event(interval2):
                        time_swings.append((interval1, interval2))

                        # cardinality constrains
            if len(time_swings) >= min_ts:
                time_swings_too_frequent.append(time_swings)

        return time_swings_too_frequent


    def find_too_long_glucose_anomalies(self, extremely_high_delta=timedelta(minutes=45),
                                high_delta=timedelta(hours=1, minutes=30), low_delta=timedelta(minutes=30),
                                extremely_low_delta=timedelta(minutes=10)):
        anomalous_duration = []

        # Create a dictionary for event types and their corresponding thresholds
        thresholds = {
            "extremely_high": extremely_high_delta,
            "high": high_delta,
            "low": low_delta,
            "extremely_low": extremely_low_delta
        }

        for interval in self.intervals:
            event_type = interval.event

            # Determine the threshold for the current event type
            if event_type in thresholds:
                delta = thresholds[event_type]
            else:
                continue  # Skip unrecognized events

            # Check if the interval's duration meets or exceeds the threshold
            if interval.duration >= delta:
                anomalous_duration.append(interval)

        return anomalous_duration

    def find_time_swing_with_too_long_glucose_anomalies(self):
        time_swings_duration = []

        time_swings = self.find_time_swing()
        anomalous_duration = self.find_too_long_glucose_anomalies()
        for ad in anomalous_duration:
            for ts in time_swings:
                if ad == ts[0] or ad == ts[1]:
                    event = f"{ad.event.capitalize()} event"
                    time_swings_duration.append((ts[0], ts[1], f"{event}: {ad.duration}"))

        return time_swings_duration


## Data Crawler 

Evento a basso livello che rappresenta un nuovo pattern di livelli di glucosio

In [78]:
gl = pd.read_csv('../data/glucoseLevel.csv', delimiter=';')

colonne_specifiche = ['Tipo di evento','Sottotipo di evento', 'Data e ora (AAAA-MM-GGThh:mm:ss)', 'Valore del glucosio (mg/dL)']

gl = gl[colonne_specifiche].iloc[18:]
gl

Unnamed: 0,Tipo di evento,Sottotipo di evento,Data e ora (AAAA-MM-GGThh:mm:ss),Valore del glucosio (mg/dL)
18,EGV,,2024-02-13T00:01:32,125
19,EGV,,2024-02-13T00:06:33,123
20,EGV,,2024-02-13T00:11:33,118
21,EGV,,2024-02-13T00:16:32,120
22,EGV,,2024-02-13T00:21:33,123
...,...,...,...,...
25379,EGV,,2024-05-12T12:11:08,90
25380,EGV,,2024-05-12T12:16:08,87
25381,EGV,,2024-05-12T12:21:08,105
25382,EGV,,2024-05-12T12:26:08,117


Creazione tabella contente gli eventi principali con relativa soglia

In [79]:
# Definizione dei dati
data = {
        'Symbol': ['a','b', 'c', 'd', 'e'],
        'Event': ['Extremely High', 'High','Normal', 'Low', 'Extremely low'],
        'Thresold': [' >= 250', ' 180 <= x < 250', '80 < x < 180', '55 < x <= 80 ', ' <= 55']
}
dfe = pd.DataFrame(data)

dfe

Unnamed: 0,Symbol,Event,Thresold
0,a,Extremely High,>= 250
1,b,High,180 <= x < 250
2,c,Normal,80 < x < 180
3,d,Low,55 < x <= 80
4,e,Extremely low,<= 55



## Interval Labeling

Organizzare i dati in intervalli temporali: ogni momento temporale sarà associato a intervalli che rappresentano la situazione corrispondente, che può essere normale, alta, bassa o bassissima. Inoltre, identificheremo finestre temporali in cui si verificano pattern specifici di eventi.

In [80]:
results = offline_interval_action_detection(gl)

Popolazione del nuovo dataset e della classe ISEQL

In [81]:
data = []
iseql = ISEQL()
for interval_labeling in results[0]:
    duration = interval_labeling[2]-interval_labeling[1]
    interval_iseql = Interval(interval_labeling[1], interval_labeling[2], interval_labeling[0],interval_labeling[3], duration)
    iseql.add_interval(interval_iseql)
    data.append({
        'Symbol': interval_labeling[0],
        'Start time': interval_labeling[1],
        'End time': interval_labeling[2],
        'Event': interval_labeling[3],
        'Duration' : duration
    })
dfi = pd.DataFrame(data)



#dfi.to_csv(r"./glucose_level.csv")
dfi

Unnamed: 0,Symbol,Start time,End time,Event,Duration
0,c,2024-02-13 00:01:32,2024-02-13 10:46:33,normal,0 days 10:45:01
1,b,2024-02-13 10:51:33,2024-02-13 11:06:32,high,0 days 00:14:59
2,c,2024-02-13 11:11:33,2024-02-13 12:11:33,normal,0 days 01:00:00
3,b,2024-02-13 12:16:33,2024-02-13 13:06:33,high,0 days 00:50:00
4,c,2024-02-13 13:11:33,2024-02-13 17:31:33,normal,0 days 04:20:00
...,...,...,...,...,...
584,c,2024-03-18 11:53:06,2024-03-18 15:13:07,normal,0 days 03:20:01
585,d,2024-03-18 15:18:06,2024-03-18 15:33:06,low,0 days 00:15:00
586,c,2024-03-18 15:38:06,2024-03-18 18:13:06,normal,0 days 02:35:00
587,b,2024-03-18 18:18:06,2024-03-18 19:13:07,high,0 days 00:55:01


## Time swing

Rappresenta un rapido cambiamento nei livelli di glucosio del sangue in un breve lasso di tempo

In [82]:
time_swings = iseql.find_time_swings(timedelta(hours=3))
data = []
for time_swing in time_swings:
    time_swing = time_swing[1].start_time-time_swing[0].end_time
    data.append({
        'Day': time_swing[0].start_time.date(),  # Giorno del primo evento
        'First event': time_swing[0].event,
        'Second event': time_swing[1].event,
        'Duration time swing': time_swing,

    })
dfgs = pd.DataFrame(data)

dfgs

AttributeError: 'ISEQL' object has no attribute 'find_time_swings'

## Anomalous Frequency

 Numero di volte in cui i livelli di glucosio del paziente superano o scendono al di sotto delle soglie normali entro un determinato periodo di tempo

In [25]:
result = iseql.find_too_frequent_glucose_anomalies()

data = {
    "Day": [],
    "High Count": [],
    "Low Count": [],
    "Extremely High Count": [],
    "Extremely Low Count": [],
    "Total Count": [],
}

for (start_time, end_time, high_anomalous_count, low_anomalous_count, extremely_high_anomalous_count, extremely_low_anomalous_count, total_count) in result:
    data["Day"].append(f"{start_time}")
    data["High Count"].append(high_anomalous_count)
    data["Low Count"].append(low_anomalous_count)
    data["Extremely High Count"].append(extremely_high_anomalous_count)
    data["Extremely Low Count"].append(extremely_low_anomalous_count)
    data["Total Count"].append(total_count)

dfca = pd.DataFrame(data)
dfca

Unnamed: 0,Day,High Count,Low Count,Extremely High Count,Extremely Low Count,Total Count
0,2024-02-13,4,2,1,0,7
1,2024-02-14,7,2,0,0,9
2,2024-02-15,9,1,1,0,11
3,2024-02-16,6,0,1,0,7
4,2024-02-17,4,2,0,0,6
5,2024-02-18,3,4,0,0,7
6,2024-02-19,4,2,0,0,6
7,2024-02-20,7,1,1,0,9
8,2024-02-21,5,3,0,0,8
9,2024-02-23,3,13,0,1,17


## Time Swing too Frequent


In [83]:
# Adjusted function to use start_time and end_time correctly
time_swings_too_frequent = iseql.find_too_frequent_time_swings()

data = []
for swing_set in time_swings_too_frequent:
    event_details = []
    for interval1, interval2 in swing_set:
        time_swing = interval2.start_time - interval1.end_time
        event_details.append({
            'Day': f"{interval1.start_time.date()}",
            'First event': interval1.event,
            'Second event': interval2.event,
            'Duration time swing': time_swing,
        })

    # Create a summary for each set of swings
    data.append({
        'Number of Time Swings': len(swing_set),
        'Events': event_details
    })

dfgs = pd.DataFrame(data)
dfgs

Unnamed: 0,Number of Time Swings,Events
0,2,"[{'Day': '2024-02-13', 'First event': 'high', ..."
1,2,"[{'Day': '2024-02-15', 'First event': 'high', ..."
2,2,"[{'Day': '2024-02-16', 'First event': 'high', ..."
3,2,"[{'Day': '2024-02-20', 'First event': 'high', ..."
4,2,"[{'Day': '2024-02-23', 'First event': 'low', '..."
5,2,"[{'Day': '2024-02-25', 'First event': 'high', ..."
6,6,"[{'Day': '2024-02-27', 'First event': 'high', ..."
7,2,"[{'Day': '2024-02-29', 'First event': 'high', ..."
8,3,"[{'Day': '2024-03-01', 'First event': 'extreme..."
9,4,"[{'Day': '2024-03-03', 'First event': 'high', ..."


## Anomalous duration

Periodo di tempo durante il quale il livello di glucosio del paziente rimane fuori dalle soglie normali

In [62]:
result = iseql.find_too_long_glucose_anomalies()

data = {
    "Day": [intrvl.start_time.date() for intrvl in result],
    "Event": [intrvl.event for intrvl in result],
    "Start Time": [intrvl.start_time for intrvl in result],
    "End Time": [intrvl.end_time for intrvl in result],
    "Duration": [(intrvl.end_time - intrvl.start_time) for intrvl in result]
}

dfca = pd.DataFrame(data)

dfca

Unnamed: 0,Day,Event,Start Time,End Time,Duration
0,2024-02-13,extremely_high,2024-02-13 18:16:33,2024-02-13 19:16:34,0 days 01:00:01
1,2024-02-14,low,2024-02-14 01:46:33,2024-02-14 02:51:34,0 days 01:05:01
2,2024-02-17,high,2024-02-17 20:41:16,2024-02-17 22:11:16,0 days 01:30:00
3,2024-02-18,low,2024-02-18 11:26:16,2024-02-18 11:56:16,0 days 00:30:00
4,2024-02-18,low,2024-02-18 14:56:16,2024-02-18 15:31:15,0 days 00:34:59
5,2024-02-20,extremely_high,2024-02-20 11:06:16,2024-02-20 12:01:15,0 days 00:54:59
6,2024-02-21,low,2024-02-21 03:21:16,2024-02-21 04:31:15,0 days 01:09:59
7,2024-02-23,low,2024-02-23 01:36:16,2024-02-23 02:51:15,0 days 01:14:59
8,2024-02-23,low,2024-02-23 06:46:16,2024-02-23 07:56:15,0 days 01:09:59
9,2024-02-23,extremely_high,2024-02-23 23:41:15,2024-02-24 00:31:16,0 days 00:50:01


## Time Swing Duration

In [61]:
time_swings_duration = iseql.find_time_swing_with_too_long_glucose_anomalies()

data = []
for interval1, interval2, description in time_swings_duration:
    time_swing = interval2.start_time - interval1.end_time

    data.append({
        'Day': interval1.start_time.date(),  # Giorno del primo evento
        'First event': interval1.event,
        'Second event': interval2.event,
        'Duration time swing': time_swing,  # Conversione in secondi per esprimere la durata
        'Anomalous durations': description
    })

dfgs = pd.DataFrame(data)

dfgs


Unnamed: 0,Day,First event,Second event,Duration time swing,Anomalous durations
0,2024-02-14,high,low,0 days 01:04:59,Low event: 1:05:01
1,2024-02-18,low,high,0 days 01:30:01,Low event: 0:34:59
2,2024-02-21,high,low,0 days 01:25:01,Low event: 1:09:59
3,2024-02-23,high,low,0 days 00:55:01,Low event: 1:14:59
4,2024-02-25,low,high,0 days 01:25:01,Low event: 0:40:00
5,2024-02-26,high,low,0 days 00:40:00,Low event: 0:30:00
6,2024-03-11,high,low,0 days 01:19:59,Low event: 0:35:01
7,2024-03-13,high,low,0 days 01:00:00,Low event: 0:40:01
8,2024-03-18,high,low,0 days 01:05:00,High event: 1:50:00
9,2024-03-18,high,low,0 days 01:05:00,Low event: 0:30:00
