# Project 1: Extracting Time Series Properties of Glucose Levels in Artificial Pancreas Purpose

In [1]:
import pandas as pd
import datetime as dt
import time
import math

### CGM (Continuous Glucose Monitor) and Insulin Datasets

In [2]:
insulin_data_set_full = pd.read_csv('InsulinData.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
insulin_data = insulin_data_set_full[['Date', 'Time', 'Alarm']]

In [4]:
cgm_data_set_full = pd.read_csv('CGMData.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
cgm_data = cgm_data_set_full[['Date', 'Time', 'Sensor Glucose (mg/dL)']]

In [6]:
insulin_data

Unnamed: 0,Date,Time,Alarm
0,2/12/2018,13:20:53,
1,2/12/2018,13:20:48,Other Alarm Code: 112
2,2/12/2018,13:18:48,
3,2/12/2018,13:18:48,
4,2/12/2018,13:12:33,
...,...,...,...
41430,7/24/2017,19:00:01,
41431,7/24/2017,18:59:44,
41432,7/24/2017,18:59:44,
41433,7/24/2017,18:59:44,


In [7]:
cgm_data

Unnamed: 0,Date,Time,Sensor Glucose (mg/dL)
0,2/12/2018,13:22:27,118.0
1,2/12/2018,13:17:27,122.0
2,2/12/2018,13:12:27,
3,2/12/2018,13:07:27,
4,2/12/2018,13:02:27,
...,...,...,...
55338,7/25/2017,12:28:54,311.0
55339,7/25/2017,12:23:54,311.0
55340,7/25/2017,12:18:54,309.0
55341,7/25/2017,12:13:54,310.0


### Combining Date and Time as DateTime object

In [8]:
#insulin_date_time_series.apply(lambda x: dt.datetime.strptime(x, '%m/%d/%Y %H:%M:%S'))

In [9]:
insulin_data['DateTime'] = pd.to_datetime(insulin_data['Date'] + " " + insulin_data['Time'], format = '%m/%d/%Y %H:%M:%S')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [10]:
insulin_data

Unnamed: 0,Date,Time,Alarm,DateTime
0,2/12/2018,13:20:53,,2018-02-12 13:20:53
1,2/12/2018,13:20:48,Other Alarm Code: 112,2018-02-12 13:20:48
2,2/12/2018,13:18:48,,2018-02-12 13:18:48
3,2/12/2018,13:18:48,,2018-02-12 13:18:48
4,2/12/2018,13:12:33,,2018-02-12 13:12:33
...,...,...,...,...
41430,7/24/2017,19:00:01,,2017-07-24 19:00:01
41431,7/24/2017,18:59:44,,2017-07-24 18:59:44
41432,7/24/2017,18:59:44,,2017-07-24 18:59:44
41433,7/24/2017,18:59:44,,2017-07-24 18:59:44


In [11]:
cgm_data['DateTime'] = pd.to_datetime(cgm_data['Date'] + " " + cgm_data['Time'], format = '%m/%d/%Y %H:%M:%S')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [12]:
cgm_data

Unnamed: 0,Date,Time,Sensor Glucose (mg/dL),DateTime
0,2/12/2018,13:22:27,118.0,2018-02-12 13:22:27
1,2/12/2018,13:17:27,122.0,2018-02-12 13:17:27
2,2/12/2018,13:12:27,,2018-02-12 13:12:27
3,2/12/2018,13:07:27,,2018-02-12 13:07:27
4,2/12/2018,13:02:27,,2018-02-12 13:02:27
...,...,...,...,...
55338,7/25/2017,12:28:54,311.0,2017-07-25 12:28:54
55339,7/25/2017,12:23:54,311.0,2017-07-25 12:23:54
55340,7/25/2017,12:18:54,309.0,2017-07-25 12:18:54
55341,7/25/2017,12:13:54,310.0,2017-07-25 12:13:54


### Finding when the auto mode is turned on based on the earliest 'AUTO MODE ACTIVE PLGM OFF' alarm code

In [13]:
auto_mode_start_datetime = insulin_data[insulin_data['Alarm'] == 'AUTO MODE ACTIVE PLGM OFF']['DateTime'].min()

In [14]:
auto_mode_start_datetime

Timestamp('2017-08-09 08:07:13')

### Splitting the CGM data based on auto and manual modes

In [15]:
cgmAutoDf = cgm_data[cgm_data['DateTime'] >= auto_mode_start_datetime]

In [16]:
cgmManualDf = cgm_data[cgm_data['DateTime'] < auto_mode_start_datetime]

In [17]:
print(len(cgm_data))
print(len(cgmAutoDf))
print(len(cgmManualDf))

55343
51087
4256


### Dividing the CGM data into different timeframes - day time and night time

In [18]:
cgmAutoDayDf = cgmAutoDf[cgmAutoDf['DateTime'].dt.hour >= 6]

In [19]:
cgmAutoNightDf = cgmAutoDf[cgmAutoDf['DateTime'].dt.hour < 6]

In [20]:
cgmManualDayDf = cgmManualDf[cgmManualDf['DateTime'].dt.hour >= 6]

In [21]:
cgmManualNightDf = cgmManualDf[cgmManualDf['DateTime'].dt.hour < 6]

In [22]:
print(len(cgmAutoDayDf), len(cgmAutoNightDf), len(cgmManualDayDf), len(cgmManualNightDf))

38265 12822 3176 1080


### Handling NaN values

#### Removing the dates for which entries are not more than 70% of the expected count

In [23]:
def drop_dates_in_df(df, countCol, threshold = 0, expected_count = 288):
    groupedDataCount = df.groupby('Date').count()[countCol]
    keys_to_drop = list(groupedDataCount[(groupedDataCount / expected_count) < threshold].keys())
    result = df[~df['Date'].isin(keys_to_drop)]
    return result

In [24]:
threshold = 0 #not removing any data for now

In [25]:
#Whole day => 24 hours 
#Frequency of entries => every 5 minutes
#Entry count in 24 hours => Count of 5 minutes in a day => 288

In [26]:
autoModeDf = drop_dates_in_df(df = cgmAutoDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 288)

In [27]:
manualModeDf = drop_dates_in_df(df = cgmManualDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 288)

In [28]:
#Day Time => 6am to 11:59pm (18 hours)
#Frequency of entries => every 5 minutes
#Entry count per day in day time alone => Count of 5 minutes in 18 hours => 216

In [29]:
autoModeDayDf = drop_dates_in_df(df = cgmAutoDayDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 216)

In [30]:
manualModeDayDf = drop_dates_in_df(df = cgmManualDayDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 216)

In [31]:
#Night Time => 12am to 5:59am (6 hours)
#Frequency of entries => every 5 minutes
#Entry count per day in night time alone => Count of 5 minutes in 6 hours => 72

In [32]:
autoModeNightDf = drop_dates_in_df(df = cgmAutoNightDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 72)

In [33]:
manualModeNightDf = drop_dates_in_df(df = cgmManualNightDf, countCol = 'Sensor Glucose (mg/dL)', threshold = threshold, expected_count = 72)

In [34]:
print(len(autoModeDf), len(manualModeDf), len(autoModeDayDf), len(manualModeDayDf), len(autoModeNightDf), len(manualModeNightDf))

51087 4256 38265 3176 12822 1080


### Interpolation

In [35]:
"""
autoModeDf = autoModeDf.interpolate()
manualModeDf = manualModeDf.interpolate()
autoModeDayDf = autoModeDayDf.interpolate()
manualModeDayDf = manualModeDayDf.interpolate()
autoModeNightDf = autoModeNightDf.interpolate()
manualModeNightDf = manualModeNightDf.interpolate()
"""

'\nautoModeDf = autoModeDf.interpolate()\nmanualModeDf = manualModeDf.interpolate()\nautoModeDayDf = autoModeDayDf.interpolate()\nmanualModeDayDf = manualModeDayDf.interpolate()\nautoModeNightDf = autoModeNightDf.interpolate()\nmanualModeNightDf = manualModeNightDf.interpolate()\n'

### Metrics to be extracted:
a) Percentage time in hyperglycemia (CGM > 180 mg/dL),

b) percentage of time in hyperglycemia critical (CGM > 250 mg/dL),

c) percentage time in range (CGM >= 70 mg/dL and CGM <= 180 mg/dL),

d) percentage time in range secondary (CGM >= 70 mg/dL and CGM <= 150 mg/dL),

e) percentage time in hypoglycemia level 1 (CGM < 70 mg/dL), and

f) percentage time in hypoglycemia level 2 (CGM < 54 mg/dL).

Each of the above mentioned metrics are extracted in three different time intervals: daytime (6 am to
midnight), overnight (midnight to 6 am) and whole day (12 am to 12 am).

In [36]:
#df = autoModeDf
#df[(df['Sensor Glucose (mg/dL)'] >= 70) & (df['Sensor Glucose (mg/dL)'] <= 180)]

In [37]:
"""def get_percentage_of_entries_within_range(dataframe, columnName, interval = (None, None)):
    df = dataframe
    total_entries = len(df)
    range_entries = 0
    (minRange, maxRange) = interval
    if minRange is not None and maxRange is not None:
        range_entries = len(df[(df[columnName] >= minRange) & (df[columnName] <= maxRange)])
    elif minRange is not None:
        range_entries = len(df[df[columnName] > minRange])
    elif maxRange is not None:
        range_entries = len(df[df[columnName] < maxRange])
    return (range_entries/total_entries) * 100
    """

'def get_percentage_of_entries_within_range(dataframe, columnName, interval = (None, None)):\n    df = dataframe\n    total_entries = len(df)\n    range_entries = 0\n    (minRange, maxRange) = interval\n    if minRange is not None and maxRange is not None:\n        range_entries = len(df[(df[columnName] >= minRange) & (df[columnName] <= maxRange)])\n    elif minRange is not None:\n        range_entries = len(df[df[columnName] > minRange])\n    elif maxRange is not None:\n        range_entries = len(df[df[columnName] < maxRange])\n    return (range_entries/total_entries) * 100\n    '

In [38]:
#using count instead of len
def get_percentage_of_entries_within_range(dataframe, columnName, interval, expected_count_per_day):
    df = dataframe
    numDays = len(df['Date'].unique())
    outOf = numDays * expected_count_per_day
    range_entries = 0
    (minRange, maxRange) = interval
    if minRange is not None and maxRange is not None:
        range_entries = df[(df[columnName] >= minRange) & (df[columnName] <= maxRange)].count()[columnName]
    elif minRange is not None:
        range_entries = df[df[columnName] > minRange].count()[columnName]
    elif maxRange is not None:
        range_entries = df[df[columnName] < maxRange].count()[columnName]
    return (range_entries / (outOf * 1.0)) * 100

In [39]:
"""columnName = 'Sensor Glucose (mg/dL)'
manualModeDfList = [manualModeNightDf, manualModeDayDf, manualModeDf]
autoModeDfList = [autoModeNightDf, autoModeDayDf, autoModeDf] 
intervalList = [(180, None), (250, None), (70, 180), (70, 150), (None, 70), (None, 54)]

manualModeEntries = []
for df in manualModeDfList:
    for interval in intervalList:
        manualModeEntries.append(get_percentage_of_entries_within_range(df, columnName, interval))
autoModeEntries = []
for df in autoModeDfList:
    for interval in intervalList:
        autoModeEntries.append(get_percentage_of_entries_within_range(df, columnName, interval))
manualModeEntries.append(1.1)
autoModeEntries.append(1.1)"""

columnName = 'Sensor Glucose (mg/dL)'
#manualModeDfList = [(manualModeNightDf, 72), (manualModeDayDf, 216), (manualModeDf, 288)]
#autoModeDfList = [(autoModeNightDf, 72), (autoModeDayDf, 216), (autoModeDf, 288)] 

manualModeDfList = [(manualModeNightDf, 288), (manualModeDayDf, 288), (manualModeDf, 288)]
autoModeDfList = [(autoModeNightDf, 288), (autoModeDayDf, 288), (autoModeDf, 288)] 

intervalList = [(180, None), (250, None), (70, 180), (70, 150), (None, 70), (None, 54)]

manualModeEntries = []
for df,expected_count_per_day in manualModeDfList:
    for interval in intervalList:
        manualModeEntries.append(get_percentage_of_entries_within_range(df, columnName, interval, expected_count_per_day))
autoModeEntries = []
for df,expected_count_per_day in autoModeDfList:
    for interval in intervalList:
        autoModeEntries.append(get_percentage_of_entries_within_range(df, columnName, interval, expected_count_per_day))
manualModeEntries.append(1.1)
autoModeEntries.append(1.1)

In [40]:
result_df = pd.DataFrame([manualModeEntries, autoModeEntries])

In [41]:
result_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,4.837963,1.018519,18.240741,13.402778,0.671296,0.0,22.829861,8.463542,34.830729,27.105035,4.578993,1.801215,27.365451,9.418403,51.931424,39.670139,5.208333,1.801215,1.1
1,2.600265,0.405743,20.265684,17.681024,0.630072,0.152154,19.535108,4.560185,45.115741,33.800154,3.31983,0.983796,22.106481,4.96142,65.15625,51.284722,3.942901,1.134259,1.1


In [42]:
result_df.to_csv('Results.csv', index = False, header = False)