In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from past.builtins import xrange
import datetime
from datetime import timedelta
from pandas import DataFrame
from scipy.fftpack import fft, ifft,dct
import pywt
import scipy.stats as stats
from sklearn.metrics import classification_report, accuracy_score, make_scorer

import warnings
warnings.filterwarnings("ignore")

In [2]:
insulin_data1=pd.read_csv('InsulinData.csv',low_memory=False, parse_dates = [['Date', 'Time']])[['Date_Time', 'BWZ Carb Input (grams)']]
cgm_data1=pd.read_csv('CGMData.csv',low_memory=False,parse_dates = [['Date', 'Time']])

In [3]:
insulin_data2=pd.read_excel('InsulinAndMealIntake670GPatient3.xlsx', parse_dates = [['Date', 'Time']])[['Date_Time', 'BWZ Carb Input (grams)']]
cgm_data2=pd.read_excel('CGMData670GPatient3.xlsx',parse_dates = [['Date', 'Time']])

In [4]:
def trimData(insulin_data, cgm_data):
    min_insulin_time = min(insulin_data['Date_Time'])
    min_cgm_time = min(cgm_data['Date_Time'])
    min_timestamp = max(min_insulin_time, min_cgm_time)
    max_insulin_time = max(insulin_data['Date_Time'])
    max_cgm_time = max(cgm_data['Date_Time'])
    max_timestamp = min(max_insulin_time, max_cgm_time)
    insulin_trimmed = insulin_data[(insulin_data['Date_Time'] >= min_timestamp)]
    insulin_trimmed = insulin_trimmed.reset_index(drop=True)
    insulin_trimmed = insulin_data[(insulin_data['Date_Time'] <= max_timestamp)]
    insulin_trimmed = insulin_trimmed.reset_index(drop=True)
    cgm_trimmed = cgm_data[(cgm_data['Date_Time'] >= min_timestamp)]
    cgm_trimmed = cgm_trimmed.reset_index(drop=True)
    cgm_trimmed = cgm_data[(cgm_data['Date_Time'] <= max_timestamp)]
    cgm_trimmed = cgm_trimmed.reset_index(drop=True)
    return insulin_trimmed, cgm_trimmed

In [5]:
insulin_data1_trimmed, cgm_data1_trimmed = trimData(insulin_data1, cgm_data1)
insulin_data2_trimmed, cgm_data2_trimmed = trimData(insulin_data2, cgm_data2)

In [6]:
Bool_carbinput_data1=pd.notnull(insulin_data1_trimmed['BWZ Carb Input (grams)'])
Bool_carbinput_data2=pd.notnull(insulin_data2_trimmed['BWZ Carb Input (grams)'])
insuline_data_ext1=insulin_data1_trimmed[Bool_carbinput_data1]
insuline_data_ext2=insulin_data2_trimmed[Bool_carbinput_data2]
insuline_data_ext1 = insuline_data_ext1[insuline_data_ext1['BWZ Carb Input (grams)'] != 0].sort_values(by=['Date_Time'], ignore_index = True)
insuline_data_ext1 = insuline_data_ext1.reset_index(drop=True)
insuline_data_ext2 = insuline_data_ext2[insuline_data_ext2['BWZ Carb Input (grams)'] != 0].sort_values(by=['Date_Time'], ignore_index = True)
insuline_data_ext2 = insuline_data_ext2.reset_index(drop=True)

In [7]:
def extract_meal_cgm_timestamps(meal_dataframe):
    meal_data = []
    meal_len = len(meal_dataframe)-1
    for row in range(0,meal_len):
        new_time = meal_dataframe.at[row, 'Date_Time'] + timedelta(hours = 2)
        if(new_time > meal_dataframe.at[row+1, 'Date_Time']):
           continue
        else:
             meal_data.append(meal_dataframe.at[row, 'Date_Time'])
    meal_dataframe_fin = pd.DataFrame(meal_data)
    return meal_dataframe_fin      

In [8]:
# result = []
meal_data1 = extract_meal_cgm_timestamps(insuline_data_ext1)
meal_data2 = extract_meal_cgm_timestamps(insuline_data_ext2)
print(meal_data2)

                      0
0   2017-09-05 13:14:52
1   2017-09-05 19:32:23
2   2017-09-06 08:48:28
3   2017-09-06 13:22:26
4   2017-09-06 19:27:21
..                  ...
398 2018-03-04 18:38:57
399 2018-03-05 11:52:19
400 2018-03-05 17:19:29
401 2018-03-06 10:22:37
402 2018-03-06 12:40:57

[403 rows x 1 columns]


In [9]:
cgm_data1_trimmed['Sensor Glucose (mg/dL)'] = cgm_data1_trimmed['Sensor Glucose (mg/dL)'].interpolate(method = 'linear')
cgm_data2_trimmed['Sensor Glucose (mg/dL)'] = cgm_data2_trimmed['Sensor Glucose (mg/dL)'].interpolate(method = 'linear')

In [10]:
cgm_data1_trimmed = cgm_data1_trimmed[['Date_Time','Sensor Glucose (mg/dL)']]
cgm_data1_trimmed = cgm_data1_trimmed.reindex(index=cgm_data1_trimmed.index[::-1])
cgm_data1_trimmed = cgm_data1_trimmed.reset_index(drop=True)
cgm_data2_trimmed = cgm_data2_trimmed[['Date_Time','Sensor Glucose (mg/dL)']]
cgm_data2_trimmed = cgm_data2_trimmed.reindex(index=cgm_data2_trimmed.index[::-1])
cgm_data2_trimmed = cgm_data2_trimmed.reset_index(drop=True)

In [11]:
def extract_nomeal_cgm_timestamps(no_meal_dataframe):
    no_meal_data = []
    len_no_meal_dataframe = len(no_meal_dataframe)-1
    for row in range(len_no_meal_dataframe):
        new_time = no_meal_dataframe.loc[row, 0] + timedelta(hours = 4)
        if( new_time > no_meal_dataframe.at[row+1, 0]):
            continue
        else:
            two_hrs = timedelta(hours = 2) 
            st_time = no_meal_dataframe.at[row, 0] + two_hrs
            diff = st_time - no_meal_dataframe.at[row+1,0]
            if (diff.days>0):
                hours = int((diff.days*24*3600 + diff.seconds)//3600) 
            else:
                hours = int(diff.seconds//3600)
            for i in range(2,hours,2):
                if st_time + timedelta(hours = i) <= no_meal_dataframe.at[row+1, 0]: 
                    no_meal_data.append(st_time + timedelta(hours = i) )
                
           
    no_meal_data = pd.DataFrame(no_meal_data)
    return no_meal_data

In [12]:
nomeal_data1 = extract_nomeal_cgm_timestamps(meal_data1)
nomeal_data2 = extract_nomeal_cgm_timestamps(meal_data2)
print(nomeal_data2)

                      0
0   2017-09-05 17:14:52
1   2017-09-05 19:14:52
2   2017-09-05 23:32:23
3   2017-09-06 01:32:23
4   2017-09-06 03:32:23
..                  ...
916 2018-03-05 02:38:57
917 2018-03-05 15:52:19
918 2018-03-05 21:19:29
919 2018-03-05 23:19:29
920 2018-03-06 01:19:29

[921 rows x 1 columns]


In [13]:
def meal_cgm_extraction(meal_cgm_dataframe, cgmdata):
    list1 = ['cgm_val'+str(x) for x in range(30)]
    meal_data = pd.DataFrame(columns = list1)
    for id in meal_cgm_dataframe.index:
        dict1 = dict()
        data_sets = cgmdata[cgmdata['Date_Time'] >= meal_cgm_dataframe[0][id]]
        data_set_list = list(cgmdata.loc[data_sets.index[0]-6: data_sets.index[0]+23, 'Sensor Glucose (mg/dL)'].values)
        cgm_list=[]
        for id1, cgm_val in enumerate(data_set_list):
            cgm_list.append(cgm_val)
            for cgm in cgm_list:
              dict1[list1[id1]] = cgm_val
        meal_data = meal_data.append(dict1, ignore_index = True)
    return meal_data

In [14]:
cgm_meal1 = meal_cgm_extraction(meal_data1, cgm_data1_trimmed)
cgm_meal2 = meal_cgm_extraction(meal_data2, cgm_data2_trimmed)
print(cgm_meal1)

       cgm_val0    cgm_val1    cgm_val2  ...  cgm_val27  cgm_val28  cgm_val29
0    314.000000  310.000000  309.000000  ...        NaN        NaN        NaN
1     58.000000   59.000000   63.000000  ...      198.0      203.0      196.0
2    304.000000  292.000000  281.000000  ...      284.0      283.0      278.0
3     40.000000   40.000000   40.000000  ...       74.0       77.0       81.0
4    212.000000  210.000000  204.000000  ...      209.0      210.0      209.0
..          ...         ...         ...  ...        ...        ...        ...
598   57.000000   58.000000   62.000000  ...      191.0      197.0      202.0
599  209.000000  219.000000  221.000000  ...      212.0      215.0      208.0
600  169.176471  167.941176  166.705882  ...      123.0      106.0      107.0
601  106.000000  107.000000  100.000000  ...      200.0      194.0      177.0
602   89.000000   80.000000   69.000000  ...       92.0       93.0       86.0

[603 rows x 30 columns]


In [15]:
def nomeal_cgm_extraction(no_meal_cgm_dataframe, cgmdata):
    list1 = ['cgm_val'+str(x) for x in range(24)]
    no_meal_data = pd.DataFrame(columns = list1)
    #print(data)
    for id in no_meal_cgm_dataframe.index:
        dict1 = dict()
        data_sets = cgmdata[cgmdata['Date_Time'] >= no_meal_cgm_dataframe[0][id]]
        #print(temp1)
        data_set_list = list(cgmdata.loc[data_sets.index[0]-0: data_sets.index[0]+23, 'Sensor Glucose (mg/dL)'].values)
        cgm_list=[]
        #print(inputs)
        for id1, cgm_val in enumerate(data_set_list):
          cgm_list.append(cgm_val)
          for cgm in cgm_list:
            dict1[list1[id1]] = cgm
        no_meal_data = no_meal_data.append(dict1, ignore_index = True)
    return no_meal_data

In [16]:
cgm_nomeal1 = nomeal_cgm_extraction(nomeal_data1, cgm_data1_trimmed)
cgm_nomeal2 = nomeal_cgm_extraction(nomeal_data2, cgm_data2_trimmed)
print(cgm_nomeal2)

     cgm_val0  cgm_val1  cgm_val2  ...  cgm_val21  cgm_val22  cgm_val23
0        93.0      88.0      83.0  ...      127.0      131.0      132.0
1       130.0     128.0     127.0  ...      223.0      227.0      229.0
2       142.0     140.0     137.0  ...      106.0      110.0      129.0
3       130.0     126.0     120.0  ...      165.0      170.0      174.0
4       177.0     179.0     182.0  ...      202.0      199.0      194.0
..        ...       ...       ...  ...        ...        ...        ...
916     160.0     162.0     164.0  ...      158.0      157.0      155.0
917     153.0     154.0     155.0  ...      167.0      153.0      138.0
918     184.0     187.0     186.0  ...      155.0      151.0      147.0
919     142.0     138.0     135.0  ...      102.0      102.0      102.0
920     103.0     105.0     103.0  ...      112.0      111.0      111.0

[921 rows x 24 columns]


In [17]:
meal_cgm_master = pd.concat([cgm_meal1,cgm_meal2]).reset_index(drop=True)
no_meal_cgm_master = pd.concat([cgm_nomeal1,cgm_nomeal2]).reset_index(drop=True)
meal_cgm_master_list = meal_cgm_master.values.tolist()
no_meal_cgm_master_list = no_meal_cgm_master.values.tolist()

In [18]:
meal_cgm_master

Unnamed: 0,cgm_val0,cgm_val1,cgm_val2,cgm_val3,cgm_val4,cgm_val5,cgm_val6,cgm_val7,cgm_val8,cgm_val9,cgm_val10,cgm_val11,cgm_val12,cgm_val13,cgm_val14,cgm_val15,cgm_val16,cgm_val17,cgm_val18,cgm_val19,cgm_val20,cgm_val21,cgm_val22,cgm_val23,cgm_val24,cgm_val25,cgm_val26,cgm_val27,cgm_val28,cgm_val29
0,314.0,310.0,309.0,311.0,311.0,311.0,312.0,312.0,316.0,324.0,319.0,315.0,314.0,316.0,314.0,310.0,298.0,293.0,279.0,278.0,281.0,289.0,294.0,290.0,,,,,,
1,58.0,59.0,63.0,71.0,81.0,102.0,131.0,140.0,147.0,153.0,159.0,163.0,159.0,153.0,151.0,155.0,163.0,164.0,165.0,166.0,168.0,164.0,169.0,178.0,184.0,190.0,195.0,198.0,203.0,196.0
2,304.0,292.0,281.0,268.0,259.0,255.0,248.0,241.0,231.0,220.0,222.0,222.0,222.0,223.0,224.0,228.0,233.0,243.0,256.0,262.0,270.0,277.0,274.0,269.0,267.0,267.0,274.0,284.0,283.0,278.0
3,40.0,40.0,40.0,40.0,60.0,71.0,83.0,87.0,100.0,112.0,121.0,125.0,130.0,132.0,123.0,112.0,89.0,77.0,69.0,66.0,67.0,71.0,75.0,74.0,72.0,70.0,67.0,74.0,77.0,81.0
4,212.0,210.0,204.0,200.0,199.0,201.0,201.0,194.0,188.0,183.0,181.0,176.0,170.0,168.0,169.0,172.0,179.0,192.0,203.0,205.0,210.0,213.0,212.0,216.0,213.0,210.0,210.0,209.0,210.0,209.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,190.0,189.0,187.0,181.0,176.0,178.0,183.0,185.0,186.0,188.0,198.0,207.0,216.0,218.0,215.0,210.0,206.0,200.0,198.0,196.0,190.0,173.0,162.0,154.0,148.0,144.0,141.0,148.0,154.0,153.0
1002,146.0,143.0,140.0,135.0,123.0,113.0,109.0,102.0,103.0,117.0,150.0,164.0,181.0,199.0,214.0,220.0,225.0,231.0,239.0,236.0,240.0,248.0,249.0,247.0,243.0,237.0,226.0,211.0,202.0,192.0
1003,201.0,198.0,198.0,200.0,199.0,200.0,197.0,187.0,175.0,172.0,167.0,153.0,138.0,125.0,117.0,109.0,102.0,107.0,116.0,127.0,132.0,131.0,123.0,103.0,88.0,78.0,76.0,84.0,96.0,109.0
1004,159.0,158.0,152.0,153.0,152.0,150.0,147.0,143.0,134.0,132.0,135.0,142.0,147.0,149.0,151.0,153.0,150.0,142.0,139.0,138.0,144.0,149.0,150.0,149.0,147.0,147.0,153.0,164.0,172.0,179.0


In [19]:
no_meal_cgm_master

Unnamed: 0,cgm_val0,cgm_val1,cgm_val2,cgm_val3,cgm_val4,cgm_val5,cgm_val6,cgm_val7,cgm_val8,cgm_val9,cgm_val10,cgm_val11,cgm_val12,cgm_val13,cgm_val14,cgm_val15,cgm_val16,cgm_val17,cgm_val18,cgm_val19,cgm_val20,cgm_val21,cgm_val22,cgm_val23
0,160.0,145.0,134.0,129.0,120.0,113.0,109.0,106.0,100.0,95.0,89.0,87.0,85.0,82.0,77.0,73.0,70.0,65.0,62.0,59.0,58.0,63.0,71.0,81.0
1,86.0,81.0,75.0,71.0,64.0,57.0,50.0,45.0,44.0,43.0,45.0,46.0,47.0,48.0,51.0,54.0,58.0,59.0,63.0,71.0,81.0,102.0,131.0,140.0
2,64.0,67.0,69.0,67.0,63.0,60.0,55.0,53.0,53.0,54.0,57.0,59.0,62.0,64.0,65.0,64.0,64.0,65.0,65.0,64.0,64.0,67.0,72.0,78.0
3,86.0,89.0,90.0,89.0,89.0,90.0,91.0,90.0,87.0,83.0,81.0,87.0,92.0,95.0,95.0,95.0,92.0,92.0,93.0,91.0,89.0,92.0,99.0,106.0
4,114.0,115.0,115.0,124.0,127.0,124.0,120.0,115.0,113.0,113.0,112.0,111.0,113.0,115.0,118.0,120.0,120.0,119.0,118.0,117.0,117.0,118.0,119.0,119.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,160.0,162.0,164.0,165.0,166.0,168.0,170.0,171.0,172.0,174.0,175.0,175.0,175.0,174.0,172.0,166.0,161.0,157.0,155.0,156.0,158.0,158.0,157.0,155.0
2017,153.0,154.0,155.0,161.0,167.0,175.0,185.0,193.0,199.0,203.0,203.0,201.0,198.0,198.0,200.0,199.0,200.0,197.0,187.0,175.0,172.0,167.0,153.0,138.0
2018,184.0,187.0,186.0,182.0,174.0,167.0,167.0,166.0,164.0,165.0,165.0,165.0,165.0,163.0,163.0,164.0,162.0,162.0,162.0,160.0,158.0,155.0,151.0,147.0
2019,142.0,138.0,135.0,133.0,131.0,129.0,127.0,124.0,122.0,120.0,117.0,114.0,113.0,113.0,111.0,109.0,107.0,105.0,104.0,103.0,103.0,102.0,102.0,102.0


In [20]:
def diff_max_min_cgm(meal_list,row):
  max_cgm_val = max(meal_list[row])
  min_cgm_val = min(meal_list[row])
  result = max_cgm_val - min_cgm_val
  return (result)

difference_list_meal = []
for row in range (len(meal_cgm_master_list)):
  difference = diff_max_min_cgm(meal_cgm_master_list,row)
  difference_list_meal.append(difference)
difference_df_meal = pd.DataFrame(difference_list_meal) 
difference_df_meal

difference_list_no_meal = []
for row in range (len(no_meal_cgm_master_list)):
  difference = diff_max_min_cgm(no_meal_cgm_master_list,row)
  difference_list_no_meal.append(difference)
difference_df_no_meal = pd.DataFrame(difference_list_no_meal) 
difference_df_no_meal



Unnamed: 0,0
0,102.0
1,97.0
2,25.0
3,25.0
4,16.0
...,...
2016,20.0
2017,65.0
2018,40.0
2019,40.0


In [21]:
def coeff_of_var(meal_data,row):
  mean = np.mean(meal_data[row])
  std_dev = np.std(meal_data[row])
  result = mean/std_dev
  return (result)

coeff_of_var_meal_list = []
for row in range(len(meal_cgm_master_list)):
    coeff_val = coeff_of_var(meal_cgm_master_list,row)
    coeff_of_var_meal_list.append(coeff_val)
coeff_of_var_df_meal = pd.DataFrame(coeff_of_var_meal_list) 
coeff_of_var_df_meal 

coeff_of_var_no_meal_list = []
for row in range(len(no_meal_cgm_master_list)):
    coeff_val = coeff_of_var(no_meal_cgm_master_list,row)
    coeff_of_var_no_meal_list.append(coeff_val)
coeff_of_var_df_no_meal = pd.DataFrame(coeff_of_var_no_meal_list) 
coeff_of_var_df_no_meal 

Unnamed: 0,0
0,3.319658
1,2.621308
2,10.834981
3,18.424073
4,29.829437
...,...
2016,23.482671
2017,9.060195
2018,16.697689
2019,9.304049


In [22]:
def discrete_wavelet_trans(meal_data,row):
  (cA,cD) = pywt.dwt(meal_data[row], 'db1')
  cA = cA[::-1][0:8]
  return (cA,cD)

x_meal=[]
for row in range(len(meal_cgm_master_list)):
  x1,y1 = discrete_wavelet_trans(meal_cgm_master_list,row)
  x_meal.append(list(x1))
  discrete_wavelet_df_meal = pd.DataFrame(x_meal)  
discrete_wavelet_df_meal 

x_no_meal=[]
for row in range(len(no_meal_cgm_master_list)):
  x1,y1 = discrete_wavelet_trans(no_meal_cgm_master_list,row)
  x_no_meal.append(list(x1))
  discrete_wavelet_df_no_meal = pd.DataFrame(x_no_meal)  
discrete_wavelet_df_no_meal 

Unnamed: 0,0,1,2,3,4,5,6,7
0,107.480231,85.559921,85.559921,95.459415,106.066017,118.086832,124.450793,137.885822
1,191.625938,129.400541,94.752309,82.731493,74.246212,67.175144,64.346717,61.518290
2,106.066017,92.630988,91.216775,91.216775,91.216775,89.095454,82.024387,75.660426
3,144.956890,127.986327,130.107648,130.107648,134.350288,132.228968,118.793939,120.208153
4,168.291414,166.170094,166.170094,168.998521,168.291414,161.220346,157.684812,159.806133
...,...,...,...,...,...,...,...,...
2016,220.617316,223.445743,219.910209,224.859956,239.002092,246.780267,247.487373,244.658946
2017,205.768073,239.709199,255.972655,280.721392,282.135606,280.014285,285.671140,284.256926
2018,210.717821,221.324423,227.688384,229.102597,231.223917,231.931024,233.345238,232.638131
2019,144.249783,144.956890,146.371104,149.906638,155.563492,159.806133,163.341666,171.119841


In [23]:
def windowed_mean(meal_list,row):
  w_size = 5
  window_averages =[]
  i=0
  while i < len(meal_list[row]) - w_size +1: 
    nw_size = meal_list[row][i : i + w_size]
    w_average = sum(nw_size)/w_size
    window_averages.append(w_average)
    i+=w_size
  # print(len(window_averages))
  return (window_averages[1:5]) 

windowed_average_list_meal = []
for row in range (len(meal_cgm_master_list)):
  average_list = windowed_mean(meal_cgm_master_list,row)
  windowed_average_list_meal.append(average_list)
windowed_average_list_df_meal = pd.DataFrame(windowed_average_list_meal)
windowed_average_list_df_meal

windowed_average_list_no_meal = []
for row in range (len(no_meal_cgm_master_list)):
  average_list = windowed_mean(no_meal_cgm_master_list,row)
  windowed_average_list_no_meal.append(average_list)
# print(windowed_average_list_no_meal)
windowed_average_list_df_no_meal = pd.DataFrame(windowed_average_list_no_meal)
windowed_average_list_df_meal



Unnamed: 0,0,1,2,3
0,315.0,315.6,291.6,
1,134.6,157.0,162.6,172.6
2,239.0,222.6,244.4,271.4
3,90.6,126.2,82.6,71.8
4,193.4,172.8,190.2,212.8
...,...,...,...,...
1001,184.0,210.8,202.0,165.4
1002,108.8,181.6,230.2,245.4
1003,186.2,140.0,112.2,115.4
1004,141.2,144.8,144.4,147.8


In [24]:
def calc_peaks_fouriertrans(meal_list,row):
    fouriertrans = list()
    peak_feat = list()
    arr = np.array(meal_list[row])
    # print(fft(arr))
    fouriertrans.append(abs(fft(arr)))
    for val in range(len(fouriertrans)):
      sets = set(fouriertrans[val])  
      set_list = list(sets)
      set_list.sort()
      set_list = set_list[::-1][0:8]
      # print(set_list)
      peak_feat+=set_list
    # print(peak_feat)
    return (fouriertrans,peak_feat)  


x_meal=[]
y_meal=[]
for row in range(len(meal_cgm_master_list)):
  x1,y1 = calc_peaks_fouriertrans(meal_cgm_master_list,row)
  x_meal.append(x1)
  y_meal.append(list(y1))
# print(len(y_meal))
peak_valdf_meal = pd.DataFrame(y_meal)
peak_valdf_meal

x_no_meal=[]
y_no_meal=[]
for row in range(len(no_meal_cgm_master_list)):
  x1,y1 = calc_peaks_fouriertrans(no_meal_cgm_master_list,row)
  x_no_meal.append(x1)
  y_no_meal.append(y1)
# print(y_no_meal)
peak_valdf_no_meal = pd.DataFrame(y_no_meal)
peak_valdf_no_meal

Unnamed: 0,0,1,2,3,4,5,6,7
0,2233.0,383.612103,200.266111,114.865785,86.365502,58.180753,55.484858,52.943366
1,1612.0,367.509787,152.193452,103.143103,93.471921,62.677642,50.931326,41.805321
2,1515.0,77.954818,44.953363,19.924844,17.058722,15.264338,13.076697,12.155566
3,2183.0,47.244705,42.391123,29.279056,22.022716,18.756088,17.003091,15.000000
4,2816.0,46.384040,26.000000,25.868738,25.783402,13.058512,5.961899,5.587143
...,...,...,...,...,...,...,...,...
2016,3966.0,113.601913,27.687309,15.509716,14.525839,7.940799,5.099020,4.000000
2017,4333.0,323.344680,77.155831,29.586852,27.658633,25.338433,24.952746,19.078784
2018,3984.0,92.077790,90.229890,74.737209,47.623524,32.825413,27.712813,24.863800
2019,2806.0,172.145768,81.445509,48.198513,42.508823,34.473751,31.112698,25.119713


In [25]:
 #def rank1(meal_cgm_master_list):
def calc_rank_row(meal_data,row):
  rank = [0 for x in range(len(meal_data[row]))]
  for j in range (len(meal_data[row])):
    (r,s)=(1,1)
    for k in range (len(meal_data[row])):
      if k != j and meal_data[row][k] < meal_data[row][j]:
        r += 1
      if k != j and meal_data[row][k] == meal_data[row][j]:
        s += 1       
    rank[j] = r + (s - 1) / 2
  return (rank) 

rank_list_meal=[]
for row in range(len(meal_cgm_master_list)):
    ranks_list_meal = calc_rank_row(meal_cgm_master_list,row)
    rank_list_meal.append(ranks_list_meal)

ranks_list_df_meal = pd.DataFrame(rank_list_meal)
ranks_list_df_meal = ranks_list_df_meal.iloc[:,:24]

rank_list_no_meal=[]
for row in range(len(no_meal_cgm_master_list)):
  ranks_list_no_meal = calc_rank_row(no_meal_cgm_master_list,row)
  rank_list_no_meal.append(ranks_list_no_meal)

ranks_list_df_no_meal = pd.DataFrame(rank_list_no_meal)
ranks_list_df_no_meal = ranks_list_df_no_meal.iloc[:,:24]

  

In [26]:
ranks_list_df_no_meal

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
0,24.0,23.0,22.0,21.0,20.0,19.0,18.0,17.0,16.0,15.0,14.0,13.0,12.0,11.0,9.0,8.0,6.0,5.0,3.0,2.0,1.0,4.0,7.0,10.0
1,21.0,19.5,18.0,16.5,15.0,11.0,8.0,3.5,2.0,1.0,3.5,5.0,6.0,7.0,9.0,10.0,12.0,13.0,14.0,16.5,19.5,22.0,23.0,24.0
2,12.5,20.0,22.0,20.0,9.0,7.0,4.0,1.5,1.5,3.0,5.0,6.0,8.0,12.5,17.0,12.5,12.5,17.0,17.0,12.5,12.5,20.0,23.0,24.0
3,3.0,7.5,11.0,7.5,7.5,11.0,13.5,11.0,4.5,2.0,1.0,4.5,16.5,21.0,21.0,21.0,16.5,16.5,19.0,13.5,7.5,16.5,23.0,24.0
4,6.0,8.5,8.5,22.5,24.0,22.5,20.0,8.5,4.0,4.0,2.0,1.0,4.0,8.5,14.0,20.0,20.0,17.0,14.0,11.5,11.5,14.0,17.0,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,8.0,10.0,11.0,12.0,13.5,15.0,16.0,17.0,18.5,20.5,23.0,23.0,23.0,20.5,18.5,13.5,9.0,4.5,1.5,3.0,6.5,6.5,4.5,1.5
2017,2.5,4.0,5.0,6.0,7.5,10.5,12.0,14.0,18.5,23.5,23.5,22.0,16.5,16.5,20.5,18.5,20.5,15.0,13.0,10.5,9.0,7.5,2.5,1.0
2018,22.0,24.0,23.0,21.0,20.0,18.5,18.5,17.0,11.5,14.5,14.5,14.5,14.5,9.5,9.5,11.5,7.0,7.0,7.0,5.0,4.0,3.0,2.0,1.0
2019,24.0,23.0,22.0,21.0,20.0,19.0,18.0,17.0,16.0,15.0,14.0,13.0,11.5,11.5,10.0,9.0,8.0,7.0,6.0,4.5,4.5,2.0,2.0,2.0


In [27]:
def z_score_feat(meal_list,row):
  z_score_arr = np.array(meal_list[row])
  z_score_list = stats.zscore(z_score_arr)
  return (z_score_list)

z_scores_meal=[]
for row in range (len(meal_cgm_master_list)):
  z_score_res_list = z_score_feat(meal_cgm_master_list,row)
  z_scores_meal.append(z_score_res_list)
z_score_res_list_df_meal = pd.DataFrame(z_scores_meal)
z_score_res_list_df_meal = z_score_res_list_df_meal.iloc[:,:24]

z_scores_no_meal=[]
for row in range (len(no_meal_cgm_master_list)):
  z_score_res_list = z_score_feat(no_meal_cgm_master_list,row)
  z_scores_no_meal.append(z_score_res_list)
z_score_res_list_df_no_meal = pd.DataFrame(z_scores_no_meal)
z_score_res_list_df_no_meal = z_score_res_list_df_no_meal.iloc[:,:24]


In [28]:
z_score_res_list_df_no_meal 


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
0,2.389024,1.853835,1.461363,1.282967,0.961853,0.712099,0.569382,0.462344,0.248268,0.069872,-0.144204,-0.215562,-0.286921,-0.393959,-0.572355,-0.715072,-0.822110,-1.000506,-1.107544,-1.214582,-1.250261,-1.071865,-0.786430,-0.429638
1,0.735007,0.539872,0.305711,0.149603,-0.123585,-0.396774,-0.669962,-0.865097,-0.904124,-0.943151,-0.865097,-0.826070,-0.787043,-0.748016,-0.630935,-0.513854,-0.357747,-0.318720,-0.162612,0.149603,0.539872,1.359438,2.491218,2.842461
2,0.150188,0.665118,1.008404,0.665118,-0.021455,-0.536385,-1.394601,-1.737888,-1.737888,-1.566245,-1.051315,-0.708028,-0.193099,0.150188,0.321831,0.150188,0.150188,0.321831,0.321831,0.150188,0.150188,0.665118,1.523334,2.553193
3,-1.004336,-0.396670,-0.194115,-0.396670,-0.396670,-0.194115,0.008440,-0.194115,-0.801781,-1.612001,-2.017111,-0.801781,0.210995,0.818660,0.818660,0.818660,0.210995,0.210995,0.413550,0.008440,-0.396670,0.210995,1.628880,3.046766
4,-0.847427,-0.593199,-0.593199,1.694854,2.457539,1.694854,0.677942,-0.593199,-1.101655,-1.101655,-1.355884,-1.610112,-1.101655,-0.593199,0.169485,0.677942,0.677942,0.423714,0.169485,-0.084743,-0.084743,0.169485,0.423714,0.423714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,-0.746046,-0.461838,-0.177630,-0.035526,0.106578,0.390786,0.674994,0.817097,0.959201,1.243409,1.385513,1.385513,1.385513,1.243409,0.959201,0.106578,-0.603942,-1.172357,-1.456565,-1.314461,-1.030253,-1.030253,-1.172357,-1.456565
2017,-1.382135,-1.331951,-1.281768,-0.980667,-0.679567,-0.278100,0.223734,0.625202,0.926302,1.127036,1.127036,1.026669,0.876119,0.876119,0.976485,0.926302,0.976485,0.825935,0.324101,-0.278100,-0.428650,-0.679567,-1.382135,-2.134886
2018,1.810593,2.112358,2.011770,1.609416,0.804708,0.100588,0.100588,0.000000,-0.201177,-0.100588,-0.100588,-0.100588,-0.100588,-0.301765,-0.301765,-0.201177,-0.402354,-0.402354,-0.402354,-0.603531,-0.804708,-1.106473,-1.508827,-1.911181
2019,1.996093,1.677779,1.439044,1.279887,1.120730,0.961573,0.802416,0.563681,0.404524,0.245367,0.006632,-0.232104,-0.311682,-0.311682,-0.470839,-0.629996,-0.789153,-0.948310,-1.027889,-1.107467,-1.107467,-1.187045,-1.187045,-1.187045


In [30]:
meal_data = pd.concat([difference_df_meal,coeff_of_var_df_meal,discrete_wavelet_df_meal,windowed_average_list_df_meal,peak_valdf_meal,ranks_list_df_meal,z_score_res_list_df_meal])
no_meal_data = pd.concat([difference_df_no_meal,coeff_of_var_df_no_meal,discrete_wavelet_df_no_meal,windowed_average_list_df_no_meal,peak_valdf_no_meal,ranks_list_df_no_meal,z_score_res_list_df_no_meal])
meal_data['Class_label']=1
no_meal_data['Class_label']=0


In [31]:
from sklearn.utils import shuffle
dataset = shuffle(pd.concat([meal_data,no_meal_data]).fillna(0)).reset_index().drop(columns = ['index'])
dataset1 = dataset.drop(columns = 'Class_label')
print(dataset1)

                0           1           2           3   ...    20    21    22    23
0      6652.000000  582.535792  193.411766  108.010505  ...   0.0   0.0   0.0   0.0
1         8.144479    0.000000    0.000000    0.000000  ...   0.0   0.0   0.0   0.0
2       231.223917  210.010714  207.889394  205.768073  ...   0.0   0.0   0.0   0.0
3       152.735065  154.149278  154.149278  152.735065  ...   0.0   0.0   0.0   0.0
4        16.500000    9.000000    6.500000    9.000000  ...  21.5  21.5  23.0  24.0
...            ...         ...         ...         ...  ...   ...   ...   ...   ...
21184    82.731493   94.752309  115.965512  141.421356  ...   0.0   0.0   0.0   0.0
21185   265.644444  243.000000  177.200000  148.000000  ...   0.0   0.0   0.0   0.0
21186     6.427781    0.000000    0.000000    0.000000  ...   0.0   0.0   0.0   0.0
21187   161.800000  167.800000  145.200000  157.200000  ...   0.0   0.0   0.0   0.0
21188   180.000000  167.600000  157.400000    0.000000  ...   0.0   0.0   0.

In [32]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy import stats

feat_dataset = StandardScaler().fit_transform(dataset1)

pca = PCA(n_components= 15)
pca_component_extract = pca.fit_transform(feat_dataset)
pca_dataframe = pd.DataFrame(data = pca_component_extract, columns = ['PC 1', 'PC 2','PC 3','PC 4','PC 5','PC 6','PC 7','PC 8','PC 9','PC 10','PC 11','PC 12','PC 13','PC 14','PC 15'])

In [33]:
pca_dataframe['Class_label'] = dataset['Class_label']
pca_dataset = pca_dataframe.drop(columns = ['Class_label'])

def score_calc(model, x_train,x_test,y_train,y_test):
    model = model
    model.fit(x_train,y_train)
    #print(classification_report(y_train, y_test))
    return model.score(x_test,y_test)

scores_list = []
from sklearn.svm import SVC
from sklearn.model_selection import KFold
k_fold = KFold(n_splits=10)
for train_index, test_index in k_fold.split(pca_dataset):
    print("TRAIN:", train_index, "TEST:", test_index)
    
    x_train = pca_dataset.loc[train_index]
    x_test = pca_dataset.loc[test_index]
    y_train = pca_dataframe['Class_label'].loc[train_index]
    y_test = pca_dataframe['Class_label'].loc[test_index]
    pca_dataframe.Class_label.loc[train_index],pca_dataframe.Class_label.loc[test_index]
    
    scores = score_calc(SVC(), x_train, x_test, y_train, y_test)
    scores_list.append(scores)
    print(scores_list)

print(np.mean(scores_list))


TRAIN: [ 2119  2120  2121 ... 21186 21187 21188] TEST: [   0    1    2 ... 2116 2117 2118]
[0.7781972628598396]
TRAIN: [    0     1     2 ... 21186 21187 21188] TEST: [2119 2120 2121 ... 4235 4236 4237]
[0.7781972628598396, 0.7678150070788108]
TRAIN: [    0     1     2 ... 21186 21187 21188] TEST: [4238 4239 4240 ... 6354 6355 6356]
[0.7781972628598396, 0.7678150070788108, 0.7663992449268523]
TRAIN: [    0     1     2 ... 21186 21187 21188] TEST: [6357 6358 6359 ... 8473 8474 8475]
[0.7781972628598396, 0.7678150070788108, 0.7663992449268523, 0.7678150070788108]
TRAIN: [    0     1     2 ... 21186 21187 21188] TEST: [ 8476  8477  8478 ... 10592 10593 10594]
[0.7781972628598396, 0.7678150070788108, 0.7663992449268523, 0.7678150070788108, 0.7852760736196319]
TRAIN: [    0     1     2 ... 21186 21187 21188] TEST: [10595 10596 10597 ... 12711 12712 12713]
[0.7781972628598396, 0.7678150070788108, 0.7663992449268523, 0.7678150070788108, 0.7852760736196319, 0.7616800377536573]
TRAIN: [    0   

In [34]:
from sklearn.svm import SVC
SVM_Classifier = SVC()
x_axis = pca_dataset 
y_axis = pca_dataframe['Class_label']
SVM_Classifier.fit(x_axis,y_axis)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [35]:
predicted_labels = SVM_Classifier.predict(x_axis)

In [36]:
from sklearn.metrics import classification_report
print(classification_report(y_axis, predicted_labels))

              precision    recall  f1-score   support

           0       0.75      0.99      0.85     14147
           1       0.93      0.33      0.49      7042

    accuracy                           0.77     21189
   macro avg       0.84      0.66      0.67     21189
weighted avg       0.81      0.77      0.73     21189



In [37]:
# Using reference : https://scikit-learn.org/stable/modules/model_persistence.html
from joblib import dump, load
dump(SVM_Classifier, 'SVM_Classifier.pickle') 

['SVM_Classifier.pickle']