In [1]:
#Import packages

import pandas as pd
import numpy as np
from datetime import timedelta
from scipy.fftpack import fft, ifft,rfft
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.tree import DecisionTreeClassifier
from joblib import dump, load

In [2]:
#Read the necessary files

insulin_data_1 = pd.read_csv("Project-2-Files/InsulinData.csv",low_memory = False)
insulin_data_2 = pd.read_csv("Project-2-Files/Insulin_patient2.csv",low_memory = False)
cgm_data_1 = pd.read_csv("Project-2-Files/CGMData.csv",low_memory = False)
cgm_data_2 = pd.read_csv("Project-2-Files/CGM_patient2.csv",low_memory = False)

#Create a column for timestamp

insulin_data_1['TimeStamp'] = pd.to_datetime(insulin_data_1[["Date", "Time"]].apply(" ".join, axis=1))
insulin_data_2['TimeStamp'] = pd.to_datetime(insulin_data_2[["Date", "Time"]].apply(" ".join, axis=1))
cgm_data_1['TimeStamp'] = pd.to_datetime(cgm_data_1[["Date", "Time"]].apply(" ".join, axis=1))
cgm_data_2['TimeStamp'] = pd.to_datetime(cgm_data_2[["Date", "Time"]].apply(" ".join, axis=1))

#Sort by Timestamp

insulin_data_1 = insulin_data_1.sort_values(by =['TimeStamp'],ascending=True)
insulin_data_2 = insulin_data_2.sort_values(by =['TimeStamp'],ascending=True)
cgm_data_1 = cgm_data_1.sort_values(by =['TimeStamp'],ascending=True)
cgm_data_2 = cgm_data_2.sort_values(by =['TimeStamp'],ascending=True)


In [3]:
#Extract Meal Data Function
def extract_meal_data(ins_data,cgm_data,date_format):
    ins_data_copy = ins_data.loc[ins_data['BWZ Carb Input (grams)']>0]
    #ins_data_copy = ins_data_copy['BWZ Carb Input (grams)'].replace(0.0,np.nan,inplace=True)
    #ins_data_copy = ins_data_copy.where(lambda x:x==0).dropna()
    validTimeStamp = []
    minutes = 0
    time_diff = 0
    #Get valid TimeStamps from Insulin Data
    for i in range(1,len(ins_data_copy)-1):
        time_diff = ins_data_copy.iloc[i+1]['TimeStamp']-ins_data_copy.iloc[i]['TimeStamp']       
        minutes = (time_diff.total_seconds())/60
        if minutes >= 120:
            validTimeStamp.append(ins_data_copy.iloc[i]['TimeStamp'])
        
    #Get rows from CGM Data
    list_meal = []
    e_list=[]
    s_list=[]
    if date_format == 0:
        for j in validTimeStamp: 
            start = pd.to_datetime(j - timedelta(minutes=30))
            end = pd.to_datetime(j + timedelta(minutes=120))
            date = j.date().strftime('%-m/%-d/%Y')
            list_meal.append(cgm_data.loc[cgm_data['Date']== date].set_index('TimeStamp').between_time(start_time = start.strftime('%-H:%-M:%S'),end_time = end.strftime('%-H:%-M:%S'))['Sensor Glucose (mg/dL)'].values.tolist())
        return validTimeStamp,pd.DataFrame(list_meal)
    if date_format == 2:
        for j in validTimeStamp: 
            start = pd.to_datetime(j - timedelta(minutes=30))
            end = pd.to_datetime(j + timedelta(minutes=120))
            date = j.date().strftime('%Y-%m-%d')
            list_meal.append(cgm_data.loc[cgm_data['Date']== date].set_index('TimeStamp').between_time(start_time = start.strftime('%-H:%-M:%S'),end_time = end.strftime('%-H:%-M:%S'))['Sensor Glucose (mg/dL)'].values.tolist())
            s_list.append(start)
            e_list.append(end)
        return validTimeStamp,pd.DataFrame(list_meal)
    
    

In [4]:
#Extract Meal Data
validtime1,meal_data_1 = extract_meal_data(insulin_data_1,cgm_data_1,0)
validtime2,meal_data_2= extract_meal_data(insulin_data_2,cgm_data_2,2)
meal_data_1 = meal_data_1.iloc[:,0:24]
meal_data_2 = meal_data_2.iloc[:,0:24]
#meal_data_2.drop(columns=30)
#print(validtime2[185],meal_data_2[185],j,date)
#print(pd.to_datetime((j-timedelta(minutes=30))))


In [5]:
#Cleaning the meal Data - 859 rows
index1 = meal_data_1.isna().sum(axis=1).where(lambda x:x>2).dropna().index 
meal_data_final_1 = meal_data_1.drop(meal_data_1.index[index1])
index2 = meal_data_2.isna().sum(axis=1).where(lambda x:x>2).dropna().index 
meal_data_final_2 = meal_data_2.drop(meal_data_2.index[index2])
meal_data_final_1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
1,58.0,59.0,63.0,71.0,81.0,102.0,131.0,140.0,147.0,153.0,...,151.0,155.0,163.0,164.0,,166.0,168.0,164.0,169.0,178.0
2,304.0,292.0,281.0,268.0,259.0,255.0,248.0,241.0,231.0,220.0,...,224.0,228.0,233.0,243.0,256.0,262.0,270.0,277.0,274.0,269.0
3,40.0,40.0,40.0,40.0,60.0,71.0,83.0,87.0,100.0,112.0,...,123.0,112.0,89.0,77.0,69.0,66.0,67.0,71.0,75.0,74.0
4,212.0,210.0,204.0,200.0,199.0,201.0,201.0,194.0,188.0,183.0,...,169.0,172.0,179.0,192.0,203.0,205.0,210.0,213.0,212.0,216.0
5,145.0,141.0,137.0,133.0,129.0,124.0,125.0,122.0,123.0,130.0,...,192.0,203.0,212.0,211.0,213.0,204.0,200.0,196.0,189.0,183.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,61.0,59.0,59.0,60.0,65.0,70.0,82.0,100.0,126.0,133.0,...,108.0,106.0,110.0,116.0,127.0,140.0,146.0,153.0,158.0,162.0
598,57.0,58.0,62.0,74.0,104.0,110.0,126.0,133.0,135.0,141.0,...,147.0,136.0,135.0,136.0,142.0,168.0,168.0,174.0,207.0,209.0
599,209.0,219.0,221.0,221.0,216.0,217.0,215.0,211.0,201.0,200.0,...,220.0,207.0,205.0,194.0,186.0,177.0,180.0,184.0,180.0,190.0
601,106.0,107.0,100.0,104.0,123.0,132.0,129.0,127.0,120.0,132.0,...,178.0,182.0,180.0,168.0,166.0,167.0,169.0,176.0,173.0,166.0


In [6]:
meal_data_final_2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,131.0,132.0,130.0,128.0,127.0,128.0,129.0,126.0,122.0,120.0,...,145.0,156.0,162.0,170.0,180.0,191.0,201.0,212.0,219.0,223.0
1,165.0,163.0,160.0,157.0,161.0,177.0,185.0,187.0,192.0,190.0,...,163.0,148.0,136.0,127.0,122.0,130.0,140.0,150.0,156.0,157.0
2,103.0,108.0,117.0,123.0,128.0,132.0,137.0,141.0,142.0,142.0,...,192.0,186.0,176.0,160.0,161.0,168.0,180.0,206.0,209.0,207.0
3,168.0,163.0,157.0,161.0,168.0,185.0,190.0,196.0,202.0,211.0,...,230.0,224.0,220.0,217.0,208.0,197.0,197.0,203.0,201.0,193.0
4,127.0,127.0,124.0,121.0,116.0,113.0,114.0,115.0,115.0,114.0,...,107.0,110.0,108.0,99.0,87.0,78.0,76.0,82.0,87.0,93.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
397,190.0,189.0,187.0,181.0,176.0,178.0,183.0,185.0,186.0,188.0,...,215.0,210.0,206.0,200.0,198.0,196.0,190.0,173.0,162.0,154.0
398,146.0,143.0,140.0,135.0,123.0,113.0,109.0,102.0,103.0,117.0,...,214.0,220.0,225.0,231.0,239.0,236.0,240.0,248.0,249.0,247.0
399,201.0,198.0,198.0,200.0,199.0,200.0,197.0,187.0,175.0,172.0,...,117.0,109.0,102.0,107.0,116.0,127.0,132.0,131.0,123.0,103.0
400,159.0,158.0,152.0,153.0,152.0,150.0,147.0,143.0,134.0,132.0,...,151.0,153.0,150.0,142.0,139.0,138.0,144.0,149.0,150.0,149.0


In [7]:
#Extract No Meal Data Function
def extract_no_meal_data(ins_data,cgm_data,date_format):
    ins_data_copy = ins_data.loc[ins_data['BWZ Carb Input (grams)']>0]
    validTimeStamp = []
    hours = 0
    time_diff = 0
    #Get valid TimeStamps from Insulin Data
    for i in range(1,len(ins_data_copy)-1):
        time_diff = ins_data_copy.iloc[i+1]['TimeStamp']-pd.to_datetime(ins_data_copy.iloc[i]['TimeStamp']+timedelta(hours=2))    
        hours = (time_diff.total_seconds())/3600
        if hours >= 2:
            validTimeStamp.append(ins_data_copy.iloc[i]['TimeStamp'])
    list_no_meal = []
    for k,l in enumerate(validTimeStamp):
        itera = 1
        try:
            len_24 = len(cgm_data.loc[(cgm_data['TimeStamp']>=validTimeStamp[k]+pd.Timedelta(hours=2))&(cgm_data['TimeStamp']<validTimeStamp[k+1])])//24
            while(itera<=len_24):
                if itera == 1:
                    list_no_meal.append(cgm_data.loc[(cgm_data['TimeStamp']>=validTimeStamp[k]+pd.Timedelta(hours=2))&(cgm_data['TimeStamp']<validTimeStamp[k+1])]['Sensor Glucose (mg/dL)'][:itera*24].values.tolist())
                    itera+=1
                else:
                    list_no_meal.append(cgm_data.loc[(cgm_data['TimeStamp']>=validTimeStamp[k]+pd.Timedelta(hours=2))&(cgm_data['TimeStamp']<validTimeStamp[k+1])]['Sensor Glucose (mg/dL)'][(itera-1)*24:itera*24].values.tolist())
                    itera+=1
        except IndexError:
            break
    return pd.DataFrame(list_no_meal)
    

In [8]:
#Extract No Meal Data
no_meal_data_1 = extract_no_meal_data(insulin_data_1,cgm_data_1,0)
no_meal_data_2 = extract_no_meal_data(insulin_data_2,cgm_data_2,2)


insulin_data_1

In [9]:
#Clean No meal data - 2408 rows
index3 = no_meal_data_1.isna().sum(axis=1).where(lambda x:x>2).dropna().index 
no_meal_data_final_1 = no_meal_data_1.drop(no_meal_data_1.index[index3])
index4 = no_meal_data_2.isna().sum(axis=1).where(lambda x:x>2).dropna().index 
no_meal_data_final_2 = no_meal_data_2.drop(no_meal_data_2.index[index4])
no_meal_data_final_1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,312.0,316.0,324.0,319.0,315.0,314.0,316.0,314.0,310.0,298.0,...,289.0,294.0,290.0,278.0,251.0,229.0,208.0,198.0,186.0,174.0
1,160.0,145.0,134.0,129.0,120.0,113.0,109.0,106.0,100.0,95.0,...,77.0,73.0,70.0,65.0,62.0,59.0,58.0,63.0,71.0,81.0
2,189.0,182.0,175.0,166.0,146.0,138.0,129.0,123.0,118.0,114.0,...,82.0,77.0,71.0,66.0,63.0,65.0,66.0,64.0,62.0,61.0
3,64.0,67.0,69.0,67.0,63.0,60.0,55.0,53.0,53.0,54.0,...,65.0,64.0,64.0,65.0,65.0,64.0,64.0,67.0,72.0,78.0
4,86.0,89.0,90.0,89.0,89.0,90.0,91.0,90.0,87.0,83.0,...,95.0,95.0,92.0,92.0,93.0,91.0,89.0,92.0,99.0,106.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1699,144.0,150.0,141.0,126.0,116.0,107.0,111.0,114.0,115.0,113.0,...,80.0,86.0,92.0,90.0,84.0,75.0,70.0,72.0,69.0,69.0
1700,66.0,63.0,67.0,70.0,70.0,69.0,65.0,62.0,63.0,63.0,...,61.0,60.0,60.0,61.0,60.0,57.0,58.0,57.0,58.0,62.0
1701,74.0,104.0,110.0,126.0,133.0,135.0,141.0,144.0,137.0,137.0,...,136.0,142.0,168.0,168.0,174.0,207.0,209.0,206.0,197.0,185.0
1702,191.0,197.0,202.0,202.0,200.0,209.0,219.0,221.0,221.0,216.0,...,200.0,198.0,196.0,194.0,200.0,220.0,207.0,205.0,194.0,186.0


In [10]:
no_meal_data_2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,207.0,205.0,191.0,181.0,174.0,172.0,173.0,175.0,178.0,183.0,...,167.0,164.0,168.0,174.0,178.0,178.0,173.0,164.0,153.0,147.0
1,142.0,140.0,137.0,133.0,129.0,128.0,124.0,116.0,106.0,101.0,...,107.0,96.0,86.0,,,,,,110.0,129.0
2,130.0,126.0,120.0,115.0,113.0,113.0,114.0,116.0,118.0,121.0,...,120.0,122.0,129.0,139.0,147.0,153.0,159.0,165.0,170.0,174.0
3,177.0,179.0,182.0,185.0,189.0,194.0,197.0,199.0,202.0,204.0,...,205.0,205.0,204.0,204.0,205.0,205.0,204.0,202.0,199.0,194.0
4,187.0,180.0,175.0,171.0,170.0,170.0,170.0,170.0,169.0,168.0,...,179.0,182.0,178.0,174.0,173.0,172.0,171.0,169.0,165.0,163.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
914,103.0,105.0,103.0,101.0,100.0,100.0,101.0,103.0,105.0,106.0,...,111.0,111.0,112.0,112.0,112.0,112.0,112.0,112.0,111.0,111.0
915,111.0,111.0,112.0,115.0,114.0,112.0,110.0,106.0,108.0,111.0,...,113.0,118.0,120.0,120.0,120.0,119.0,120.0,120.0,120.0,120.0
916,122.0,124.0,130.0,136.0,143.0,147.0,149.0,150.0,149.0,147.0,...,140.0,140.0,139.0,139.0,139.0,140.0,140.0,141.0,142.0,144.0
917,146.0,150.0,153.0,156.0,159.0,159.0,160.0,159.0,157.0,,...,,,,177.0,175.0,181.0,189.0,191.0,196.0,196.0


In [11]:
#Function for Meal data Feature Matrix creation
def create_meal_feature_matrix(meal_data):
    pow_first_max =[]
    pow_second_max =[]
    pow_third_max =[]
    ind_third_max=[]
    ind_second_max =[]
    meal_data_temp = meal_data.interpolate(method = 'linear',axis = 1)
    #indices_to_drop = meal_data_temp.isna().sum(axis=1).replace(0,np.nan).dropna().index 
    #cleaned_data = meal_data_temp.drop(meal_data.index[indices_to_drop])
    cleaned_data_meal = meal_data_temp.dropna().reset_index()
    for i in range(len(cleaned_data_meal)):
        arr = abs(rfft(cleaned_data_meal.iloc[:,0:30].iloc[i].values.tolist())).tolist()
        arr_copy = arr.copy()
        arr_copy.sort()
        pow_first_max.append(arr_copy[-2])
        pow_second_max.append(arr_copy[-3])
        pow_third_max.append(arr_copy[-4])
        ind_second_max.append(arr.index(arr_copy[-3]))
        ind_third_max.append(arr.index(arr_copy[-4]))
    meal_feat_matrix = pd.DataFrame()
    meal_feat_matrix['tau_time']=(cleaned_data_meal.iloc[:,22:25].idxmin(axis=1)-cleaned_data_meal.iloc[:,5:19].idxmax(axis=1))*5
    meal_feat_matrix['CGM_diff_normalized']=(cleaned_data_meal.iloc[:,5:19].max(axis=1)-cleaned_data_meal.iloc[:,22:25].min(axis=1))/(cleaned_data_meal.iloc[:,22:25].min(axis=1))
    meal_feat_matrix['pow_second_max']=pow_second_max
    meal_feat_matrix['ind_second_max']=ind_second_max
    meal_feat_matrix['pow_third_max']=pow_third_max
    meal_feat_matrix['ind_third_max']=ind_third_max
    time = cleaned_data_meal.iloc[:,22:25].idxmin(axis = 1)
    maximum = cleaned_data_meal.iloc[:,5:19].idxmax(axis = 1)
    temp_list = []
    sec_diff_data = []
    std_dev = []
    for i in range(len(cleaned_data_meal)):
        temp_list.append(np.diff(cleaned_data_meal.iloc[:,maximum[i]:time[i]].iloc[i].tolist()).max())
        sec_diff_data.append(np.diff(np.diff(cleaned_data_meal.iloc[:,maximum[i]:time[i]].iloc[i].tolist())).max())
        std_dev.append(np.std(cleaned_data_meal.iloc[i]))
    meal_feat_matrix['sec_differential']=sec_diff_data
    meal_feat_matrix['std_deviation']=std_dev
    meal_feat_matrix['label']=1
    return meal_feat_matrix


In [12]:
#Concatenate the feature matrices
meal_f_mat_data1 = create_meal_feature_matrix(meal_data_final_1)
meal_f_mat_data2 = create_meal_feature_matrix(meal_data_final_2)
final_meal_feature_matrix = pd.concat([meal_f_mat_data1,meal_f_mat_data2]).reset_index().drop(columns='index')
final_meal_feature_matrix

Unnamed: 0,tau_time,CGM_diff_normalized,pow_second_max,ind_second_max,pow_third_max,ind_third_max,sec_differential,std_deviation,label
0,20,0.000000,416.268724,1,366.389119,4,0.0,46.670522,1
1,95,-0.037175,293.836881,19,292.116402,5,13.0,54.309837,1
2,40,0.859155,116.518282,6,89.348698,2,11.0,32.735937,1
3,85,-0.051887,222.186841,11,215.500000,19,6.0,40.195642,1
4,35,0.158470,182.553753,3,175.708204,9,5.0,45.076362,1
...,...,...,...,...,...,...,...,...,...
802,50,0.415584,235.131023,3,230.575055,13,4.0,43.296854,1
803,30,-0.064777,395.596026,1,283.236693,3,7.0,67.599598,1
804,90,0.941748,358.001673,1,254.128879,13,12.0,60.114877,1
805,30,0.026846,271.194892,3,256.544915,5,5.0,50.138624,1


In [13]:
#Function for Non Meal data Feature Matrix creation
def create_no_meal_feature_matrix(no_meal_data):
    pow_first_max =[]
    pow_second_max =[]
    pow_third_max =[]
    ind_third_max=[]
    ind_second_max =[]
    no_meal_data_temp = no_meal_data.interpolate(method = 'linear',axis = 1)
    #indices_to_drop = meal_data_temp.isna().sum(axis=1).replace(0,np.nan).dropna().index 
    #cleaned_data = meal_data_temp.drop(meal_data.index[indices_to_drop])
    cleaned_data_no_meal = no_meal_data_temp.dropna().reset_index().drop(columns='index')
    for i in range(len(cleaned_data_no_meal)):
        arr = abs(rfft(cleaned_data_no_meal.iloc[:,0:24].iloc[i].values.tolist())).tolist()
        arr_copy = arr.copy()
        arr_copy.sort()
        pow_first_max.append(arr_copy[-2])
        pow_second_max.append(arr_copy[-3])
        pow_third_max.append(arr_copy[-4])
        ind_second_max.append(arr.index(arr_copy[-3]))
        ind_third_max.append(arr.index(arr_copy[-4]))
    no_meal_feat_matrix = pd.DataFrame()
    no_meal_feat_matrix['tau_time']=(int(24)-(cleaned_data_no_meal.iloc[:,0:19].idxmax(axis=1)))*5
    no_meal_feat_matrix['CGM_diff_normalized']=(cleaned_data_no_meal.iloc[:,0:19].max(axis=1)-cleaned_data_no_meal.iloc[:,23])/(cleaned_data_no_meal.iloc[:,23])
    no_meal_feat_matrix['pow_second_max']=pow_second_max
    no_meal_feat_matrix['ind_second_max']=ind_second_max
    no_meal_feat_matrix['pow_third_max']=pow_third_max
    no_meal_feat_matrix['ind_third_max']=ind_third_max
    time = cleaned_data_no_meal.idxmin(axis = 1)
    maximum = cleaned_data_no_meal.idxmax(axis = 1)
    temp_list = []
    sec_diff_data = []
    std_dev = []
    for i in range(len(cleaned_data_no_meal)):
        temp_list.append(np.diff(cleaned_data_no_meal.iloc[:,0:24].iloc[i].tolist()).max())
        sec_diff_data.append(np.diff(np.diff(cleaned_data_no_meal.iloc[:,0:24].iloc[i].tolist())).max())
        std_dev.append(np.std(cleaned_data_no_meal.iloc[i]))
    no_meal_feat_matrix['sec_differential']=sec_diff_data
    no_meal_feat_matrix['std_deviation']=std_dev
    no_meal_feat_matrix['label']=0
    return no_meal_feat_matrix

In [14]:
#Concatenate the feature matrices
no_meal_f_mat_data1 = create_no_meal_feature_matrix(no_meal_data_final_1)
no_meal_f_mat_data2 = create_no_meal_feature_matrix(no_meal_data_final_2)
final_no_meal_feature_matrix = pd.concat([no_meal_f_mat_data1,no_meal_f_mat_data2]).reset_index().drop(columns='index')
final_no_meal_feature_matrix

Unnamed: 0,tau_time,CGM_diff_normalized,pow_second_max,ind_second_max,pow_third_max,ind_third_max,sec_differential,std_deviation,label
0,110,0.862069,404.891614,4,200.183779,1,13.0,44.576947,0
1,120,0.975309,185.062145,1,151.880109,4,6.0,28.027485,0
2,120,2.098361,237.293548,1,230.789074,4,12.0,41.052114,0
3,110,-0.115385,52.559291,2,38.016660,3,3.0,5.826037,0
4,55,-0.103774,37.804982,2,23.487874,10,8.0,4.936929,0
...,...,...,...,...,...,...,...,...,...
2311,120,0.392157,75.641016,4,56.870362,1,2.0,12.566214,0
2312,40,0.009009,18.500838,1,17.294229,4,1.0,4.358102,0
2313,40,0.000000,27.956149,1,23.382686,8,6.0,4.606758,0
2314,85,0.041667,51.784610,3,45.784610,4,4.0,6.837397,0


In [15]:
#Training model
full_data = pd.concat([final_meal_feature_matrix,final_no_meal_feature_matrix]).reset_index().drop(columns='index')
shuffled_full_data = shuffle(full_data,random_state=1).reset_index().drop(columns='index')
kfold = KFold(n_splits = 10,shuffle = False)
full_data_without_label = shuffled_full_data.drop(columns='label')
model = DecisionTreeClassifier()
scores=[]
for tr_ind,ts_ind in kfold.split(shuffled_full_data):
    X_train,Y_train,X_test,Y_test = shuffled_full_data.iloc[tr_ind].iloc[:,:-1].values,shuffled_full_data.iloc[tr_ind].iloc[:,-1].values,shuffled_full_data.iloc[ts_ind].iloc[:,:-1].values,shuffled_full_data.iloc[ts_ind].iloc[:,-1].values
    model.fit(X_train,Y_train)
    scores.append(model.score(X_test,Y_test))
y_pred = model.predict(X_test)
dump(model,'DecisionTreeClassifier.pickle')


['DecisionTreeClassifier.pickle']

In [351]:
#Prepare for Test csv

#test_data = shuffled_full_data[231:462]
#a = meal_data_final_1[20:60]
#b = meal_data_final_2[40:100]
#c = no_meal_data_2[50:131]
#d = no_meal_data_2[100:150]
#test_data = pd.concat([a,b,c,d]).reset_index()
#test_data
#test_data.to_csv("test.csv",index=False,header=False)

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,14,15,16,17,18,19,20,21,22,23
0,26,106.0,103.0,98.0,92.0,96.0,99.0,100.0,96.0,94.0,...,143.0,152.0,157.0,160.0,157.0,153.0,152.0,152.0,153.0,153.0
1,27,147.0,146.0,141.0,138.0,137.0,138.0,140.0,140.0,135.0,...,113.0,109.0,111.0,115.0,116.0,116.0,117.0,127.0,147.0,158.0
2,28,273.0,265.0,247.0,236.0,234.0,228.0,219.0,203.0,188.0,...,126.0,126.0,126.0,130.0,130.0,131.0,136.0,138.0,139.0,142.0
3,29,66.0,65.0,65.0,68.0,74.0,88.0,100.0,110.0,129.0,...,131.0,124.0,121.0,115.0,109.0,109.0,111.0,113.0,111.0,110.0
4,30,95.0,96.0,98.0,99.0,102.0,110.0,130.0,150.0,176.0,...,233.0,234.0,238.0,225.0,219.0,225.0,226.0,221.0,220.0,219.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,145,163.0,158.0,145.0,131.0,121.0,108.0,104.0,106.0,108.0,...,79.0,76.0,78.0,83.0,86.0,88.0,87.0,86.0,85.0,87.0
227,146,92.0,88.0,85.0,86.0,91.0,93.0,94.0,93.0,91.0,...,98.0,101.0,108.0,114.0,119.0,121.0,123.0,128.0,135.0,141.0
228,147,147.0,152.0,154.0,154.0,156.0,160.0,161.0,161.0,161.0,...,170.0,169.0,168.0,166.0,165.0,162.0,156.0,152.0,151.0,152.0
229,148,155.0,156.0,156.0,156.0,155.0,152.0,150.0,149.0,150.0,...,154.0,152.0,151.0,149.0,146.0,142.0,137.0,136.0,138.0,139.0


In [22]:
#pd.read_pickle('DecisionTreeClassifier_1.pickle')

In [382]:
#shuffled_full_data

Unnamed: 0,tau_time,CGM_diff_normalized,pow_second_max,ind_second_max,pow_third_max,ind_third_max,sec_differential,std_deviation,label
0,30,0.023121,226.521125,4,138.024387,6,12.0,52.171975,0
1,65,0.614458,109.038164,1,89.171789,4,2.0,12.890306,1
2,30,0.023923,16.778175,5,10.500000,7,3.0,3.574359,0
3,30,-0.068783,180.973566,5,167.120783,11,22.0,37.134006,1
4,65,0.560976,383.957575,5,325.493762,11,5.0,65.000603,1
...,...,...,...,...,...,...,...,...,...
3118,30,-0.038217,53.382686,4,45.899346,8,3.0,13.130488,0
3119,80,0.107692,43.333966,1,32.820508,4,4.0,5.798557,0
3120,35,0.024000,33.088457,4,24.371847,1,2.0,8.812475,0
3121,95,1.937500,468.311271,4,282.054754,6,2.0,71.279102,1


In [None]:
#shuffled_full_data.loc[tr_ind],shuffled_full_data.loc[ts_ind],shuffled_full_data.label.loc[tr_ind],shuffled_full_data.label.loc[ts_ind]