In [3]:
# importing the required packages
import pandas as pd
import numpy as np
from scipy.stats import skew
from scipy.fftpack import fft
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate,train_test_split,StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import pickle

def DataPreProcessing(CGM_Data):
    """Drop sparse rows/columns of a CGM frame and fill the remaining gaps.

    The frame is modified in place and the very same object is returned.

    Steps, in order:
      1. Drop rows with fewer than a quarter of the columns populated.
      2. Drop columns with fewer than a quarter of the *original* row count
         populated (the row count is taken before step 1 removes anything).
      3. Linearly interpolate down each column, forward direction only.
      4. Back-fill along each row for whatever is still missing.

    Parameters
    ----------
    CGM_Data : pandas.DataFrame
        Frame to clean. It is mutated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``CGM_Data`` object after cleaning.

    Notes
    -----
    NaNs can survive: forward interpolation does not fill values above the
    first valid entry of a column, and the row-wise back-fill has nothing to
    copy from when no valid value lies to the right.
    """
    # Both thresholds come from the shape *before* anything is dropped.
    no_of_rows, no_of_columns = CGM_Data.shape

    # ceil(n / 4) in integer arithmetic.  A non-NaN count is an integer, so
    # "count >= n / 4" and "count >= ceil(n / 4)" keep exactly the same labels.
    min_values_per_row = (no_of_columns + 3) // 4
    min_values_per_column = (no_of_rows + 3) // 4

    # Only ``thresh`` is passed: pandas does not allow ``how`` and ``thresh``
    # together, and ``thresh`` is the rule that was actually in effect.
    CGM_Data.dropna(axis=0, thresh=min_values_per_row, inplace=True)
    CGM_Data.dropna(axis=1, thresh=min_values_per_column, inplace=True)

    CGM_Data.interpolate(axis=0, method='linear', limit_direction='forward', inplace=True)
    CGM_Data.bfill(axis=1, inplace=True)
    return CGM_Data

def ExtractFeatures(CGM_Data, win_size=6, n_fft=8, poly_degree=3, verbose=True):
    """Build the per-row feature matrix for a cleaned CGM frame.

    Every row of ``CGM_Data`` is treated as one series of readings and is
    summarised by six feature groups, concatenated in this column order:

    1. ``'Mean <start>'``             - mean of each ``win_size``-wide window.
    2. ``'Std_deviation <start>'``    - pandas ``std`` of the same windows.
    3. ``'FFT1'`` .. ``'FFT<n_fft>'`` - the ``n_fft`` largest FFT magnitudes of
       the row, in descending order.
    4. ``'Max CGM Vel'``              - largest difference between two readings
       that are ``win_size`` columns apart.
    5. ``'skewness'``                 - skewness of the row's value-frequency
       counts (see the review note in the body).
    6. ``0`` .. ``poly_degree``       - ``np.polyfit`` coefficients of the row
       fitted against the column labels.

    Parameters
    ----------
    CGM_Data : pandas.DataFrame
        One series per row. Column labels are used as the x-axis of the
        polynomial fit, so they are assumed to be numeric - TODO confirm for
        callers other than the ones in this notebook.
    win_size : int, default 6
        Window width (in columns) for the mean/std features and the lag for
        the velocity feature. The original notes describe 6 columns as a
        30 minute interval.
    n_fft : int, default 8
        Number of FFT magnitudes kept per row.
    poly_degree : int, default 3
        Degree of the fitted polynomial.
    verbose : bool, default True
        When true, print the running ``(rows, columns)`` shape after each
        feature group is added.

    Returns
    -------
    pandas.DataFrame
        Feature matrix indexed like ``CGM_Data``.

    Raises
    ------
    ValueError
        If ``win_size`` is smaller than 1, or ``CGM_Data`` has fewer than
        ``n_fft`` columns.
    """
    if win_size < 1:
        raise ValueError("win_size must be a positive integer")
    n_rows, n_cols = CGM_Data.shape
    if n_cols < n_fft:
        raise ValueError(
            "need at least " + str(n_fft) + " columns for the FFT features, got " + str(n_cols)
        )

    row_labels = CGM_Data.index
    readings = CGM_Data.to_numpy(dtype=float)

    # Number of columns that still have a partner ``win_size`` columns to the right.
    lag_span = max(n_cols - win_size, 0)
    # NOTE(review): the window starts stop before ``n_cols - win_size``, so when
    # n_cols is an exact multiple of win_size the last full window is not
    # summarised.  Kept as-is so the number of features does not change.
    window_starts = range(0, lag_span, win_size)

    blocks = []

    def _add_block(block):
        # Collect one feature group and optionally report the running shape.
        blocks.append(block)
        if verbose:
            print((n_rows, sum(b.shape[1] for b in blocks)))

    # Feature 1 - Windowed Mean
    _add_block(pd.DataFrame(
        {'Mean ' + str(start): CGM_Data.iloc[:, start:start + win_size].mean(axis=1).to_numpy()
         for start in window_starts},
        index=row_labels,
    ))

    # Feature 2 - Windowed Standard Deviation
    _add_block(pd.DataFrame(
        {'Std_deviation ' + str(start): CGM_Data.iloc[:, start:start + win_size].std(axis=1).to_numpy()
         for start in window_starts},
        index=row_labels,
    ))

    # Feature 3 - Fast Fourier Transform: keep the n_fft largest magnitudes per row
    magnitudes = np.abs(fft(readings, axis=1))
    strongest = np.sort(magnitudes, axis=1)[:, ::-1][:, :n_fft]
    _add_block(pd.DataFrame(
        strongest,
        columns=['FFT' + str(rank) for rank in range(1, n_fft + 1)],
        index=row_labels,
    ))

    # Feature 4 - Max of CGM Velocity: reading[i + win_size] - reading[i], maximised over i
    deltas = readings[:, win_size:win_size + lag_span] - readings[:, :lag_span]
    max_velocity = pd.DataFrame(deltas, index=row_labels).max(axis=1, skipna=True)
    _add_block(pd.DataFrame({'Max CGM Vel': max_velocity.to_numpy()}, index=row_labels))

    # Feature 5 - Skewness
    # NOTE(review): this is the skewness of how often each distinct value occurs
    # in the row (value_counts), not of the readings themselves - confirm that
    # this is intended before changing it, since it alters the feature values.
    skew_values = [skew(row.value_counts()) for _, row in CGM_Data.iterrows()]
    _add_block(pd.DataFrame(
        {'skewness': np.asarray(skew_values, dtype=float)},
        index=row_labels,
    ))

    # Feature 6 - polyfit, one fit per row against the column labels
    x_positions = np.array(CGM_Data.columns)
    coefficients = np.array(
        [np.polyfit(x_positions, row, poly_degree) for row in readings]
    ).reshape(n_rows, poly_degree + 1)
    _add_block(pd.DataFrame(coefficients, index=row_labels))

    Feature_Matrix = pd.concat(blocks, axis=1)
    return Feature_Matrix

    

In [4]:
if __name__=="__main__":
    # Passing ``names`` makes pandas read every line of the CSV as data and
    # label the 31 columns 0..30.
    column_names = list(range(31))

    # Meal Data: mealData1.csv .. mealData5.csv stacked row-wise
    CGM_Data_Meal = pd.concat(
        [pd.read_csv("mealData" + str(file_no) + ".csv", names=column_names)
         for file_no in range(1, 6)],
        axis=0)

    # No Meal Data: Nomeal1.csv .. Nomeal5.csv stacked row-wise
    CGM_Data_No_Meal = pd.concat(
        [pd.read_csv("Nomeal" + str(file_no) + ".csv", names=column_names)
         for file_no in range(1, 6)],
        axis=0)

    # Data Pre-Processing
    Meal_Data = DataPreProcessing(CGM_Data_Meal)
    No_Meal_Data = DataPreProcessing(CGM_Data_No_Meal)

    # Extract Features
    Meal_Data_Features = ExtractFeatures(Meal_Data)
    No_Meal_Data_Features = ExtractFeatures(No_Meal_Data)

    # Merge both Meal and No Meal Data features
    Feature_Matrix = pd.concat([Meal_Data_Features, No_Meal_Data_Features])

    # Standardize feature matrix.
    # The frame mixes string and integer column labels, which recent
    # scikit-learn versions refuse as feature names; hand over the raw array
    # instead (the scaler output is a NumPy array either way).
    # NOTE(review): the scaler is fitted on every row before cross-validation
    # and is not saved, so fold scores see held-out rows' statistics and new
    # data cannot be scaled the same way later - consider a Pipeline.
    Feature_Matrix = StandardScaler().fit_transform(Feature_Matrix.to_numpy())

    # Class labels: 1 for meal rows, 0 for no-meal rows, in concat order
    Class_labels = np.append(np.ones(len(Meal_Data_Features)), np.zeros(len(No_Meal_Data_Features)))

    # One splitter for every model: a fixed seed gives the same folds each time.
    skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=777)
    scoring = ('accuracy', 'precision', 'recall', 'f1')

    # Training KNN Model
    print("--------KNN MODELS-----------")
    K = 5
    for n in range(1, K + 1):
        neighbours = KNeighborsClassifier(n_neighbors=n)
        # cross_validate clones the estimator, so no prior fit is needed here.
        scores = cross_validate(neighbours, Feature_Matrix, Class_labels, cv=skfold,
                                scoring=scoring, return_train_score=True)
        print("For KNN with K value:"+str(n))
        print("Accuracy:",scores['test_accuracy'].mean(),
              "Precision:",scores['test_precision'].mean(),
              "Recall:",scores['test_recall'].mean(),
              "F1 Measure:",scores['test_f1'].mean())
        # The persisted model is trained on all rows.
        neighbours.fit(Feature_Matrix, Class_labels)
        # ``with`` closes the file even if pickling raises.
        with open("KNN"+str(n)+".model","wb") as handler:
            pickle.dump(neighbours, handler)

    # Training SVC model
    print("--------SVM MODEL-----------")
    svc = SVC(gamma='auto', random_state=777)
    scores = cross_validate(svc, Feature_Matrix, Class_labels, cv=skfold,
                            scoring=scoring, return_train_score=True)
    print("Accuracy:",scores['test_accuracy'].mean(),
          "Precision:",scores['test_precision'].mean(),
          "Recall:",scores['test_recall'].mean(),
          "F1 Measure:",scores['test_f1'].mean())
    svc.fit(Feature_Matrix, Class_labels)
    with open("SVC.model","wb") as handler:
        pickle.dump(svc, handler)
    
    
    
       

(245, 4)
(245, 8)
(245, 16)
(245, 17)
(245, 18)
(245, 22)
(242, 4)
(242, 8)
(242, 16)
(242, 17)
(242, 18)
(242, 22)
--------KNN MODELS-----------
For KNN with K value:1
Accuracy: 0.5913528297917104 Precision: 0.5918900883805127 Recall: 0.6000000000000001 F1 Measure: 0.5953513632965299
For KNN with K value:2
Accuracy: 0.5667367978119082 Precision: 0.6126617038875103 Recall: 0.3714285714285714 F1 Measure: 0.46054934159774075
For KNN with K value:3
Accuracy: 0.5791289711760993 Precision: 0.577863631825896 Recall: 0.6163265306122448 F1 Measure: 0.5955939607819992
For KNN with K value:4
Accuracy: 0.5708184304649695 Precision: 0.5946935036151981 Recall: 0.4612244897959184 F1 Measure: 0.519207695617432
For KNN with K value:5
Accuracy: 0.5995792131285504 Precision: 0.5994430538172717 Recall: 0.616326530612245 F1 Measure: 0.6073010837959292
--------SVM MODEL-----------
Accuracy: 0.6571638964864295 Precision: 0.674132909808318 Recall: 0.6244897959183673 F1 Measure: 0.6474579954758881
